Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amd/display: Add Renoir DML

DML provides the display configuration validation as provided
by the hw teams.

Acked-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Bhawanpreet Lakha and committed by
Alex Deucher
b04641a3 64ce485c

+8058
+4
drivers/gpu/drm/amd/display/dc/dml/Makefile
··· 45 45 CFLAGS_display_mode_vba_20v2.o := $(dml_ccflags) 46 46 CFLAGS_display_rq_dlg_calc_20v2.o := $(dml_ccflags) 47 47 endif 48 + ifdef CONFIG_DRM_AMD_DC_DCN2_1 49 + CFLAGS_display_mode_vba_21.o := $(dml_ccflags) 50 + CFLAGS_display_rq_dlg_calc_21.o := $(dml_ccflags) 51 + endif 48 52 ifdef CONFIG_DRM_AMD_DCN3AG 49 53 CFLAGS_display_mode_vba_3ag.o := $(dml_ccflags) 50 54 endif
+6123
drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
··· 1 + /* 2 + * Copyright 2017 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 21 + * 22 + * Authors: AMD 23 + * 24 + */ 25 + 26 + #ifdef CONFIG_DRM_AMD_DC_DCN2_0 27 + 28 + #include "../display_mode_lib.h" 29 + #include "../dml_inline_defs.h" 30 + #include "../display_mode_vba.h" 31 + #include "display_mode_vba_21.h" 32 + 33 + 34 + /* 35 + * NOTE: 36 + * This file is gcc-parsable HW gospel, coming straight from HW engineers. 37 + * 38 + * It doesn't adhere to Linux kernel style and sometimes will do things in odd 39 + * ways. Unless there is something clearly wrong with it the code should 40 + * remain as-is as it provides us with a guarantee from HW that it is correct. 
41 + */ 42 + 43 + typedef unsigned int uint; 44 + 45 + typedef struct { 46 + double DPPCLK; 47 + double DISPCLK; 48 + double PixelClock; 49 + double DCFCLKDeepSleep; 50 + unsigned int DPPPerPlane; 51 + bool ScalerEnabled; 52 + enum scan_direction_class SourceScan; 53 + unsigned int BlockWidth256BytesY; 54 + unsigned int BlockHeight256BytesY; 55 + unsigned int BlockWidth256BytesC; 56 + unsigned int BlockHeight256BytesC; 57 + unsigned int InterlaceEnable; 58 + unsigned int NumberOfCursors; 59 + unsigned int VBlank; 60 + unsigned int HTotal; 61 + } Pipe; 62 + 63 + typedef struct { 64 + bool Enable; 65 + unsigned int MaxPageTableLevels; 66 + unsigned int CachedPageTableLevels; 67 + } HostVM; 68 + 69 + #define BPP_INVALID 0 70 + #define BPP_BLENDED_PIPE 0xffffffff 71 + 72 + static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib); 73 + static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( 74 + struct display_mode_lib *mode_lib); 75 + static unsigned int dscceComputeDelay( 76 + unsigned int bpc, 77 + double bpp, 78 + unsigned int sliceWidth, 79 + unsigned int numSlices, 80 + enum output_format_class pixelFormat); 81 + static unsigned int dscComputeDelay(enum output_format_class pixelFormat); 82 + // Super monster function with some 45 arguments 83 + static bool CalculatePrefetchSchedule( 84 + struct display_mode_lib *mode_lib, 85 + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, 86 + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, 87 + Pipe *myPipe, 88 + unsigned int DSCDelay, 89 + double DPPCLKDelaySubtotal, 90 + double DPPCLKDelaySCL, 91 + double DPPCLKDelaySCLLBOnly, 92 + double DPPCLKDelayCNVCFormater, 93 + double DPPCLKDelayCNVCCursor, 94 + double DISPCLKDelaySubtotal, 95 + unsigned int ScalerRecoutWidth, 96 + enum output_format_class OutputFormat, 97 + unsigned int MaxInterDCNTileRepeaters, 98 + unsigned int VStartup, 99 + unsigned int 
MaxVStartup, 100 + unsigned int GPUVMPageTableLevels, 101 + bool GPUVMEnable, 102 + HostVM *myHostVM, 103 + bool DynamicMetadataEnable, 104 + int DynamicMetadataLinesBeforeActiveRequired, 105 + unsigned int DynamicMetadataTransmittedBytes, 106 + bool DCCEnable, 107 + double UrgentLatency, 108 + double UrgentExtraLatency, 109 + double TCalc, 110 + unsigned int PDEAndMetaPTEBytesFrame, 111 + unsigned int MetaRowByte, 112 + unsigned int PixelPTEBytesPerRow, 113 + double PrefetchSourceLinesY, 114 + unsigned int SwathWidthY, 115 + double BytePerPixelDETY, 116 + double VInitPreFillY, 117 + unsigned int MaxNumSwathY, 118 + double PrefetchSourceLinesC, 119 + double BytePerPixelDETC, 120 + double VInitPreFillC, 121 + unsigned int MaxNumSwathC, 122 + unsigned int SwathHeightY, 123 + unsigned int SwathHeightC, 124 + double TWait, 125 + bool XFCEnabled, 126 + double XFCRemoteSurfaceFlipDelay, 127 + bool ProgressiveToInterlaceUnitInOPP, 128 + double *DSTXAfterScaler, 129 + double *DSTYAfterScaler, 130 + double *DestinationLinesForPrefetch, 131 + double *PrefetchBandwidth, 132 + double *DestinationLinesToRequestVMInVBlank, 133 + double *DestinationLinesToRequestRowInVBlank, 134 + double *VRatioPrefetchY, 135 + double *VRatioPrefetchC, 136 + double *RequiredPrefetchPixDataBWLuma, 137 + double *RequiredPrefetchPixDataBWChroma, 138 + unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, 139 + double *Tno_bw, 140 + double *prefetch_vmrow_bw, 141 + unsigned int *swath_width_luma_ub, 142 + unsigned int *swath_width_chroma_ub, 143 + unsigned int *VUpdateOffsetPix, 144 + double *VUpdateWidthPix, 145 + double *VReadyOffsetPix); 146 + static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); 147 + static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); 148 + static double CalculateDCCConfiguration( 149 + bool DCCEnabled, 150 + bool DCCProgrammingAssumesScanDirectionUnknown, 151 + unsigned int ViewportWidth, 152 + unsigned int ViewportHeight, 153 + 
double DETBufferSize, 154 + unsigned int RequestHeight256Byte, 155 + unsigned int SwathHeight, 156 + enum dm_swizzle_mode TilingFormat, 157 + unsigned int BytePerPixel, 158 + enum scan_direction_class ScanOrientation, 159 + unsigned int *MaxUncompressedBlock, 160 + unsigned int *MaxCompressedBlock, 161 + unsigned int *Independent64ByteBlock); 162 + static double CalculatePrefetchSourceLines( 163 + struct display_mode_lib *mode_lib, 164 + double VRatio, 165 + double vtaps, 166 + bool Interlace, 167 + bool ProgressiveToInterlaceUnitInOPP, 168 + unsigned int SwathHeight, 169 + unsigned int ViewportYStart, 170 + double *VInitPreFill, 171 + unsigned int *MaxNumSwath); 172 + static unsigned int CalculateVMAndRowBytes( 173 + struct display_mode_lib *mode_lib, 174 + bool DCCEnable, 175 + unsigned int BlockHeight256Bytes, 176 + unsigned int BlockWidth256Bytes, 177 + enum source_format_class SourcePixelFormat, 178 + unsigned int SurfaceTiling, 179 + unsigned int BytePerPixel, 180 + enum scan_direction_class ScanDirection, 181 + unsigned int ViewportWidth, 182 + unsigned int ViewportHeight, 183 + unsigned int SwathWidthY, 184 + bool GPUVMEnable, 185 + bool HostVMEnable, 186 + unsigned int HostVMMaxPageTableLevels, 187 + unsigned int HostVMCachedPageTableLevels, 188 + unsigned int VMMPageSize, 189 + unsigned int PTEBufferSizeInRequests, 190 + unsigned int Pitch, 191 + unsigned int DCCMetaPitch, 192 + unsigned int *MacroTileWidth, 193 + unsigned int *MetaRowByte, 194 + unsigned int *PixelPTEBytesPerRow, 195 + bool *PTEBufferSizeNotExceeded, 196 + unsigned int *dpte_row_width_ub, 197 + unsigned int *dpte_row_height, 198 + unsigned int *MetaRequestWidth, 199 + unsigned int *MetaRequestHeight, 200 + unsigned int *meta_row_width, 201 + unsigned int *meta_row_height, 202 + unsigned int *vm_group_bytes, 203 + long *dpte_group_bytes, 204 + unsigned int *PixelPTEReqWidth, 205 + unsigned int *PixelPTEReqHeight, 206 + unsigned int *PTERequestSize, 207 + unsigned int *DPDE0BytesFrame, 208 
+ unsigned int *MetaPTEBytesFrame); 209 + 210 + static double CalculateTWait( 211 + unsigned int PrefetchMode, 212 + double DRAMClockChangeLatency, 213 + double UrgentLatency, 214 + double SREnterPlusExitTime); 215 + static double CalculateRemoteSurfaceFlipDelay( 216 + struct display_mode_lib *mode_lib, 217 + double VRatio, 218 + double SwathWidth, 219 + double Bpp, 220 + double LineTime, 221 + double XFCTSlvVupdateOffset, 222 + double XFCTSlvVupdateWidth, 223 + double XFCTSlvVreadyOffset, 224 + double XFCXBUFLatencyTolerance, 225 + double XFCFillBWOverhead, 226 + double XFCSlvChunkSize, 227 + double XFCBusTransportTime, 228 + double TCalc, 229 + double TWait, 230 + double *SrcActiveDrainRate, 231 + double *TInitXFill, 232 + double *TslvChk); 233 + static void CalculateActiveRowBandwidth( 234 + bool GPUVMEnable, 235 + enum source_format_class SourcePixelFormat, 236 + double VRatio, 237 + bool DCCEnable, 238 + double LineTime, 239 + unsigned int MetaRowByteLuma, 240 + unsigned int MetaRowByteChroma, 241 + unsigned int meta_row_height_luma, 242 + unsigned int meta_row_height_chroma, 243 + unsigned int PixelPTEBytesPerRowLuma, 244 + unsigned int PixelPTEBytesPerRowChroma, 245 + unsigned int dpte_row_height_luma, 246 + unsigned int dpte_row_height_chroma, 247 + double *meta_row_bw, 248 + double *dpte_row_bw); 249 + static void CalculateFlipSchedule( 250 + struct display_mode_lib *mode_lib, 251 + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, 252 + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, 253 + double UrgentExtraLatency, 254 + double UrgentLatency, 255 + unsigned int GPUVMMaxPageTableLevels, 256 + bool HostVMEnable, 257 + unsigned int HostVMMaxPageTableLevels, 258 + unsigned int HostVMCachedPageTableLevels, 259 + bool GPUVMEnable, 260 + double PDEAndMetaPTEBytesPerFrame, 261 + double MetaRowBytes, 262 + double DPTEBytesPerRow, 263 + double BandwidthAvailableForImmediateFlip, 264 + unsigned int 
TotImmediateFlipBytes, 265 + enum source_format_class SourcePixelFormat, 266 + double LineTime, 267 + double VRatio, 268 + double Tno_bw, 269 + bool DCCEnable, 270 + unsigned int dpte_row_height, 271 + unsigned int meta_row_height, 272 + unsigned int dpte_row_height_chroma, 273 + unsigned int meta_row_height_chroma, 274 + double *DestinationLinesToRequestVMInImmediateFlip, 275 + double *DestinationLinesToRequestRowInImmediateFlip, 276 + double *final_flip_bw, 277 + bool *ImmediateFlipSupportedForPipe); 278 + static double CalculateWriteBackDelay( 279 + enum source_format_class WritebackPixelFormat, 280 + double WritebackHRatio, 281 + double WritebackVRatio, 282 + unsigned int WritebackLumaHTaps, 283 + unsigned int WritebackLumaVTaps, 284 + unsigned int WritebackChromaHTaps, 285 + unsigned int WritebackChromaVTaps, 286 + unsigned int WritebackDestinationWidth); 287 + static void CalculateWatermarksAndDRAMSpeedChangeSupport( 288 + struct display_mode_lib *mode_lib, 289 + unsigned int PrefetchMode, 290 + unsigned int NumberOfActivePlanes, 291 + unsigned int MaxLineBufferLines, 292 + unsigned int LineBufferSize, 293 + unsigned int DPPOutputBufferPixels, 294 + double DETBufferSizeInKByte, 295 + unsigned int WritebackInterfaceLumaBufferSize, 296 + unsigned int WritebackInterfaceChromaBufferSize, 297 + double DCFCLK, 298 + double UrgentOutOfOrderReturn, 299 + double ReturnBW, 300 + bool GPUVMEnable, 301 + long dpte_group_bytes[], 302 + unsigned int MetaChunkSize, 303 + double UrgentLatency, 304 + double ExtraLatency, 305 + double WritebackLatency, 306 + double WritebackChunkSize, 307 + double SOCCLK, 308 + double DRAMClockChangeLatency, 309 + double SRExitTime, 310 + double SREnterPlusExitTime, 311 + double DCFCLKDeepSleep, 312 + int DPPPerPlane[], 313 + bool DCCEnable[], 314 + double DPPCLK[], 315 + unsigned int SwathWidthSingleDPPY[], 316 + unsigned int SwathHeightY[], 317 + double ReadBandwidthPlaneLuma[], 318 + unsigned int SwathHeightC[], 319 + double 
ReadBandwidthPlaneChroma[], 320 + unsigned int LBBitPerPixel[], 321 + unsigned int SwathWidthY[], 322 + double HRatio[], 323 + unsigned int vtaps[], 324 + unsigned int VTAPsChroma[], 325 + double VRatio[], 326 + unsigned int HTotal[], 327 + double PixelClock[], 328 + unsigned int BlendingAndTiming[], 329 + double BytePerPixelDETY[], 330 + double BytePerPixelDETC[], 331 + bool WritebackEnable[], 332 + enum source_format_class WritebackPixelFormat[], 333 + double WritebackDestinationWidth[], 334 + double WritebackDestinationHeight[], 335 + double WritebackSourceHeight[], 336 + enum clock_change_support *DRAMClockChangeSupport, 337 + double *UrgentWatermark, 338 + double *WritebackUrgentWatermark, 339 + double *DRAMClockChangeWatermark, 340 + double *WritebackDRAMClockChangeWatermark, 341 + double *StutterExitWatermark, 342 + double *StutterEnterPlusExitWatermark, 343 + double *MinActiveDRAMClockChangeLatencySupported); 344 + static void CalculateDCFCLKDeepSleep( 345 + struct display_mode_lib *mode_lib, 346 + unsigned int NumberOfActivePlanes, 347 + double BytePerPixelDETY[], 348 + double BytePerPixelDETC[], 349 + double VRatio[], 350 + unsigned int SwathWidthY[], 351 + int DPPPerPlane[], 352 + double HRatio[], 353 + double PixelClock[], 354 + double PSCL_THROUGHPUT[], 355 + double PSCL_THROUGHPUT_CHROMA[], 356 + double DPPCLK[], 357 + double *DCFCLKDeepSleep); 358 + static void CalculateDETBufferSize( 359 + double DETBufferSizeInKByte, 360 + unsigned int SwathHeightY, 361 + unsigned int SwathHeightC, 362 + double *DETBufferSizeY, 363 + double *DETBufferSizeC); 364 + static void CalculateUrgentBurstFactor( 365 + unsigned int DETBufferSizeInKByte, 366 + unsigned int SwathHeightY, 367 + unsigned int SwathHeightC, 368 + unsigned int SwathWidthY, 369 + double LineTime, 370 + double UrgentLatency, 371 + double CursorBufferSize, 372 + unsigned int CursorWidth, 373 + unsigned int CursorBPP, 374 + double VRatio, 375 + double VRatioPreY, 376 + double VRatioPreC, 377 + double 
BytePerPixelInDETY, 378 + double BytePerPixelInDETC, 379 + double *UrgentBurstFactorCursor, 380 + double *UrgentBurstFactorCursorPre, 381 + double *UrgentBurstFactorLuma, 382 + double *UrgentBurstFactorLumaPre, 383 + double *UrgentBurstFactorChroma, 384 + double *UrgentBurstFactorChromaPre, 385 + unsigned int *NotEnoughUrgentLatencyHiding, 386 + unsigned int *NotEnoughUrgentLatencyHidingPre); 387 + 388 + static void CalculatePixelDeliveryTimes( 389 + unsigned int NumberOfActivePlanes, 390 + double VRatio[], 391 + double VRatioPrefetchY[], 392 + double VRatioPrefetchC[], 393 + unsigned int swath_width_luma_ub[], 394 + unsigned int swath_width_chroma_ub[], 395 + int DPPPerPlane[], 396 + double HRatio[], 397 + double PixelClock[], 398 + double PSCL_THROUGHPUT[], 399 + double PSCL_THROUGHPUT_CHROMA[], 400 + double DPPCLK[], 401 + double BytePerPixelDETC[], 402 + enum scan_direction_class SourceScan[], 403 + unsigned int BlockWidth256BytesY[], 404 + unsigned int BlockHeight256BytesY[], 405 + unsigned int BlockWidth256BytesC[], 406 + unsigned int BlockHeight256BytesC[], 407 + double DisplayPipeLineDeliveryTimeLuma[], 408 + double DisplayPipeLineDeliveryTimeChroma[], 409 + double DisplayPipeLineDeliveryTimeLumaPrefetch[], 410 + double DisplayPipeLineDeliveryTimeChromaPrefetch[], 411 + double DisplayPipeRequestDeliveryTimeLuma[], 412 + double DisplayPipeRequestDeliveryTimeChroma[], 413 + double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 414 + double DisplayPipeRequestDeliveryTimeChromaPrefetch[]); 415 + 416 + static void CalculateMetaAndPTETimes( 417 + unsigned int NumberOfActivePlanes, 418 + bool GPUVMEnable, 419 + unsigned int MetaChunkSize, 420 + unsigned int MinMetaChunkSizeBytes, 421 + unsigned int GPUVMMaxPageTableLevels, 422 + unsigned int HTotal[], 423 + double VRatio[], 424 + double VRatioPrefetchY[], 425 + double VRatioPrefetchC[], 426 + double DestinationLinesToRequestRowInVBlank[], 427 + double DestinationLinesToRequestRowInImmediateFlip[], 428 + double 
DestinationLinesToRequestVMInVBlank[], 429 + double DestinationLinesToRequestVMInImmediateFlip[], 430 + bool DCCEnable[], 431 + double PixelClock[], 432 + double BytePerPixelDETY[], 433 + double BytePerPixelDETC[], 434 + enum scan_direction_class SourceScan[], 435 + unsigned int dpte_row_height[], 436 + unsigned int dpte_row_height_chroma[], 437 + unsigned int meta_row_width[], 438 + unsigned int meta_row_height[], 439 + unsigned int meta_req_width[], 440 + unsigned int meta_req_height[], 441 + long dpte_group_bytes[], 442 + unsigned int PTERequestSizeY[], 443 + unsigned int PTERequestSizeC[], 444 + unsigned int PixelPTEReqWidthY[], 445 + unsigned int PixelPTEReqHeightY[], 446 + unsigned int PixelPTEReqWidthC[], 447 + unsigned int PixelPTEReqHeightC[], 448 + unsigned int dpte_row_width_luma_ub[], 449 + unsigned int dpte_row_width_chroma_ub[], 450 + unsigned int vm_group_bytes[], 451 + unsigned int dpde0_bytes_per_frame_ub_l[], 452 + unsigned int dpde0_bytes_per_frame_ub_c[], 453 + unsigned int meta_pte_bytes_per_frame_ub_l[], 454 + unsigned int meta_pte_bytes_per_frame_ub_c[], 455 + double DST_Y_PER_PTE_ROW_NOM_L[], 456 + double DST_Y_PER_PTE_ROW_NOM_C[], 457 + double DST_Y_PER_META_ROW_NOM_L[], 458 + double TimePerMetaChunkNominal[], 459 + double TimePerMetaChunkVBlank[], 460 + double TimePerMetaChunkFlip[], 461 + double time_per_pte_group_nom_luma[], 462 + double time_per_pte_group_vblank_luma[], 463 + double time_per_pte_group_flip_luma[], 464 + double time_per_pte_group_nom_chroma[], 465 + double time_per_pte_group_vblank_chroma[], 466 + double time_per_pte_group_flip_chroma[], 467 + double TimePerVMGroupVBlank[], 468 + double TimePerVMGroupFlip[], 469 + double TimePerVMRequestVBlank[], 470 + double TimePerVMRequestFlip[]); 471 + 472 + static double CalculateExtraLatency( 473 + double UrgentRoundTripAndOutOfOrderLatency, 474 + int TotalNumberOfActiveDPP, 475 + int PixelChunkSizeInKByte, 476 + int TotalNumberOfDCCActiveDPP, 477 + int MetaChunkSize, 478 + double 
ReturnBW, 479 + bool GPUVMEnable, 480 + bool HostVMEnable, 481 + int NumberOfActivePlanes, 482 + int NumberOfDPP[], 483 + long dpte_group_bytes[], 484 + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, 485 + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, 486 + int HostVMMaxPageTableLevels, 487 + int HostVMCachedPageTableLevels); 488 + 489 + void dml21_recalculate(struct display_mode_lib *mode_lib) 490 + { 491 + ModeSupportAndSystemConfiguration(mode_lib); 492 + PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); 493 + DisplayPipeConfiguration(mode_lib); 494 + DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); 495 + } 496 + 497 + static unsigned int dscceComputeDelay( 498 + unsigned int bpc, 499 + double bpp, 500 + unsigned int sliceWidth, 501 + unsigned int numSlices, 502 + enum output_format_class pixelFormat) 503 + { 504 + // valid bpc = source bits per component in the set of {8, 10, 12} 505 + // valid bpp = increments of 1/16 of a bit 506 + // min = 6/7/8 in N420/N422/444, respectively 507 + // max = such that compression is 1:1 508 + //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 509 + //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4} 510 + //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 511 + 512 + // fixed value 513 + unsigned int rcModelSize = 8192; 514 + 515 + // N422/N420 operate at 2 pixels per clock 516 + unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, S, ix, wx, p, l0, a, ax, l, 517 + Delay, pixels; 518 + 519 + if (pixelFormat == dm_n422 || pixelFormat == dm_420) 520 + pixelsPerClock = 2; 521 + // #all other modes operate at 1 pixel per clock 522 + else 523 + pixelsPerClock = 1; 524 + 525 + //initial transmit delay as per PPS 526 + 
initalXmitDelay = dml_round(rcModelSize / 2.0 / bpp / pixelsPerClock); 527 + 528 + //compute ssm delay 529 + if (bpc == 8) 530 + D = 81; 531 + else if (bpc == 10) 532 + D = 89; 533 + else 534 + D = 113; 535 + 536 + //divide by pixel per cycle to compute slice width as seen by DSC 537 + w = sliceWidth / pixelsPerClock; 538 + 539 + //422 mode has an additional cycle of delay 540 + if (pixelFormat == dm_s422) 541 + S = 1; 542 + else 543 + S = 0; 544 + 545 + //main calculation for the dscce 546 + ix = initalXmitDelay + 45; 547 + wx = (w + 2) / 3; 548 + p = 3 * wx - w; 549 + l0 = ix / w; 550 + a = ix + p * l0; 551 + ax = (a + 2) / 3 + D + 6 + 1; 552 + l = (ax + wx - 1) / wx; 553 + if ((ix % w) == 0 && p != 0) 554 + lstall = 1; 555 + else 556 + lstall = 0; 557 + Delay = l * wx * (numSlices - 1) + ax + S + lstall + 22; 558 + 559 + //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels 560 + pixels = Delay * 3 * pixelsPerClock; 561 + return pixels; 562 + } 563 + 564 + static unsigned int dscComputeDelay(enum output_format_class pixelFormat) 565 + { 566 + unsigned int Delay = 0; 567 + 568 + if (pixelFormat == dm_420) { 569 + // sfr 570 + Delay = Delay + 2; 571 + // dsccif 572 + Delay = Delay + 0; 573 + // dscc - input deserializer 574 + Delay = Delay + 3; 575 + // dscc gets pixels every other cycle 576 + Delay = Delay + 2; 577 + // dscc - input cdc fifo 578 + Delay = Delay + 12; 579 + // dscc gets pixels every other cycle 580 + Delay = Delay + 13; 581 + // dscc - cdc uncertainty 582 + Delay = Delay + 2; 583 + // dscc - output cdc fifo 584 + Delay = Delay + 7; 585 + // dscc gets pixels every other cycle 586 + Delay = Delay + 3; 587 + // dscc - cdc uncertainty 588 + Delay = Delay + 2; 589 + // dscc - output serializer 590 + Delay = Delay + 1; 591 + // sft 592 + Delay = Delay + 1; 593 + } else if (pixelFormat == dm_n422) { 594 + // sfr 595 + Delay = Delay + 2; 596 + // dsccif 597 + Delay = Delay + 1; 598 + // dscc - input deserializer 599 + 
Delay = Delay + 5; 600 + // dscc - input cdc fifo 601 + Delay = Delay + 25; 602 + // dscc - cdc uncertainty 603 + Delay = Delay + 2; 604 + // dscc - output cdc fifo 605 + Delay = Delay + 10; 606 + // dscc - cdc uncertainty 607 + Delay = Delay + 2; 608 + // dscc - output serializer 609 + Delay = Delay + 1; 610 + // sft 611 + Delay = Delay + 1; 612 + } else { 613 + // sfr 614 + Delay = Delay + 2; 615 + // dsccif 616 + Delay = Delay + 0; 617 + // dscc - input deserializer 618 + Delay = Delay + 3; 619 + // dscc - input cdc fifo 620 + Delay = Delay + 12; 621 + // dscc - cdc uncertainty 622 + Delay = Delay + 2; 623 + // dscc - output cdc fifo 624 + Delay = Delay + 7; 625 + // dscc - output serializer 626 + Delay = Delay + 1; 627 + // dscc - cdc uncertainty 628 + Delay = Delay + 2; 629 + // sft 630 + Delay = Delay + 1; 631 + } 632 + 633 + return Delay; 634 + } 635 + 636 + static bool CalculatePrefetchSchedule( 637 + struct display_mode_lib *mode_lib, 638 + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, 639 + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, 640 + Pipe *myPipe, 641 + unsigned int DSCDelay, 642 + double DPPCLKDelaySubtotal, 643 + double DPPCLKDelaySCL, 644 + double DPPCLKDelaySCLLBOnly, 645 + double DPPCLKDelayCNVCFormater, 646 + double DPPCLKDelayCNVCCursor, 647 + double DISPCLKDelaySubtotal, 648 + unsigned int ScalerRecoutWidth, 649 + enum output_format_class OutputFormat, 650 + unsigned int MaxInterDCNTileRepeaters, 651 + unsigned int VStartup, 652 + unsigned int MaxVStartup, 653 + unsigned int GPUVMPageTableLevels, 654 + bool GPUVMEnable, 655 + HostVM *myHostVM, 656 + bool DynamicMetadataEnable, 657 + int DynamicMetadataLinesBeforeActiveRequired, 658 + unsigned int DynamicMetadataTransmittedBytes, 659 + bool DCCEnable, 660 + double UrgentLatency, 661 + double UrgentExtraLatency, 662 + double TCalc, 663 + unsigned int PDEAndMetaPTEBytesFrame, 664 + unsigned int MetaRowByte, 665 + 
unsigned int PixelPTEBytesPerRow, 666 + double PrefetchSourceLinesY, 667 + unsigned int SwathWidthY, 668 + double BytePerPixelDETY, 669 + double VInitPreFillY, 670 + unsigned int MaxNumSwathY, 671 + double PrefetchSourceLinesC, 672 + double BytePerPixelDETC, 673 + double VInitPreFillC, 674 + unsigned int MaxNumSwathC, 675 + unsigned int SwathHeightY, 676 + unsigned int SwathHeightC, 677 + double TWait, 678 + bool XFCEnabled, 679 + double XFCRemoteSurfaceFlipDelay, 680 + bool ProgressiveToInterlaceUnitInOPP, 681 + double *DSTXAfterScaler, 682 + double *DSTYAfterScaler, 683 + double *DestinationLinesForPrefetch, 684 + double *PrefetchBandwidth, 685 + double *DestinationLinesToRequestVMInVBlank, 686 + double *DestinationLinesToRequestRowInVBlank, 687 + double *VRatioPrefetchY, 688 + double *VRatioPrefetchC, 689 + double *RequiredPrefetchPixDataBWLuma, 690 + double *RequiredPrefetchPixDataBWChroma, 691 + unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, 692 + double *Tno_bw, 693 + double *prefetch_vmrow_bw, 694 + unsigned int *swath_width_luma_ub, 695 + unsigned int *swath_width_chroma_ub, 696 + unsigned int *VUpdateOffsetPix, 697 + double *VUpdateWidthPix, 698 + double *VReadyOffsetPix) 699 + { 700 + bool MyError = false; 701 + unsigned int DPPCycles, DISPCLKCycles; 702 + double DSTTotalPixelsAfterScaler, TotalRepeaterDelayTime; 703 + double Tdm, LineTime, Tsetup; 704 + double dst_y_prefetch_equ; 705 + double Tsw_oto; 706 + double prefetch_bw_oto; 707 + double Tvm_oto; 708 + double Tr0_oto; 709 + double Tvm_oto_lines; 710 + double Tr0_oto_lines; 711 + double Tsw_oto_lines; 712 + double dst_y_prefetch_oto; 713 + double TimeForFetchingMetaPTE = 0; 714 + double TimeForFetchingRowInVBlank = 0; 715 + double LinesToRequestPrefetchPixelData = 0; 716 + double HostVMInefficiencyFactor; 717 + unsigned int HostVMDynamicLevels; 718 + 719 + if (GPUVMEnable == true && myHostVM->Enable == true) { 720 + HostVMInefficiencyFactor = 721 + 
PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData 722 + / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly; 723 + HostVMDynamicLevels = myHostVM->MaxPageTableLevels 724 + - myHostVM->CachedPageTableLevels; 725 + } else { 726 + HostVMInefficiencyFactor = 1; 727 + HostVMDynamicLevels = 0; 728 + } 729 + 730 + if (myPipe->ScalerEnabled) 731 + DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCL; 732 + else 733 + DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCLLBOnly; 734 + 735 + DPPCycles = DPPCycles + DPPCLKDelayCNVCFormater + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; 736 + 737 + DISPCLKCycles = DISPCLKDelaySubtotal; 738 + 739 + if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) 740 + return true; 741 + 742 + *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK 743 + + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay; 744 + 745 + if (myPipe->DPPPerPlane > 1) 746 + *DSTXAfterScaler = *DSTXAfterScaler + ScalerRecoutWidth; 747 + 748 + if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP)) 749 + *DSTYAfterScaler = 1; 750 + else 751 + *DSTYAfterScaler = 0; 752 + 753 + DSTTotalPixelsAfterScaler = ((double) (*DSTYAfterScaler * myPipe->HTotal)) + *DSTXAfterScaler; 754 + *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 755 + *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 756 + 757 + *VUpdateOffsetPix = dml_ceil(myPipe->HTotal / 4.0, 1); 758 + TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2.0 / myPipe->DPPCLK + 3.0 / myPipe->DISPCLK); 759 + *VUpdateWidthPix = (14.0 / myPipe->DCFCLKDeepSleep + 12.0 / myPipe->DPPCLK + TotalRepeaterDelayTime) 760 + * myPipe->PixelClock; 761 + 762 + *VReadyOffsetPix = dml_max( 763 + 150.0 / myPipe->DPPCLK, 764 + TotalRepeaterDelayTime + 20.0 / myPipe->DCFCLKDeepSleep + 10.0 / myPipe->DPPCLK) 765 + * myPipe->PixelClock; 766 + 767 + Tsetup = 
(double) (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / myPipe->PixelClock; 768 + 769 + LineTime = (double) myPipe->HTotal / myPipe->PixelClock; 770 + 771 + if (DynamicMetadataEnable) { 772 + double Tdmbf, Tdmec, Tdmsks; 773 + 774 + Tdm = dml_max(0.0, UrgentExtraLatency - TCalc); 775 + Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / myPipe->DISPCLK; 776 + Tdmec = LineTime; 777 + if (DynamicMetadataLinesBeforeActiveRequired == -1) 778 + Tdmsks = myPipe->VBlank * LineTime / 2.0; 779 + else 780 + Tdmsks = DynamicMetadataLinesBeforeActiveRequired * LineTime; 781 + if (myPipe->InterlaceEnable && !ProgressiveToInterlaceUnitInOPP) 782 + Tdmsks = Tdmsks / 2; 783 + if (VStartup * LineTime 784 + < Tsetup + TWait + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) { 785 + MyError = true; 786 + *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = (Tsetup + TWait 787 + + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) / LineTime; 788 + } else 789 + *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = 0.0; 790 + } else 791 + Tdm = 0; 792 + 793 + if (GPUVMEnable) { 794 + if (GPUVMPageTableLevels >= 3) 795 + *Tno_bw = UrgentExtraLatency + UrgentLatency * ((GPUVMPageTableLevels - 2) * (myHostVM->MaxPageTableLevels + 1) - 1); 796 + else 797 + *Tno_bw = 0; 798 + } else if (!DCCEnable) 799 + *Tno_bw = LineTime; 800 + else 801 + *Tno_bw = LineTime / 4; 802 + 803 + dst_y_prefetch_equ = VStartup - dml_max(TCalc + TWait, XFCRemoteSurfaceFlipDelay) / LineTime 804 + - (Tsetup + Tdm) / LineTime 805 + - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); 806 + 807 + Tsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; 808 + 809 + if (myPipe->SourceScan == dm_horz) { 810 + *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockWidth256BytesY) + myPipe->BlockWidth256BytesY; 811 + *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC; 812 + } else { 813 + *swath_width_luma_ub = 
dml_ceil(SwathWidthY - 1, myPipe->BlockHeight256BytesY) + myPipe->BlockHeight256BytesY; 814 + *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC; 815 + } 816 + 817 + prefetch_bw_oto = (PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) / Tsw_oto; 818 + 819 + 820 + if (GPUVMEnable == true) { 821 + Tvm_oto = dml_max(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, 822 + dml_max(UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1), 823 + LineTime / 4.0)); 824 + } else 825 + Tvm_oto = LineTime / 4.0; 826 + 827 + if ((GPUVMEnable == true || DCCEnable == true)) { 828 + Tr0_oto = dml_max( 829 + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, 830 + dml_max(UrgentLatency * (HostVMDynamicLevels + 1), dml_max(LineTime - Tvm_oto, LineTime / 4))); 831 + } else 832 + Tr0_oto = (LineTime - Tvm_oto) / 2.0; 833 + 834 + Tvm_oto_lines = dml_ceil(4 * Tvm_oto / LineTime, 1) / 4.0; 835 + Tr0_oto_lines = dml_ceil(4 * Tr0_oto / LineTime, 1) / 4.0; 836 + Tsw_oto_lines = dml_ceil(4 * Tsw_oto / LineTime, 1) / 4.0; 837 + dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Tsw_oto_lines + 0.75; 838 + 839 + dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; 840 + 841 + if (dst_y_prefetch_oto < dst_y_prefetch_equ) 842 + *DestinationLinesForPrefetch = dst_y_prefetch_oto; 843 + else 844 + *DestinationLinesForPrefetch = dst_y_prefetch_equ; 845 + 846 + dml_print("DML: VStartup: %d\n", VStartup); 847 + dml_print("DML: TCalc: %f\n", TCalc); 848 + dml_print("DML: TWait: %f\n", TWait); 849 + dml_print("DML: XFCRemoteSurfaceFlipDelay: %f\n", XFCRemoteSurfaceFlipDelay); 850 + dml_print("DML: LineTime: %f\n", LineTime); 851 + dml_print("DML: Tsetup: %f\n", Tsetup); 852 + dml_print("DML: Tdm: %f\n", Tdm); 
853 + dml_print("DML: DSTYAfterScaler: %f\n", *DSTYAfterScaler); 854 + dml_print("DML: DSTXAfterScaler: %f\n", *DSTXAfterScaler); 855 + dml_print("DML: HTotal: %d\n", myPipe->HTotal); 856 + 857 + *PrefetchBandwidth = 0; 858 + *DestinationLinesToRequestVMInVBlank = 0; 859 + *DestinationLinesToRequestRowInVBlank = 0; 860 + *VRatioPrefetchY = 0; 861 + *VRatioPrefetchC = 0; 862 + *RequiredPrefetchPixDataBWLuma = 0; 863 + if (*DestinationLinesForPrefetch > 1) { 864 + double PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 865 + + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 866 + + PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) 867 + + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) 868 + / (*DestinationLinesForPrefetch * LineTime - *Tno_bw); 869 + 870 + double PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * 871 + HostVMInefficiencyFactor + PrefetchSourceLinesY * 872 + *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + 873 + PrefetchSourceLinesC * *swath_width_chroma_ub * 874 + dml_ceil(BytePerPixelDETC, 2)) / 875 + (*DestinationLinesForPrefetch * LineTime - *Tno_bw - 2 * 876 + UrgentLatency * (1 + HostVMDynamicLevels)); 877 + 878 + double PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow 879 + * HostVMInefficiencyFactor + PrefetchSourceLinesY * 880 + *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + 881 + PrefetchSourceLinesC * *swath_width_chroma_ub * 882 + dml_ceil(BytePerPixelDETC, 2)) / 883 + (*DestinationLinesForPrefetch * LineTime - 884 + UrgentExtraLatency - UrgentLatency * (GPUVMPageTableLevels 885 + * (HostVMDynamicLevels + 1) - 1)); 886 + 887 + double PrefetchBandwidth4 = (PrefetchSourceLinesY * *swath_width_luma_ub * 888 + dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC * 889 + *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) / 890 + (*DestinationLinesForPrefetch * LineTime - 891 + UrgentExtraLatency - UrgentLatency * 
(GPUVMPageTableLevels 892 + * (HostVMDynamicLevels + 1) - 1) - 2 * UrgentLatency * 893 + (1 + HostVMDynamicLevels)); 894 + 895 + if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (*DestinationLinesForPrefetch - dml_ceil(Tsw_oto_lines, 1) / 4.0 - 0.75) * LineTime - *Tno_bw > 0) { 896 + PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / ((*DestinationLinesForPrefetch - dml_ceil(Tsw_oto_lines, 1) / 4.0 - 0.75) * LineTime - *Tno_bw); 897 + } 898 + if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= UrgentLatency * (1 + HostVMDynamicLevels)) { 899 + *PrefetchBandwidth = PrefetchBandwidth1; 900 + } else if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < UrgentLatency * (1 + HostVMDynamicLevels)) { 901 + *PrefetchBandwidth = PrefetchBandwidth2; 902 + } else if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= UrgentLatency * (1 + HostVMDynamicLevels)) { 903 + *PrefetchBandwidth = PrefetchBandwidth3; 904 + } else { 905 + *PrefetchBandwidth = PrefetchBandwidth4; 906 + } 907 + 908 + if (GPUVMEnable) { 909 + TimeForFetchingMetaPTE = dml_max(*Tno_bw + (double) PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / *PrefetchBandwidth, 910 + dml_max(UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * 
(HostVMDynamicLevels + 1) - 1), LineTime / 4)); 911 + } else { 912 + // 5/30/2018 - This was an optimization requested from Sy but now NumberOfCursors is no longer a factor 913 + // so if this needs to be reinstated, then it should be officially done in the VBA code as well. 914 + // if (mode_lib->NumberOfCursors > 0 || XFCEnabled) 915 + TimeForFetchingMetaPTE = LineTime / 4; 916 + // else 917 + // TimeForFetchingMetaPTE = 0.0; 918 + } 919 + 920 + if ((GPUVMEnable == true || DCCEnable == true)) { 921 + TimeForFetchingRowInVBlank = 922 + dml_max( 923 + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 924 + / *PrefetchBandwidth, 925 + dml_max( 926 + UrgentLatency * (1 + HostVMDynamicLevels), 927 + dml_max( 928 + (LineTime 929 + - TimeForFetchingMetaPTE) / 2.0, 930 + LineTime 931 + / 4.0))); 932 + } else { 933 + // See note above dated 5/30/2018 934 + // if (NumberOfCursors > 0 || XFCEnabled) 935 + TimeForFetchingRowInVBlank = (LineTime - TimeForFetchingMetaPTE) / 2.0; 936 + // else // TODO: Did someone else add this?? 937 + // TimeForFetchingRowInVBlank = 0.0; 938 + } 939 + 940 + *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 941 + 942 + *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 943 + 944 + LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch 945 + // See note above dated 5/30/2018 946 + // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ? 947 + - ((GPUVMEnable || DCCEnable) ? 948 + (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 949 + 0.0); // TODO: Did someone else add this?? 
950 + 951 + if (LinesToRequestPrefetchPixelData > 0) { 952 + 953 + *VRatioPrefetchY = (double) PrefetchSourceLinesY 954 + / LinesToRequestPrefetchPixelData; 955 + *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 956 + if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 957 + if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 958 + *VRatioPrefetchY = 959 + dml_max( 960 + (double) PrefetchSourceLinesY 961 + / LinesToRequestPrefetchPixelData, 962 + (double) MaxNumSwathY 963 + * SwathHeightY 964 + / (LinesToRequestPrefetchPixelData 965 + - (VInitPreFillY 966 + - 3.0) 967 + / 2.0)); 968 + *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 969 + } else { 970 + MyError = true; 971 + *VRatioPrefetchY = 0; 972 + } 973 + } 974 + 975 + *VRatioPrefetchC = (double) PrefetchSourceLinesC 976 + / LinesToRequestPrefetchPixelData; 977 + *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 978 + 979 + if ((SwathHeightC > 4)) { 980 + if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 981 + *VRatioPrefetchC = 982 + dml_max( 983 + *VRatioPrefetchC, 984 + (double) MaxNumSwathC 985 + * SwathHeightC 986 + / (LinesToRequestPrefetchPixelData 987 + - (VInitPreFillC 988 + - 3.0) 989 + / 2.0)); 990 + *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 991 + } else { 992 + MyError = true; 993 + *VRatioPrefetchC = 0; 994 + } 995 + } 996 + 997 + *RequiredPrefetchPixDataBWLuma = myPipe->DPPPerPlane 998 + * (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData 999 + * dml_ceil(BytePerPixelDETY, 1) 1000 + * *swath_width_luma_ub / LineTime; 1001 + *RequiredPrefetchPixDataBWChroma = myPipe->DPPPerPlane 1002 + * (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData 1003 + * dml_ceil(BytePerPixelDETC, 2) 1004 + * *swath_width_chroma_ub / LineTime; 1005 + } else { 1006 + MyError = true; 1007 + *VRatioPrefetchY = 0; 1008 + *VRatioPrefetchC = 0; 1009 + *RequiredPrefetchPixDataBWLuma = 0; 1010 + *RequiredPrefetchPixDataBWChroma = 0; 1011 + } 1012 + 1013 + 
dml_print("DML: Tvm: %fus\n", TimeForFetchingMetaPTE); 1014 + dml_print("DML: Tr0: %fus\n", TimeForFetchingRowInVBlank); 1015 + dml_print("DML: Tsw: %fus\n", (double)(*DestinationLinesForPrefetch) * LineTime - TimeForFetchingMetaPTE - TimeForFetchingRowInVBlank); 1016 + dml_print("DML: Tpre: %fus\n", (double)(*DestinationLinesForPrefetch) * LineTime); 1017 + dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); 1018 + 1019 + } else { 1020 + MyError = true; 1021 + } 1022 + 1023 + { 1024 + double prefetch_vm_bw; 1025 + double prefetch_row_bw; 1026 + 1027 + if (PDEAndMetaPTEBytesFrame == 0) { 1028 + prefetch_vm_bw = 0; 1029 + } else if (*DestinationLinesToRequestVMInVBlank > 0) { 1030 + prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime); 1031 + } else { 1032 + prefetch_vm_bw = 0; 1033 + MyError = true; 1034 + } 1035 + if (MetaRowByte + PixelPTEBytesPerRow == 0) { 1036 + prefetch_row_bw = 0; 1037 + } else if (*DestinationLinesToRequestRowInVBlank > 0) { 1038 + prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime); 1039 + } else { 1040 + prefetch_row_bw = 0; 1041 + MyError = true; 1042 + } 1043 + 1044 + *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 1045 + } 1046 + 1047 + if (MyError) { 1048 + *PrefetchBandwidth = 0; 1049 + TimeForFetchingMetaPTE = 0; 1050 + TimeForFetchingRowInVBlank = 0; 1051 + *DestinationLinesToRequestVMInVBlank = 0; 1052 + *DestinationLinesToRequestRowInVBlank = 0; 1053 + *DestinationLinesForPrefetch = 0; 1054 + LinesToRequestPrefetchPixelData = 0; 1055 + *VRatioPrefetchY = 0; 1056 + *VRatioPrefetchC = 0; 1057 + *RequiredPrefetchPixDataBWLuma = 0; 1058 + *RequiredPrefetchPixDataBWChroma = 0; 1059 + } 1060 + 1061 + return MyError; 1062 + }

/*
 * Returns VCOSpeed * 4 divided by the ratio (VCOSpeed * 4 / Clock) after
 * that ratio has been passed through dml_floor(..., 1).
 *
 * NOTE(review): dml_floor(x, 1) is presumed to round down to a whole
 * number, which makes the result the nearest such quotient that is not
 * below Clock -- confirm against dml_inline_defs.h.  Clock must be
 * non-zero and small enough that the floored ratio is not zero.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1);
}

/*
 * Counterpart of RoundToDFSGranularityUp(): the ratio (VCOSpeed * 4 / Clock)
 * is passed through dml_ceil(..., 1) instead, so the returned quotient does
 * not exceed Clock (under the same presumption about dml_ceil rounding to a
 * whole number -- confirm).
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4 / Clock, 1);
}

/*
 * Select the DCC request mode for one surface and report the matching block
 * sizes.
 *
 * When DCC is disabled, or BytePerPixel is not one of 1/2/4/8 on the
 * known-scan-direction path, no mode is selected: all three outputs are
 * written as 0 and 0.0 is returned.
 *
 * Outputs written on every call:
 *   *MaxUncompressedBlock    256, 128 or 0
 *   *MaxCompressedBlock      256, 128, 64 or 0
 *   *Independent64ByteBlock  true only for the 128-byte non-contiguous mode
 *
 * Return value: 4.0 for the 256-byte and the 128-byte non-contiguous modes,
 * 2.0 for the 128-byte contiguous mode, 0.0 when no mode was selected.
 */
static double CalculateDCCConfiguration(
		bool DCCEnabled,
		bool DCCProgrammingAssumesScanDirectionUnknown,
		unsigned int ViewportWidth,
		unsigned int ViewportHeight,
		double DETBufferSize,
		unsigned int RequestHeight256Byte,
		unsigned int SwathHeight,
		enum dm_swizzle_mode TilingFormat,
		unsigned int BytePerPixel,
		enum scan_direction_class ScanOrientation,
		unsigned int *MaxUncompressedBlock,
		unsigned int *MaxCompressedBlock,
		unsigned int *Independent64ByteBlock)
{
	double MaximumDCCCompressionSurface = 0.0;
	enum {
		REQ_256Bytes,
		REQ_128BytesNonContiguous,
		REQ_128BytesContiguous,
		REQ_NA
	} Request = REQ_NA;
	/*
	 * The same eight swizzle modes are tested in both the
	 * scan-direction-unknown path and the 8-bytes-per-pixel path below;
	 * evaluate the membership once.
	 */
	bool TilingInSpecialList = TilingFormat == dm_sw_4kb_d
			|| TilingFormat == dm_sw_4kb_d_x
			|| TilingFormat == dm_sw_var_d
			|| TilingFormat == dm_sw_var_d_x
			|| TilingFormat == dm_sw_64kb_d
			|| TilingFormat == dm_sw_64kb_d_x
			|| TilingFormat == dm_sw_64kb_d_t
			|| TilingFormat == dm_sw_64kb_r_x;

	if (DCCEnabled == true) {
		if (DCCProgrammingAssumesScanDirectionUnknown == true) {
			/* Decide purely from how the viewport fits in the DET buffer. */
			if (DETBufferSize >= RequestHeight256Byte * ViewportWidth * BytePerPixel
					&& DETBufferSize >= 256 / RequestHeight256Byte * ViewportHeight) {
				Request = REQ_256Bytes;
			} else if ((DETBufferSize < RequestHeight256Byte * ViewportWidth * BytePerPixel
					&& (BytePerPixel == 2 || BytePerPixel == 4))
					|| (DETBufferSize < 256 / RequestHeight256Byte * ViewportHeight
					&& BytePerPixel == 8
					&& TilingInSpecialList)) {
				Request = REQ_128BytesNonContiguous;
			} else {
				Request = REQ_128BytesContiguous;
			}
		} else {
			/* Scan direction is known: decide per pixel size. */
			if (BytePerPixel == 1) {
				if (ScanOrientation == dm_vert || SwathHeight == 16) {
					Request = REQ_256Bytes;
				} else {
					Request = REQ_128BytesContiguous;
				}
			} else if (BytePerPixel == 2) {
				if ((ScanOrientation == dm_vert && SwathHeight == 16)
						|| (ScanOrientation != dm_vert && SwathHeight == 8)) {
					Request = REQ_256Bytes;
				} else if (ScanOrientation == dm_vert) {
					Request = REQ_128BytesContiguous;
				} else {
					Request = REQ_128BytesNonContiguous;
				}
			} else if (BytePerPixel == 4) {
				if (SwathHeight == 8) {
					Request = REQ_256Bytes;
				} else if (ScanOrientation == dm_vert) {
					Request = REQ_128BytesContiguous;
				} else {
					Request = REQ_128BytesNonContiguous;
				}
			} else if (BytePerPixel == 8) {
				if (TilingInSpecialList) {
					if ((ScanOrientation == dm_vert && SwathHeight == 8)
							|| (ScanOrientation != dm_vert && SwathHeight == 4)) {
						Request = REQ_256Bytes;
					} else if (ScanOrientation != dm_vert) {
						Request = REQ_128BytesContiguous;
					} else {
						Request = REQ_128BytesNonContiguous;
					}
				} else {
					if (ScanOrientation != dm_vert || SwathHeight == 8) {
						Request = REQ_256Bytes;
					} else {
						Request = REQ_128BytesContiguous;
					}
				}
			}
		}
	} else {
		Request = REQ_NA;
	}

	if (Request == REQ_256Bytes) {
		*MaxUncompressedBlock = 256;
		*MaxCompressedBlock = 256;
		*Independent64ByteBlock = false;
		MaximumDCCCompressionSurface = 4.0;
	} else if (Request == REQ_128BytesContiguous) {
		*MaxUncompressedBlock = 128;
		*MaxCompressedBlock = 128;
		*Independent64ByteBlock = false;
		MaximumDCCCompressionSurface = 2.0;
	} else if (Request == REQ_128BytesNonContiguous) {
		*MaxUncompressedBlock = 256;
		*MaxCompressedBlock = 64;
		*Independent64ByteBlock = true;
		MaximumDCCCompressionSurface = 4.0;
	} else {
		*MaxUncompressedBlock = 0;
		*MaxCompressedBlock = 0;
		*Independent64ByteBlock = 0;
		MaximumDCCCompressionSurface = 0.0;
	}

	return MaximumDCCCompressionSurface;
}

/*
 * Compute the number of source lines to prefetch for one plane.
 *
 * Outputs:
 *   *VInitPreFill  floor((VRatio + vtaps + 1 [+ Interlace * 0.5 * VRatio]) / 2);
 *                  the interlace term is left out when
 *                  ProgressiveToInterlaceUnitInOPP is set.
 *   *MaxNumSwath   number of whole swaths derived from *VInitPreFill.
 *
 * Return value: *MaxNumSwath * SwathHeight + MaxPartialSwath.
 *
 * Two formulas are used depending on
 * mode_lib->vba.IgnoreViewportPositioning.  When it is set the calculation
 * does not use ViewportYStart; a warning is printed if ViewportYStart is
 * non-zero.  SwathHeight must be non-zero (it is a divisor and a modulus).
 */
static double CalculatePrefetchSourceLines(
		struct display_mode_lib *mode_lib,
		double VRatio,
		double vtaps,
		bool Interlace,
		bool ProgressiveToInterlaceUnitInOPP,
		unsigned int SwathHeight,
		unsigned int ViewportYStart,
		double *VInitPreFill,
		unsigned int *MaxNumSwath)
{
	unsigned int MaxPartialSwath;

	if (ProgressiveToInterlaceUnitInOPP)
		*VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
	else
		*VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);

	if (!mode_lib->vba.IgnoreViewportPositioning) {

		*MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;

		if (*VInitPreFill > 1.0)
			MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
		else
			MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2)
					% SwathHeight;
		/* This path never reports a partial swath of zero lines. */
		MaxPartialSwath = dml_max(1U, MaxPartialSwath);

	} else {

		if (ViewportYStart != 0)
			dml_print(
					"WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");

		*MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);

		if (*VInitPreFill > 1.0)
			MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
		else
			MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1)
					% SwathHeight;
	}

	return *MaxNumSwath * SwathHeight + MaxPartialSwath;
}

/*
 * Compute the page-table and DCC-metadata byte counts for one surface.
 *
 * Return value: PDEAndMetaPTEBytesFrame, the sum of *MetaPTEBytesFrame,
 * the meta PDE bytes, *DPDE0BytesFrame and the extra DPDE bytes, scaled by
 * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels)) when
 * HostVMEnable is set.
 *
 * Outputs written on every call:
 *   *MetaPTEBytesFrame, *MetaRowByte   0 when DCC is disabled
 *   *MacroTileWidth                    from the tiling mode and BytePerPixel
 *   *DPDE0BytesFrame                   0 unless GPUVM is enabled with more
 *                                      than two combined page-table levels
 *   *PixelPTEBytesPerRow               0 when GPUVM is disabled
 *   *PTEBufferSizeNotExceeded          true when GPUVM is disabled
 *   *vm_group_bytes, *dpte_group_bytes
 *
 * Outputs written only when DCCEnable is set:
 *   *MetaRequestWidth, *MetaRequestHeight, *meta_row_width, *meta_row_height
 *
 * Outputs written only when GPUVMEnable is set:
 *   *PixelPTEReqWidth, *PixelPTEReqHeight, *PTERequestSize,
 *   *dpte_row_width_ub, *dpte_row_height
 *
 * BytePerPixel and BlockHeight256Bytes must be non-zero (both are divisors).
 * The page-table level counts used for the PDE sizes are read from
 * mode_lib->vba, not from the HostVM* parameters.
 */
static unsigned int CalculateVMAndRowBytes(
		struct display_mode_lib *mode_lib,
		bool DCCEnable,
		unsigned int BlockHeight256Bytes,
		unsigned int BlockWidth256Bytes,
		enum source_format_class SourcePixelFormat,
		unsigned int SurfaceTiling,
		unsigned int BytePerPixel,
		enum scan_direction_class ScanDirection,
		unsigned int ViewportWidth,
		unsigned int ViewportHeight,
		unsigned int SwathWidth,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int HostVMMaxPageTableLevels,
		unsigned int HostVMCachedPageTableLevels,
		unsigned int VMMPageSize,
		unsigned int PTEBufferSizeInRequests,
		unsigned int Pitch,
		unsigned int DCCMetaPitch,
		unsigned int *MacroTileWidth,
		unsigned int *MetaRowByte,
		unsigned int *PixelPTEBytesPerRow,
		bool *PTEBufferSizeNotExceeded,
		unsigned int *dpte_row_width_ub,
		unsigned int *dpte_row_height,
		unsigned int *MetaRequestWidth,
		unsigned int *MetaRequestHeight,
		unsigned int *meta_row_width,
		unsigned int *meta_row_height,
		unsigned int *vm_group_bytes,
		long *dpte_group_bytes,
		unsigned int *PixelPTEReqWidth,
		unsigned int *PixelPTEReqHeight,
		unsigned int *PTERequestSize,
		unsigned int *DPDE0BytesFrame,
		unsigned int *MetaPTEBytesFrame)
{
	unsigned int MPDEBytesFrame;
	unsigned int DCCMetaSurfaceBytes;
	unsigned int MacroTileSizeBytes;
	unsigned int MacroTileHeight;
	unsigned int ExtraDPDEBytesFrame;
	unsigned int PDEAndMetaPTEBytesFrame;
	unsigned int PixelPTEReqHeightPTEs;

	/* DCC metadata: request size, row size and per-frame PTE bytes. */
	if (DCCEnable == true) {
		*MetaRequestHeight = 8 * BlockHeight256Bytes;
		*MetaRequestWidth = 8 * BlockWidth256Bytes;
		if (ScanDirection == dm_horz) {
			*meta_row_height = *MetaRequestHeight;
			*meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth)
					+ *MetaRequestWidth;
			*MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
		} else {
			*meta_row_height = *MetaRequestWidth;
			*meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight)
					+ *MetaRequestHeight;
			*MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
		}
		/*
		 * NOTE(review): both scan directions use the same formula here
		 * (they differ only in whether ViewportHeight - 1 is evaluated as
		 * unsigned or as double).  Kept as received; confirm whether the
		 * non-horizontal case was meant to use a different dimension.
		 */
		if (ScanDirection == dm_horz) {
			DCCMetaSurfaceBytes = DCCMetaPitch
					* (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes)
							+ 64 * BlockHeight256Bytes) * BytePerPixel
					/ 256;
		} else {
			DCCMetaSurfaceBytes = DCCMetaPitch
					* (dml_ceil(
							(double) ViewportHeight - 1,
							64 * BlockHeight256Bytes)
							+ 64 * BlockHeight256Bytes) * BytePerPixel
					/ 256;
		}
		if (GPUVMEnable == true) {
			/*
			 * Subtract in double: both operands are unsigned, so a meta
			 * surface smaller than one VMM page would otherwise wrap to
			 * a huge positive value instead of going negative.
			 */
			*MetaPTEBytesFrame = (dml_ceil(
					((double) DCCMetaSurfaceBytes - VMMPageSize)
							/ (8 * VMMPageSize),
					1) + 1) * 64;
			MPDEBytesFrame = 128 * ((mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) - 2);
		} else {
			*MetaPTEBytesFrame = 0;
			MPDEBytesFrame = 0;
		}
	} else {
		*MetaPTEBytesFrame = 0;
		MPDEBytesFrame = 0;
		*MetaRowByte = 0;
	}

	/* Macro tile geometry from the tiling mode. */
	if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) {
		MacroTileSizeBytes = 256;
		MacroTileHeight = BlockHeight256Bytes;
	} else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x
			|| SurfaceTiling == dm_sw_4kb_d || SurfaceTiling == dm_sw_4kb_d_x) {
		MacroTileSizeBytes = 4096;
		MacroTileHeight = 4 * BlockHeight256Bytes;
	} else if (SurfaceTiling == dm_sw_64kb_s || SurfaceTiling == dm_sw_64kb_s_t
			|| SurfaceTiling == dm_sw_64kb_s_x || SurfaceTiling == dm_sw_64kb_d
			|| SurfaceTiling == dm_sw_64kb_d_t || SurfaceTiling == dm_sw_64kb_d_x
			|| SurfaceTiling == dm_sw_64kb_r_x) {
		MacroTileSizeBytes = 65536;
		MacroTileHeight = 16 * BlockHeight256Bytes;
	} else {
		MacroTileSizeBytes = 262144;
		MacroTileHeight = 32 * BlockHeight256Bytes;
	}
	*MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;

	/* Page directory entry bytes per frame. */
	if (GPUVMEnable == true && (mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) > 2) {
		if (ScanDirection == dm_horz) {
			*DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
		} else {
			*DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
		}
		ExtraDPDEBytesFrame = 128 * ((mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) - 3);
	} else {
		*DPDE0BytesFrame = 0;
		ExtraDPDEBytesFrame = 0;
	}

	PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame
			+ ExtraDPDEBytesFrame;

	if (HostVMEnable == true) {
		PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels));
	}

	/* Pixel PTE request shape, row size and buffer check. */
	if (GPUVMEnable == true) {
		double FractionOfPTEReturnDrop;

		if (SurfaceTiling == dm_sw_linear) {
			PixelPTEReqHeightPTEs = 1;
			*PixelPTEReqHeight = 1;
			*PixelPTEReqWidth = 8.0 * VMMPageSize / BytePerPixel;
			*PTERequestSize = 64;
			FractionOfPTEReturnDrop = 0;
		} else if (MacroTileSizeBytes == 4096) {
			PixelPTEReqHeightPTEs = 1;
			*PixelPTEReqHeight = MacroTileHeight;
			*PixelPTEReqWidth = 8 * *MacroTileWidth;
			*PTERequestSize = 64;
			if (ScanDirection == dm_horz)
				FractionOfPTEReturnDrop = 0;
			else
				/*
				 * Must be a floating-point division: the integer
				 * expression 7 / 8 evaluates to 0, which would
				 * silently disable the drop factor below.
				 */
				FractionOfPTEReturnDrop = 7.0 / 8;
		} else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) {
			PixelPTEReqHeightPTEs = 16;
			*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
			*PixelPTEReqWidth = 16 * BlockWidth256Bytes;
			*PTERequestSize = 128;
			FractionOfPTEReturnDrop = 0;
		} else {
			PixelPTEReqHeightPTEs = 1;
			*PixelPTEReqHeight = MacroTileHeight;
			*PixelPTEReqWidth = 8 * *MacroTileWidth;
			*PTERequestSize = 64;
			FractionOfPTEReturnDrop = 0;
		}

		if (SurfaceTiling == dm_sw_linear) {
			*dpte_row_height = dml_min(128,
					1 << (unsigned int) dml_floor(
							dml_log2(
									(double) PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch),
							1));
			*dpte_row_width_ub = (dml_ceil((double) (Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
			*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
		} else if (ScanDirection == dm_horz) {
			*dpte_row_height = *PixelPTEReqHeight;
			*dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
			*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
		} else {
			*dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
			*dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
			*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
		}
		/* Only the PTE bytes that are not dropped count against the buffer. */
		if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop)
				<= 64 * PTEBufferSizeInRequests) {
			*PTEBufferSizeNotExceeded = true;
		} else {
			*PTEBufferSizeNotExceeded = false;
		}
	} else {
		*PixelPTEBytesPerRow = 0;
		*PTEBufferSizeNotExceeded = true;
	}
	dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %d\n", *MetaPTEBytesFrame);

	if (HostVMEnable == true) {
		*PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels));
	}

	/* Request group sizes. */
	if (HostVMEnable == true) {
		*vm_group_bytes = 512;
		*dpte_group_bytes = 512;
	} else if (GPUVMEnable == true) {
		*vm_group_bytes = 2048;
		if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection != dm_horz) {
			*dpte_group_bytes = 512;
		} else {
			*dpte_group_bytes = 2048;
		}
	} else {
		*vm_group_bytes = 0;
		*dpte_group_bytes = 0;
	}

	return PDEAndMetaPTEBytesFrame;
}

static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( 1466 + struct display_mode_lib *mode_lib) 1467 + { 1468 + struct vba_vars_st *locals = &mode_lib->vba; 1469 + unsigned int j, k; 1470 + 1471 + mode_lib->vba.WritebackDISPCLK = 0.0; 1472 + mode_lib->vba.DISPCLKWithRamping = 0; 1473 + mode_lib->vba.DISPCLKWithoutRamping = 0; 1474 + mode_lib->vba.GlobalDPPCLK = 0.0; 1475 + 1476 + // DISPCLK and DPPCLK Calculation 1477 + // 1478 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 1479 + if (mode_lib->vba.WritebackEnable[k]) { 1480 + mode_lib->vba.WritebackDISPCLK = 1481 + dml_max( 1482 + mode_lib->vba.WritebackDISPCLK, 1483 + CalculateWriteBackDISPCLK( 1484 + mode_lib->vba.WritebackPixelFormat[k], 1485 + mode_lib->vba.PixelClock[k], 1486 + mode_lib->vba.WritebackHRatio[k], 1487 + mode_lib->vba.WritebackVRatio[k], 1488 + mode_lib->vba.WritebackLumaHTaps[k], 1489 + mode_lib->vba.WritebackLumaVTaps[k], 1490 + mode_lib->vba.WritebackChromaHTaps[k], 1491
+ mode_lib->vba.WritebackChromaVTaps[k], 1492 + mode_lib->vba.WritebackDestinationWidth[k], 1493 + mode_lib->vba.HTotal[k], 1494 + mode_lib->vba.WritebackChromaLineBufferWidth)); 1495 + } 1496 + } 1497 + 1498 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 1499 + if (mode_lib->vba.HRatio[k] > 1) { 1500 + locals->PSCL_THROUGHPUT_LUMA[k] = dml_min( 1501 + mode_lib->vba.MaxDCHUBToPSCLThroughput, 1502 + mode_lib->vba.MaxPSCLToLBThroughput 1503 + * mode_lib->vba.HRatio[k] 1504 + / dml_ceil( 1505 + mode_lib->vba.htaps[k] 1506 + / 6.0, 1507 + 1)); 1508 + } else { 1509 + locals->PSCL_THROUGHPUT_LUMA[k] = dml_min( 1510 + mode_lib->vba.MaxDCHUBToPSCLThroughput, 1511 + mode_lib->vba.MaxPSCLToLBThroughput); 1512 + } 1513 + 1514 + mode_lib->vba.DPPCLKUsingSingleDPPLuma = 1515 + mode_lib->vba.PixelClock[k] 1516 + * dml_max( 1517 + mode_lib->vba.vtaps[k] / 6.0 1518 + * dml_min( 1519 + 1.0, 1520 + mode_lib->vba.HRatio[k]), 1521 + dml_max( 1522 + mode_lib->vba.HRatio[k] 1523 + * mode_lib->vba.VRatio[k] 1524 + / locals->PSCL_THROUGHPUT_LUMA[k], 1525 + 1.0)); 1526 + 1527 + if ((mode_lib->vba.htaps[k] > 6 || mode_lib->vba.vtaps[k] > 6) 1528 + && mode_lib->vba.DPPCLKUsingSingleDPPLuma 1529 + < 2 * mode_lib->vba.PixelClock[k]) { 1530 + mode_lib->vba.DPPCLKUsingSingleDPPLuma = 2 * mode_lib->vba.PixelClock[k]; 1531 + } 1532 + 1533 + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 1534 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { 1535 + locals->PSCL_THROUGHPUT_CHROMA[k] = 0.0; 1536 + locals->DPPCLKUsingSingleDPP[k] = 1537 + mode_lib->vba.DPPCLKUsingSingleDPPLuma; 1538 + } else { 1539 + if (mode_lib->vba.HRatio[k] > 1) { 1540 + locals->PSCL_THROUGHPUT_CHROMA[k] = 1541 + dml_min( 1542 + mode_lib->vba.MaxDCHUBToPSCLThroughput, 1543 + mode_lib->vba.MaxPSCLToLBThroughput 1544 + * mode_lib->vba.HRatio[k] 1545 + / 2 1546 + / dml_ceil( 1547 + mode_lib->vba.HTAPsChroma[k] 1548 + / 6.0, 1549 + 1.0)); 1550 + } else { 1551 + locals->PSCL_THROUGHPUT_CHROMA[k] = dml_min( 
1552 + mode_lib->vba.MaxDCHUBToPSCLThroughput, 1553 + mode_lib->vba.MaxPSCLToLBThroughput); 1554 + } 1555 + mode_lib->vba.DPPCLKUsingSingleDPPChroma = 1556 + mode_lib->vba.PixelClock[k] 1557 + * dml_max( 1558 + mode_lib->vba.VTAPsChroma[k] 1559 + / 6.0 1560 + * dml_min( 1561 + 1.0, 1562 + mode_lib->vba.HRatio[k] 1563 + / 2), 1564 + dml_max( 1565 + mode_lib->vba.HRatio[k] 1566 + * mode_lib->vba.VRatio[k] 1567 + / 4 1568 + / locals->PSCL_THROUGHPUT_CHROMA[k], 1569 + 1.0)); 1570 + 1571 + if ((mode_lib->vba.HTAPsChroma[k] > 6 || mode_lib->vba.VTAPsChroma[k] > 6) 1572 + && mode_lib->vba.DPPCLKUsingSingleDPPChroma 1573 + < 2 * mode_lib->vba.PixelClock[k]) { 1574 + mode_lib->vba.DPPCLKUsingSingleDPPChroma = 2 1575 + * mode_lib->vba.PixelClock[k]; 1576 + } 1577 + 1578 + locals->DPPCLKUsingSingleDPP[k] = dml_max( 1579 + mode_lib->vba.DPPCLKUsingSingleDPPLuma, 1580 + mode_lib->vba.DPPCLKUsingSingleDPPChroma); 1581 + } 1582 + } 1583 + 1584 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 1585 + if (mode_lib->vba.BlendingAndTiming[k] != k) 1586 + continue; 1587 + if (mode_lib->vba.ODMCombineEnabled[k]) { 1588 + mode_lib->vba.DISPCLKWithRamping = 1589 + dml_max( 1590 + mode_lib->vba.DISPCLKWithRamping, 1591 + mode_lib->vba.PixelClock[k] / 2 1592 + * (1 1593 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading 1594 + / 100) 1595 + * (1 1596 + + mode_lib->vba.DISPCLKRampingMargin 1597 + / 100)); 1598 + mode_lib->vba.DISPCLKWithoutRamping = 1599 + dml_max( 1600 + mode_lib->vba.DISPCLKWithoutRamping, 1601 + mode_lib->vba.PixelClock[k] / 2 1602 + * (1 1603 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading 1604 + / 100)); 1605 + } else if (!mode_lib->vba.ODMCombineEnabled[k]) { 1606 + mode_lib->vba.DISPCLKWithRamping = 1607 + dml_max( 1608 + mode_lib->vba.DISPCLKWithRamping, 1609 + mode_lib->vba.PixelClock[k] 1610 + * (1 1611 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading 1612 + / 100) 1613 + * (1 1614 + + mode_lib->vba.DISPCLKRampingMargin 1615 + / 100)); 1616 + 
mode_lib->vba.DISPCLKWithoutRamping = 1617 + dml_max( 1618 + mode_lib->vba.DISPCLKWithoutRamping, 1619 + mode_lib->vba.PixelClock[k] 1620 + * (1 1621 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading 1622 + / 100)); 1623 + } 1624 + } 1625 + 1626 + mode_lib->vba.DISPCLKWithRamping = dml_max( 1627 + mode_lib->vba.DISPCLKWithRamping, 1628 + mode_lib->vba.WritebackDISPCLK); 1629 + mode_lib->vba.DISPCLKWithoutRamping = dml_max( 1630 + mode_lib->vba.DISPCLKWithoutRamping, 1631 + mode_lib->vba.WritebackDISPCLK); 1632 + 1633 + ASSERT(mode_lib->vba.DISPCLKDPPCLKVCOSpeed != 0); 1634 + mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( 1635 + mode_lib->vba.DISPCLKWithRamping, 1636 + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); 1637 + mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( 1638 + mode_lib->vba.DISPCLKWithoutRamping, 1639 + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); 1640 + mode_lib->vba.MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( 1641 + mode_lib->vba.soc.clock_limits[mode_lib->vba.soc.num_states - 1].dispclk_mhz, 1642 + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); 1643 + if (mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity 1644 + > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { 1645 + mode_lib->vba.DISPCLK_calculated = 1646 + mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity; 1647 + } else if (mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity 1648 + > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { 1649 + mode_lib->vba.DISPCLK_calculated = mode_lib->vba.MaxDispclkRoundedToDFSGranularity; 1650 + } else { 1651 + mode_lib->vba.DISPCLK_calculated = 1652 + mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity; 1653 + } 1654 + DTRACE(" dispclk_mhz (calculated) = %f", mode_lib->vba.DISPCLK_calculated); 1655 + 1656 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 1657 + mode_lib->vba.DPPCLK_calculated[k] = locals->DPPCLKUsingSingleDPP[k] 1658 + / 
mode_lib->vba.DPPPerPlane[k] 1659 + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100); 1660 + mode_lib->vba.GlobalDPPCLK = dml_max( 1661 + mode_lib->vba.GlobalDPPCLK, 1662 + mode_lib->vba.DPPCLK_calculated[k]); 1663 + } 1664 + mode_lib->vba.GlobalDPPCLK = RoundToDFSGranularityUp( 1665 + mode_lib->vba.GlobalDPPCLK, 1666 + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); 1667 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 1668 + mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.GlobalDPPCLK / 255 1669 + * dml_ceil( 1670 + mode_lib->vba.DPPCLK_calculated[k] * 255 1671 + / mode_lib->vba.GlobalDPPCLK, 1672 + 1); 1673 + DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, mode_lib->vba.DPPCLK_calculated[k]); 1674 + } 1675 + 1676 + // Urgent and B P-State/DRAM Clock Change Watermark 1677 + DTRACE(" dcfclk_mhz = %f", mode_lib->vba.DCFCLK); 1678 + DTRACE(" return_bw_to_dcn = %f", mode_lib->vba.ReturnBandwidthToDCN); 1679 + DTRACE(" return_bus_bw = %f", mode_lib->vba.ReturnBW); 1680 + 1681 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 1682 + bool MainPlaneDoesODMCombine = false; 1683 + 1684 + if (mode_lib->vba.SourceScan[k] == dm_horz) 1685 + locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportWidth[k]; 1686 + else 1687 + locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; 1688 + 1689 + if (mode_lib->vba.ODMCombineEnabled[k] == true) 1690 + MainPlaneDoesODMCombine = true; 1691 + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) 1692 + if (mode_lib->vba.BlendingAndTiming[k] == j 1693 + && mode_lib->vba.ODMCombineEnabled[j] == true) 1694 + MainPlaneDoesODMCombine = true; 1695 + 1696 + if (MainPlaneDoesODMCombine == true) 1697 + locals->SwathWidthY[k] = dml_min( 1698 + (double) locals->SwathWidthSingleDPPY[k], 1699 + dml_round( 1700 + mode_lib->vba.HActive[k] / 2.0 1701 + * mode_lib->vba.HRatio[k])); 1702 + else 1703 + locals->SwathWidthY[k] = locals->SwathWidthSingleDPPY[k] 1704 + / mode_lib->vba.DPPPerPlane[k]; 1705 + } 1706 + 
1707 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 1708 + if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { 1709 + locals->BytePerPixelDETY[k] = 8; 1710 + locals->BytePerPixelDETC[k] = 0; 1711 + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { 1712 + locals->BytePerPixelDETY[k] = 4; 1713 + locals->BytePerPixelDETC[k] = 0; 1714 + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) { 1715 + locals->BytePerPixelDETY[k] = 2; 1716 + locals->BytePerPixelDETC[k] = 0; 1717 + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) { 1718 + locals->BytePerPixelDETY[k] = 1; 1719 + locals->BytePerPixelDETC[k] = 0; 1720 + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { 1721 + locals->BytePerPixelDETY[k] = 1; 1722 + locals->BytePerPixelDETC[k] = 2; 1723 + } else { // dm_420_10 1724 + locals->BytePerPixelDETY[k] = 4.0 / 3.0; 1725 + locals->BytePerPixelDETC[k] = 8.0 / 3.0; 1726 + } 1727 + } 1728 + 1729 + mode_lib->vba.TotalDataReadBandwidth = 0.0; 1730 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 1731 + locals->ReadBandwidthPlaneLuma[k] = locals->SwathWidthSingleDPPY[k] 1732 + * dml_ceil(locals->BytePerPixelDETY[k], 1) 1733 + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) 1734 + * mode_lib->vba.VRatio[k]; 1735 + locals->ReadBandwidthPlaneChroma[k] = locals->SwathWidthSingleDPPY[k] 1736 + / 2 * dml_ceil(locals->BytePerPixelDETC[k], 2) 1737 + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) 1738 + * mode_lib->vba.VRatio[k] / 2; 1739 + DTRACE( 1740 + " read_bw[%i] = %fBps", 1741 + k, 1742 + locals->ReadBandwidthPlaneLuma[k] 1743 + + locals->ReadBandwidthPlaneChroma[k]); 1744 + mode_lib->vba.TotalDataReadBandwidth += locals->ReadBandwidthPlaneLuma[k] 1745 + + locals->ReadBandwidthPlaneChroma[k]; 1746 + } 1747 + 1748 + // DCFCLK Deep Sleep 1749 + CalculateDCFCLKDeepSleep( 1750 + mode_lib, 1751 
+ mode_lib->vba.NumberOfActivePlanes, 1752 + locals->BytePerPixelDETY, 1753 + locals->BytePerPixelDETC, 1754 + mode_lib->vba.VRatio, 1755 + locals->SwathWidthY, 1756 + mode_lib->vba.DPPPerPlane, 1757 + mode_lib->vba.HRatio, 1758 + mode_lib->vba.PixelClock, 1759 + locals->PSCL_THROUGHPUT_LUMA, 1760 + locals->PSCL_THROUGHPUT_CHROMA, 1761 + locals->DPPCLK, 1762 + &mode_lib->vba.DCFCLKDeepSleep); 1763 + 1764 + // DSCCLK 1765 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 1766 + if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) { 1767 + locals->DSCCLK_calculated[k] = 0.0; 1768 + } else { 1769 + if (mode_lib->vba.OutputFormat[k] == dm_420 1770 + || mode_lib->vba.OutputFormat[k] == dm_n422) 1771 + mode_lib->vba.DSCFormatFactor = 2; 1772 + else 1773 + mode_lib->vba.DSCFormatFactor = 1; 1774 + if (mode_lib->vba.ODMCombineEnabled[k]) 1775 + locals->DSCCLK_calculated[k] = 1776 + mode_lib->vba.PixelClockBackEnd[k] / 6 1777 + / mode_lib->vba.DSCFormatFactor 1778 + / (1 1779 + - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading 1780 + / 100); 1781 + else 1782 + locals->DSCCLK_calculated[k] = 1783 + mode_lib->vba.PixelClockBackEnd[k] / 3 1784 + / mode_lib->vba.DSCFormatFactor 1785 + / (1 1786 + - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading 1787 + / 100); 1788 + } 1789 + } 1790 + 1791 + // DSC Delay 1792 + // TODO 1793 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 1794 + double bpp = mode_lib->vba.OutputBpp[k]; 1795 + unsigned int slices = mode_lib->vba.NumberOfDSCSlices[k]; 1796 + 1797 + if (mode_lib->vba.DSCEnabled[k] && bpp != 0) { 1798 + if (!mode_lib->vba.ODMCombineEnabled[k]) { 1799 + locals->DSCDelay[k] = 1800 + dscceComputeDelay( 1801 + mode_lib->vba.DSCInputBitPerComponent[k], 1802 + bpp, 1803 + dml_ceil( 1804 + (double) mode_lib->vba.HActive[k] 1805 + / mode_lib->vba.NumberOfDSCSlices[k], 1806 + 1), 1807 + slices, 1808 + mode_lib->vba.OutputFormat[k]) 1809 + + dscComputeDelay( 1810 + 
mode_lib->vba.OutputFormat[k]); 1811 + } else { 1812 + locals->DSCDelay[k] = 1813 + 2 1814 + * (dscceComputeDelay( 1815 + mode_lib->vba.DSCInputBitPerComponent[k], 1816 + bpp, 1817 + dml_ceil( 1818 + (double) mode_lib->vba.HActive[k] 1819 + / mode_lib->vba.NumberOfDSCSlices[k], 1820 + 1), 1821 + slices / 2.0, 1822 + mode_lib->vba.OutputFormat[k]) 1823 + + dscComputeDelay( 1824 + mode_lib->vba.OutputFormat[k])); 1825 + } 1826 + locals->DSCDelay[k] = locals->DSCDelay[k] 1827 + * mode_lib->vba.PixelClock[k] 1828 + / mode_lib->vba.PixelClockBackEnd[k]; 1829 + } else { 1830 + locals->DSCDelay[k] = 0; 1831 + } 1832 + } 1833 + 1834 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) 1835 + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) // NumberOfPlanes 1836 + if (j != k && mode_lib->vba.BlendingAndTiming[k] == j 1837 + && mode_lib->vba.DSCEnabled[j]) 1838 + locals->DSCDelay[k] = locals->DSCDelay[j]; 1839 + 1840 + // Prefetch 1841 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 1842 + unsigned int PDEAndMetaPTEBytesFrameY; 1843 + unsigned int PixelPTEBytesPerRowY; 1844 + unsigned int MetaRowByteY; 1845 + unsigned int MetaRowByteC; 1846 + unsigned int PDEAndMetaPTEBytesFrameC; 1847 + unsigned int PixelPTEBytesPerRowC; 1848 + bool PTEBufferSizeNotExceededY; 1849 + bool PTEBufferSizeNotExceededC; 1850 + 1851 + Calculate256BBlockSizes( 1852 + mode_lib->vba.SourcePixelFormat[k], 1853 + mode_lib->vba.SurfaceTiling[k], 1854 + dml_ceil(locals->BytePerPixelDETY[k], 1), 1855 + dml_ceil(locals->BytePerPixelDETC[k], 2), 1856 + &locals->BlockHeight256BytesY[k], 1857 + &locals->BlockHeight256BytesC[k], 1858 + &locals->BlockWidth256BytesY[k], 1859 + &locals->BlockWidth256BytesC[k]); 1860 + 1861 + locals->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( 1862 + mode_lib, 1863 + mode_lib->vba.VRatio[k], 1864 + mode_lib->vba.vtaps[k], 1865 + mode_lib->vba.Interlace[k], 1866 + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, 1867 + 
mode_lib->vba.SwathHeightY[k], 1868 + mode_lib->vba.ViewportYStartY[k], 1869 + &locals->VInitPreFillY[k], 1870 + &locals->MaxNumSwathY[k]); 1871 + 1872 + if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 1873 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 1874 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 1875 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_8)) { 1876 + PDEAndMetaPTEBytesFrameC = 1877 + CalculateVMAndRowBytes( 1878 + mode_lib, 1879 + mode_lib->vba.DCCEnable[k], 1880 + locals->BlockHeight256BytesC[k], 1881 + locals->BlockWidth256BytesC[k], 1882 + mode_lib->vba.SourcePixelFormat[k], 1883 + mode_lib->vba.SurfaceTiling[k], 1884 + dml_ceil( 1885 + locals->BytePerPixelDETC[k], 1886 + 2), 1887 + mode_lib->vba.SourceScan[k], 1888 + mode_lib->vba.ViewportWidth[k] / 2, 1889 + mode_lib->vba.ViewportHeight[k] / 2, 1890 + locals->SwathWidthY[k] / 2, 1891 + mode_lib->vba.GPUVMEnable, 1892 + mode_lib->vba.HostVMEnable, 1893 + mode_lib->vba.HostVMMaxPageTableLevels, 1894 + mode_lib->vba.HostVMCachedPageTableLevels, 1895 + mode_lib->vba.VMMPageSize, 1896 + mode_lib->vba.PTEBufferSizeInRequestsChroma, 1897 + mode_lib->vba.PitchC[k], 1898 + mode_lib->vba.DCCMetaPitchC[k], 1899 + &locals->MacroTileWidthC[k], 1900 + &MetaRowByteC, 1901 + &PixelPTEBytesPerRowC, 1902 + &PTEBufferSizeNotExceededC, 1903 + &locals->dpte_row_width_chroma_ub[k], 1904 + &locals->dpte_row_height_chroma[k], 1905 + &locals->meta_req_width_chroma[k], 1906 + &locals->meta_req_height_chroma[k], 1907 + &locals->meta_row_width_chroma[k], 1908 + &locals->meta_row_height_chroma[k], 1909 + &locals->vm_group_bytes_chroma, 1910 + &locals->dpte_group_bytes_chroma, 1911 + &locals->PixelPTEReqWidthC[k], 1912 + &locals->PixelPTEReqHeightC[k], 1913 + &locals->PTERequestSizeC[k], 1914 + &locals->dpde0_bytes_per_frame_ub_c[k], 1915 + &locals->meta_pte_bytes_per_frame_ub_c[k]); 1916 + 1917 + locals->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( 1918 + mode_lib, 1919 + 
mode_lib->vba.VRatio[k] / 2, 1920 + mode_lib->vba.VTAPsChroma[k], 1921 + mode_lib->vba.Interlace[k], 1922 + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, 1923 + mode_lib->vba.SwathHeightC[k], 1924 + mode_lib->vba.ViewportYStartC[k], 1925 + &locals->VInitPreFillC[k], 1926 + &locals->MaxNumSwathC[k]); 1927 + } else { 1928 + PixelPTEBytesPerRowC = 0; 1929 + PDEAndMetaPTEBytesFrameC = 0; 1930 + MetaRowByteC = 0; 1931 + locals->MaxNumSwathC[k] = 0; 1932 + locals->PrefetchSourceLinesC[k] = 0; 1933 + locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; 1934 + } 1935 + 1936 + PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( 1937 + mode_lib, 1938 + mode_lib->vba.DCCEnable[k], 1939 + locals->BlockHeight256BytesY[k], 1940 + locals->BlockWidth256BytesY[k], 1941 + mode_lib->vba.SourcePixelFormat[k], 1942 + mode_lib->vba.SurfaceTiling[k], 1943 + dml_ceil(locals->BytePerPixelDETY[k], 1), 1944 + mode_lib->vba.SourceScan[k], 1945 + mode_lib->vba.ViewportWidth[k], 1946 + mode_lib->vba.ViewportHeight[k], 1947 + locals->SwathWidthY[k], 1948 + mode_lib->vba.GPUVMEnable, 1949 + mode_lib->vba.HostVMEnable, 1950 + mode_lib->vba.HostVMMaxPageTableLevels, 1951 + mode_lib->vba.HostVMCachedPageTableLevels, 1952 + mode_lib->vba.VMMPageSize, 1953 + locals->PTEBufferSizeInRequestsForLuma, 1954 + mode_lib->vba.PitchY[k], 1955 + mode_lib->vba.DCCMetaPitchY[k], 1956 + &locals->MacroTileWidthY[k], 1957 + &MetaRowByteY, 1958 + &PixelPTEBytesPerRowY, 1959 + &PTEBufferSizeNotExceededY, 1960 + &locals->dpte_row_width_luma_ub[k], 1961 + &locals->dpte_row_height[k], 1962 + &locals->meta_req_width[k], 1963 + &locals->meta_req_height[k], 1964 + &locals->meta_row_width[k], 1965 + &locals->meta_row_height[k], 1966 + &locals->vm_group_bytes[k], 1967 + &locals->dpte_group_bytes[k], 1968 + &locals->PixelPTEReqWidthY[k], 1969 + &locals->PixelPTEReqHeightY[k], 1970 + &locals->PTERequestSizeY[k], 1971 + 
&locals->dpde0_bytes_per_frame_ub_l[k], 1972 + &locals->meta_pte_bytes_per_frame_ub_l[k]); 1973 + 1974 + locals->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; 1975 + locals->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY 1976 + + PDEAndMetaPTEBytesFrameC; 1977 + locals->MetaRowByte[k] = MetaRowByteY + MetaRowByteC; 1978 + 1979 + CalculateActiveRowBandwidth( 1980 + mode_lib->vba.GPUVMEnable, 1981 + mode_lib->vba.SourcePixelFormat[k], 1982 + mode_lib->vba.VRatio[k], 1983 + mode_lib->vba.DCCEnable[k], 1984 + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], 1985 + MetaRowByteY, 1986 + MetaRowByteC, 1987 + locals->meta_row_height[k], 1988 + locals->meta_row_height_chroma[k], 1989 + PixelPTEBytesPerRowY, 1990 + PixelPTEBytesPerRowC, 1991 + locals->dpte_row_height[k], 1992 + locals->dpte_row_height_chroma[k], 1993 + &locals->meta_row_bw[k], 1994 + &locals->dpte_row_bw[k]); 1995 + } 1996 + 1997 + mode_lib->vba.TotalDCCActiveDPP = 0; 1998 + mode_lib->vba.TotalActiveDPP = 0; 1999 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2000 + mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP 2001 + + mode_lib->vba.DPPPerPlane[k]; 2002 + if (mode_lib->vba.DCCEnable[k]) 2003 + mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP 2004 + + mode_lib->vba.DPPPerPlane[k]; 2005 + } 2006 + 2007 + mode_lib->vba.UrgentOutOfOrderReturnPerChannel = dml_max3( 2008 + mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, 2009 + mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 2010 + mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly); 2011 + 2012 + mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency = 2013 + (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLK 2014 + + mode_lib->vba.UrgentOutOfOrderReturnPerChannel 2015 + * mode_lib->vba.NumberOfChannels 2016 + / mode_lib->vba.ReturnBW; 2017 + 2018 + mode_lib->vba.UrgentExtraLatency = CalculateExtraLatency( 2019 + 
mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency, 2020 + mode_lib->vba.TotalActiveDPP, 2021 + mode_lib->vba.PixelChunkSizeInKByte, 2022 + mode_lib->vba.TotalDCCActiveDPP, 2023 + mode_lib->vba.MetaChunkSize, 2024 + mode_lib->vba.ReturnBW, 2025 + mode_lib->vba.GPUVMEnable, 2026 + mode_lib->vba.HostVMEnable, 2027 + mode_lib->vba.NumberOfActivePlanes, 2028 + mode_lib->vba.DPPPerPlane, 2029 + locals->dpte_group_bytes, 2030 + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, 2031 + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, 2032 + mode_lib->vba.HostVMMaxPageTableLevels, 2033 + mode_lib->vba.HostVMCachedPageTableLevels); 2034 + 2035 + 2036 + mode_lib->vba.TCalc = 24.0 / mode_lib->vba.DCFCLKDeepSleep; 2037 + 2038 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2039 + if (mode_lib->vba.BlendingAndTiming[k] == k) { 2040 + if (mode_lib->vba.WritebackEnable[k] == true) { 2041 + locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = 2042 + mode_lib->vba.WritebackLatency 2043 + + CalculateWriteBackDelay( 2044 + mode_lib->vba.WritebackPixelFormat[k], 2045 + mode_lib->vba.WritebackHRatio[k], 2046 + mode_lib->vba.WritebackVRatio[k], 2047 + mode_lib->vba.WritebackLumaHTaps[k], 2048 + mode_lib->vba.WritebackLumaVTaps[k], 2049 + mode_lib->vba.WritebackChromaHTaps[k], 2050 + mode_lib->vba.WritebackChromaVTaps[k], 2051 + mode_lib->vba.WritebackDestinationWidth[k]) 2052 + / mode_lib->vba.DISPCLK; 2053 + } else 2054 + locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0; 2055 + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { 2056 + if (mode_lib->vba.BlendingAndTiming[j] == k 2057 + && mode_lib->vba.WritebackEnable[j] == true) { 2058 + locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = 2059 + dml_max( 2060 + locals->WritebackDelay[mode_lib->vba.VoltageLevel][k], 2061 + mode_lib->vba.WritebackLatency 2062 + + CalculateWriteBackDelay( 2063 + 
mode_lib->vba.WritebackPixelFormat[j], 2064 + mode_lib->vba.WritebackHRatio[j], 2065 + mode_lib->vba.WritebackVRatio[j], 2066 + mode_lib->vba.WritebackLumaHTaps[j], 2067 + mode_lib->vba.WritebackLumaVTaps[j], 2068 + mode_lib->vba.WritebackChromaHTaps[j], 2069 + mode_lib->vba.WritebackChromaVTaps[j], 2070 + mode_lib->vba.WritebackDestinationWidth[j]) 2071 + / mode_lib->vba.DISPCLK); 2072 + } 2073 + } 2074 + } 2075 + } 2076 + 2077 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) 2078 + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) 2079 + if (mode_lib->vba.BlendingAndTiming[k] == j) 2080 + locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = 2081 + locals->WritebackDelay[mode_lib->vba.VoltageLevel][j]; 2082 + 2083 + mode_lib->vba.VStartupLines = 13; 2084 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2085 + locals->MaxVStartupLines[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - dml_max(1.0, dml_ceil(locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1)); 2086 + } 2087 + 2088 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) 2089 + locals->MaximumMaxVStartupLines = dml_max(locals->MaximumMaxVStartupLines, locals->MaxVStartupLines[k]); 2090 + 2091 + // We don't really care to iterate between the various prefetch modes 2092 + //mode_lib->vba.PrefetchERROR = CalculateMinAndMaxPrefetchMode(mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &mode_lib->vba.MinPrefetchMode, &mode_lib->vba.MaxPrefetchMode); 2093 + mode_lib->vba.UrgentLatency = dml_max3(mode_lib->vba.UrgentLatencyPixelDataOnly, mode_lib->vba.UrgentLatencyPixelMixedWithVMData, mode_lib->vba.UrgentLatencyVMDataOnly); 2094 + 2095 + do { 2096 + double MaxTotalRDBandwidth = 0; 2097 + double MaxTotalRDBandwidthNoUrgentBurst = 0; 2098 + bool DestinationLineTimesForPrefetchLessThan2 = false; 2099 + bool VRatioPrefetchMoreThan4 = false; 2100 + double TWait = CalculateTWait( 2101 + 
mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], 2102 + mode_lib->vba.DRAMClockChangeLatency, 2103 + mode_lib->vba.UrgentLatency, 2104 + mode_lib->vba.SREnterPlusExitTime); 2105 + 2106 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2107 + Pipe myPipe; 2108 + HostVM myHostVM; 2109 + 2110 + if (mode_lib->vba.XFCEnabled[k] == true) { 2111 + mode_lib->vba.XFCRemoteSurfaceFlipDelay = 2112 + CalculateRemoteSurfaceFlipDelay( 2113 + mode_lib, 2114 + mode_lib->vba.VRatio[k], 2115 + locals->SwathWidthY[k], 2116 + dml_ceil( 2117 + locals->BytePerPixelDETY[k], 2118 + 1), 2119 + mode_lib->vba.HTotal[k] 2120 + / mode_lib->vba.PixelClock[k], 2121 + mode_lib->vba.XFCTSlvVupdateOffset, 2122 + mode_lib->vba.XFCTSlvVupdateWidth, 2123 + mode_lib->vba.XFCTSlvVreadyOffset, 2124 + mode_lib->vba.XFCXBUFLatencyTolerance, 2125 + mode_lib->vba.XFCFillBWOverhead, 2126 + mode_lib->vba.XFCSlvChunkSize, 2127 + mode_lib->vba.XFCBusTransportTime, 2128 + mode_lib->vba.TCalc, 2129 + TWait, 2130 + &mode_lib->vba.SrcActiveDrainRate, 2131 + &mode_lib->vba.TInitXFill, 2132 + &mode_lib->vba.TslvChk); 2133 + } else { 2134 + mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0; 2135 + } 2136 + 2137 + myPipe.DPPCLK = locals->DPPCLK[k]; 2138 + myPipe.DISPCLK = mode_lib->vba.DISPCLK; 2139 + myPipe.PixelClock = mode_lib->vba.PixelClock[k]; 2140 + myPipe.DCFCLKDeepSleep = mode_lib->vba.DCFCLKDeepSleep; 2141 + myPipe.DPPPerPlane = mode_lib->vba.DPPPerPlane[k]; 2142 + myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; 2143 + myPipe.SourceScan = mode_lib->vba.SourceScan[k]; 2144 + myPipe.BlockWidth256BytesY = locals->BlockWidth256BytesY[k]; 2145 + myPipe.BlockHeight256BytesY = locals->BlockHeight256BytesY[k]; 2146 + myPipe.BlockWidth256BytesC = locals->BlockWidth256BytesC[k]; 2147 + myPipe.BlockHeight256BytesC = locals->BlockHeight256BytesC[k]; 2148 + myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; 2149 + myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; 2150 
+ myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; 2151 + myPipe.HTotal = mode_lib->vba.HTotal[k]; 2152 + 2153 + 2154 + myHostVM.Enable = mode_lib->vba.HostVMEnable; 2155 + myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels; 2156 + myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels; 2157 + 2158 + mode_lib->vba.ErrorResult[k] = 2159 + CalculatePrefetchSchedule( 2160 + mode_lib, 2161 + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, 2162 + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, 2163 + &myPipe, 2164 + locals->DSCDelay[k], 2165 + mode_lib->vba.DPPCLKDelaySubtotal, 2166 + mode_lib->vba.DPPCLKDelaySCL, 2167 + mode_lib->vba.DPPCLKDelaySCLLBOnly, 2168 + mode_lib->vba.DPPCLKDelayCNVCFormater, 2169 + mode_lib->vba.DPPCLKDelayCNVCCursor, 2170 + mode_lib->vba.DISPCLKDelaySubtotal, 2171 + (unsigned int) (locals->SwathWidthY[k] 2172 + / mode_lib->vba.HRatio[k]), 2173 + mode_lib->vba.OutputFormat[k], 2174 + mode_lib->vba.MaxInterDCNTileRepeaters, 2175 + dml_min(mode_lib->vba.VStartupLines, locals->MaxVStartupLines[k]), 2176 + locals->MaxVStartupLines[k], 2177 + mode_lib->vba.GPUVMMaxPageTableLevels, 2178 + mode_lib->vba.GPUVMEnable, 2179 + &myHostVM, 2180 + mode_lib->vba.DynamicMetadataEnable[k], 2181 + mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], 2182 + mode_lib->vba.DynamicMetadataTransmittedBytes[k], 2183 + mode_lib->vba.DCCEnable[k], 2184 + mode_lib->vba.UrgentLatency, 2185 + mode_lib->vba.UrgentExtraLatency, 2186 + mode_lib->vba.TCalc, 2187 + locals->PDEAndMetaPTEBytesFrame[k], 2188 + locals->MetaRowByte[k], 2189 + locals->PixelPTEBytesPerRow[k], 2190 + locals->PrefetchSourceLinesY[k], 2191 + locals->SwathWidthY[k], 2192 + locals->BytePerPixelDETY[k], 2193 + locals->VInitPreFillY[k], 2194 + locals->MaxNumSwathY[k], 2195 + locals->PrefetchSourceLinesC[k], 2196 + locals->BytePerPixelDETC[k], 2197 + 
locals->VInitPreFillC[k], 2198 + locals->MaxNumSwathC[k], 2199 + mode_lib->vba.SwathHeightY[k], 2200 + mode_lib->vba.SwathHeightC[k], 2201 + TWait, 2202 + mode_lib->vba.XFCEnabled[k], 2203 + mode_lib->vba.XFCRemoteSurfaceFlipDelay, 2204 + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, 2205 + &locals->DSTXAfterScaler[k], 2206 + &locals->DSTYAfterScaler[k], 2207 + &locals->DestinationLinesForPrefetch[k], 2208 + &locals->PrefetchBandwidth[k], 2209 + &locals->DestinationLinesToRequestVMInVBlank[k], 2210 + &locals->DestinationLinesToRequestRowInVBlank[k], 2211 + &locals->VRatioPrefetchY[k], 2212 + &locals->VRatioPrefetchC[k], 2213 + &locals->RequiredPrefetchPixDataBWLuma[k], 2214 + &locals->RequiredPrefetchPixDataBWChroma[k], 2215 + &locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, 2216 + &locals->Tno_bw[k], 2217 + &locals->prefetch_vmrow_bw[k], 2218 + &locals->swath_width_luma_ub[k], 2219 + &locals->swath_width_chroma_ub[k], 2220 + &mode_lib->vba.VUpdateOffsetPix[k], 2221 + &mode_lib->vba.VUpdateWidthPix[k], 2222 + &mode_lib->vba.VReadyOffsetPix[k]); 2223 + if (mode_lib->vba.BlendingAndTiming[k] == k) { 2224 + locals->VStartup[k] = dml_min( 2225 + mode_lib->vba.VStartupLines, 2226 + locals->MaxVStartupLines[k]); 2227 + if (locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata 2228 + != 0) { 2229 + locals->VStartup[k] = 2230 + locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata; 2231 + } 2232 + } else { 2233 + locals->VStartup[k] = 2234 + dml_min( 2235 + mode_lib->vba.VStartupLines, 2236 + locals->MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]); 2237 + } 2238 + } 2239 + 2240 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2241 + unsigned int m; 2242 + 2243 + locals->cursor_bw[k] = 0; 2244 + locals->cursor_bw_pre[k] = 0; 2245 + for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) { 2246 + locals->cursor_bw[k] += mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] / 8.0 / (mode_lib->vba.HTotal[k] / 
mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; 2247 + locals->cursor_bw_pre[k] += mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * locals->VRatioPrefetchY[k]; 2248 + } 2249 + 2250 + CalculateUrgentBurstFactor( 2251 + mode_lib->vba.DETBufferSizeInKByte, 2252 + mode_lib->vba.SwathHeightY[k], 2253 + mode_lib->vba.SwathHeightC[k], 2254 + locals->SwathWidthY[k], 2255 + mode_lib->vba.HTotal[k] / 2256 + mode_lib->vba.PixelClock[k], 2257 + mode_lib->vba.UrgentLatency, 2258 + mode_lib->vba.CursorBufferSize, 2259 + mode_lib->vba.CursorWidth[k][0] + mode_lib->vba.CursorWidth[k][1], 2260 + dml_max(mode_lib->vba.CursorBPP[k][0], mode_lib->vba.CursorBPP[k][1]), 2261 + mode_lib->vba.VRatio[k], 2262 + locals->VRatioPrefetchY[k], 2263 + locals->VRatioPrefetchC[k], 2264 + locals->BytePerPixelDETY[k], 2265 + locals->BytePerPixelDETC[k], 2266 + &locals->UrgentBurstFactorCursor[k], 2267 + &locals->UrgentBurstFactorCursorPre[k], 2268 + &locals->UrgentBurstFactorLuma[k], 2269 + &locals->UrgentBurstFactorLumaPre[k], 2270 + &locals->UrgentBurstFactorChroma[k], 2271 + &locals->UrgentBurstFactorChromaPre[k], 2272 + &locals->NotEnoughUrgentLatencyHiding, 2273 + &locals->NotEnoughUrgentLatencyHidingPre); 2274 + 2275 + if (mode_lib->vba.UseUrgentBurstBandwidth == false) { 2276 + locals->UrgentBurstFactorLuma[k] = 1; 2277 + locals->UrgentBurstFactorChroma[k] = 1; 2278 + locals->UrgentBurstFactorCursor[k] = 1; 2279 + locals->UrgentBurstFactorLumaPre[k] = 1; 2280 + locals->UrgentBurstFactorChromaPre[k] = 1; 2281 + locals->UrgentBurstFactorCursorPre[k] = 1; 2282 + } 2283 + 2284 + MaxTotalRDBandwidth = MaxTotalRDBandwidth + 2285 + dml_max3(locals->prefetch_vmrow_bw[k], 2286 + locals->ReadBandwidthPlaneLuma[k] * locals->UrgentBurstFactorLuma[k] 2287 + + locals->ReadBandwidthPlaneChroma[k] * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] 2288 + * locals->UrgentBurstFactorCursor[k] + 
locals->meta_row_bw[k] + locals->dpte_row_bw[k], 2289 + locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] + locals->RequiredPrefetchPixDataBWChroma[k] 2290 + * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); 2291 + 2292 + MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst + 2293 + dml_max3(locals->prefetch_vmrow_bw[k], 2294 + locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k] + locals->cursor_bw[k] 2295 + + locals->meta_row_bw[k] + locals->dpte_row_bw[k], 2296 + locals->RequiredPrefetchPixDataBWLuma[k] + locals->RequiredPrefetchPixDataBWChroma[k] + locals->cursor_bw_pre[k]); 2297 + 2298 + if (locals->DestinationLinesForPrefetch[k] < 2) 2299 + DestinationLineTimesForPrefetchLessThan2 = true; 2300 + if (locals->VRatioPrefetchY[k] > 4 || locals->VRatioPrefetchC[k] > 4) 2301 + VRatioPrefetchMoreThan4 = true; 2302 + } 2303 + mode_lib->vba.FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / mode_lib->vba.ReturnBW; 2304 + 2305 + if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && locals->NotEnoughUrgentLatencyHiding == 0 && locals->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4 2306 + && !DestinationLineTimesForPrefetchLessThan2) 2307 + mode_lib->vba.PrefetchModeSupported = true; 2308 + else { 2309 + mode_lib->vba.PrefetchModeSupported = false; 2310 + dml_print( 2311 + "DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. 
Results are NOT valid\n"); 2312 + } 2313 + 2314 + if (mode_lib->vba.PrefetchModeSupported == true) { 2315 + mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.ReturnBW; 2316 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2317 + mode_lib->vba.BandwidthAvailableForImmediateFlip = 2318 + mode_lib->vba.BandwidthAvailableForImmediateFlip 2319 + - dml_max( 2320 + locals->ReadBandwidthPlaneLuma[k] * locals->UrgentBurstFactorLuma[k] 2321 + + locals->ReadBandwidthPlaneChroma[k] * locals->UrgentBurstFactorChroma[k] 2322 + + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k], 2323 + locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] + 2324 + locals->RequiredPrefetchPixDataBWChroma[k] * locals->UrgentBurstFactorChromaPre[k] + 2325 + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); 2326 + } 2327 + 2328 + mode_lib->vba.TotImmediateFlipBytes = 0; 2329 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2330 + mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes + locals->PDEAndMetaPTEBytesFrame[k] + locals->MetaRowByte[k] + locals->PixelPTEBytesPerRow[k]; 2331 + } 2332 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2333 + CalculateFlipSchedule( 2334 + mode_lib, 2335 + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, 2336 + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, 2337 + mode_lib->vba.UrgentExtraLatency, 2338 + mode_lib->vba.UrgentLatency, 2339 + mode_lib->vba.GPUVMMaxPageTableLevels, 2340 + mode_lib->vba.HostVMEnable, 2341 + mode_lib->vba.HostVMMaxPageTableLevels, 2342 + mode_lib->vba.HostVMCachedPageTableLevels, 2343 + mode_lib->vba.GPUVMEnable, 2344 + locals->PDEAndMetaPTEBytesFrame[k], 2345 + locals->MetaRowByte[k], 2346 + locals->PixelPTEBytesPerRow[k], 2347 + mode_lib->vba.BandwidthAvailableForImmediateFlip, 2348 + mode_lib->vba.TotImmediateFlipBytes, 2349 + 
mode_lib->vba.SourcePixelFormat[k], 2350 + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], 2351 + mode_lib->vba.VRatio[k], 2352 + locals->Tno_bw[k], 2353 + mode_lib->vba.DCCEnable[k], 2354 + locals->dpte_row_height[k], 2355 + locals->meta_row_height[k], 2356 + locals->dpte_row_height_chroma[k], 2357 + locals->meta_row_height_chroma[k], 2358 + &locals->DestinationLinesToRequestVMInImmediateFlip[k], 2359 + &locals->DestinationLinesToRequestRowInImmediateFlip[k], 2360 + &locals->final_flip_bw[k], 2361 + &locals->ImmediateFlipSupportedForPipe[k]); 2362 + } 2363 + mode_lib->vba.total_dcn_read_bw_with_flip = 0.0; 2364 + mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst = 0.0; 2365 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2366 + mode_lib->vba.total_dcn_read_bw_with_flip = 2367 + mode_lib->vba.total_dcn_read_bw_with_flip + dml_max3( 2368 + locals->prefetch_vmrow_bw[k], 2369 + locals->final_flip_bw[k] + locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] 2370 + + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k], 2371 + locals->final_flip_bw[k] + locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] 2372 + + locals->RequiredPrefetchPixDataBWChroma[k] * locals->UrgentBurstFactorChromaPre[k] 2373 + + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); 2374 + mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst = 2375 + mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst + 2376 + dml_max3(locals->prefetch_vmrow_bw[k], 2377 + locals->final_flip_bw[k] + locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k] + locals->cursor_bw[k], 2378 + locals->final_flip_bw[k] + locals->RequiredPrefetchPixDataBWLuma[k] + locals->RequiredPrefetchPixDataBWChroma[k] + locals->cursor_bw_pre[k]); 2379 + 2380 + } 2381 + mode_lib->vba.FractionOfUrgentBandwidthImmediateFlip = 
mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst / mode_lib->vba.ReturnBW; 2382 + 2383 + mode_lib->vba.ImmediateFlipSupported = true; 2384 + if (mode_lib->vba.total_dcn_read_bw_with_flip > mode_lib->vba.ReturnBW) { 2385 + mode_lib->vba.ImmediateFlipSupported = false; 2386 + } 2387 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2388 + if (locals->ImmediateFlipSupportedForPipe[k] == false) { 2389 + mode_lib->vba.ImmediateFlipSupported = false; 2390 + } 2391 + } 2392 + } else { 2393 + mode_lib->vba.ImmediateFlipSupported = false; 2394 + } 2395 + 2396 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2397 + if (mode_lib->vba.ErrorResult[k]) { 2398 + mode_lib->vba.PrefetchModeSupported = false; 2399 + dml_print( 2400 + "DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n"); 2401 + } 2402 + } 2403 + 2404 + mode_lib->vba.VStartupLines = mode_lib->vba.VStartupLines + 1; 2405 + } while (!((mode_lib->vba.PrefetchModeSupported 2406 + && ((!mode_lib->vba.ImmediateFlipSupport && !mode_lib->vba.HostVMEnable) 2407 + || mode_lib->vba.ImmediateFlipSupported)) 2408 + || locals->MaximumMaxVStartupLines < mode_lib->vba.VStartupLines)); 2409 + 2410 + //Watermarks and NB P-State/DRAM Clock Change Support 2411 + { 2412 + enum clock_change_support DRAMClockChangeSupport; // dummy 2413 + CalculateWatermarksAndDRAMSpeedChangeSupport( 2414 + mode_lib, 2415 + mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], 2416 + mode_lib->vba.NumberOfActivePlanes, 2417 + mode_lib->vba.MaxLineBufferLines, 2418 + mode_lib->vba.LineBufferSize, 2419 + mode_lib->vba.DPPOutputBufferPixels, 2420 + mode_lib->vba.DETBufferSizeInKByte, 2421 + mode_lib->vba.WritebackInterfaceLumaBufferSize, 2422 + mode_lib->vba.WritebackInterfaceChromaBufferSize, 2423 + mode_lib->vba.DCFCLK, 2424 + mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels, 2425 + mode_lib->vba.ReturnBW, 2426 + 
mode_lib->vba.GPUVMEnable, 2427 + locals->dpte_group_bytes, 2428 + mode_lib->vba.MetaChunkSize, 2429 + mode_lib->vba.UrgentLatency, 2430 + mode_lib->vba.UrgentExtraLatency, 2431 + mode_lib->vba.WritebackLatency, 2432 + mode_lib->vba.WritebackChunkSize, 2433 + mode_lib->vba.SOCCLK, 2434 + mode_lib->vba.DRAMClockChangeLatency, 2435 + mode_lib->vba.SRExitTime, 2436 + mode_lib->vba.SREnterPlusExitTime, 2437 + mode_lib->vba.DCFCLKDeepSleep, 2438 + mode_lib->vba.DPPPerPlane, 2439 + mode_lib->vba.DCCEnable, 2440 + locals->DPPCLK, 2441 + locals->SwathWidthSingleDPPY, 2442 + mode_lib->vba.SwathHeightY, 2443 + locals->ReadBandwidthPlaneLuma, 2444 + mode_lib->vba.SwathHeightC, 2445 + locals->ReadBandwidthPlaneChroma, 2446 + mode_lib->vba.LBBitPerPixel, 2447 + locals->SwathWidthY, 2448 + mode_lib->vba.HRatio, 2449 + mode_lib->vba.vtaps, 2450 + mode_lib->vba.VTAPsChroma, 2451 + mode_lib->vba.VRatio, 2452 + mode_lib->vba.HTotal, 2453 + mode_lib->vba.PixelClock, 2454 + mode_lib->vba.BlendingAndTiming, 2455 + locals->BytePerPixelDETY, 2456 + locals->BytePerPixelDETC, 2457 + mode_lib->vba.WritebackEnable, 2458 + mode_lib->vba.WritebackPixelFormat, 2459 + mode_lib->vba.WritebackDestinationWidth, 2460 + mode_lib->vba.WritebackDestinationHeight, 2461 + mode_lib->vba.WritebackSourceHeight, 2462 + &DRAMClockChangeSupport, 2463 + &mode_lib->vba.UrgentWatermark, 2464 + &mode_lib->vba.WritebackUrgentWatermark, 2465 + &mode_lib->vba.DRAMClockChangeWatermark, 2466 + &mode_lib->vba.WritebackDRAMClockChangeWatermark, 2467 + &mode_lib->vba.StutterExitWatermark, 2468 + &mode_lib->vba.StutterEnterPlusExitWatermark, 2469 + &mode_lib->vba.MinActiveDRAMClockChangeLatencySupported); 2470 + } 2471 + 2472 + 2473 + //Display Pipeline Delivery Time in Prefetch, Groups 2474 + CalculatePixelDeliveryTimes( 2475 + mode_lib->vba.NumberOfActivePlanes, 2476 + mode_lib->vba.VRatio, 2477 + locals->VRatioPrefetchY, 2478 + locals->VRatioPrefetchC, 2479 + locals->swath_width_luma_ub, 2480 + 
locals->swath_width_chroma_ub, 2481 + mode_lib->vba.DPPPerPlane, 2482 + mode_lib->vba.HRatio, 2483 + mode_lib->vba.PixelClock, 2484 + locals->PSCL_THROUGHPUT_LUMA, 2485 + locals->PSCL_THROUGHPUT_CHROMA, 2486 + locals->DPPCLK, 2487 + locals->BytePerPixelDETC, 2488 + mode_lib->vba.SourceScan, 2489 + locals->BlockWidth256BytesY, 2490 + locals->BlockHeight256BytesY, 2491 + locals->BlockWidth256BytesC, 2492 + locals->BlockHeight256BytesC, 2493 + locals->DisplayPipeLineDeliveryTimeLuma, 2494 + locals->DisplayPipeLineDeliveryTimeChroma, 2495 + locals->DisplayPipeLineDeliveryTimeLumaPrefetch, 2496 + locals->DisplayPipeLineDeliveryTimeChromaPrefetch, 2497 + locals->DisplayPipeRequestDeliveryTimeLuma, 2498 + locals->DisplayPipeRequestDeliveryTimeChroma, 2499 + locals->DisplayPipeRequestDeliveryTimeLumaPrefetch, 2500 + locals->DisplayPipeRequestDeliveryTimeChromaPrefetch); 2501 + 2502 + CalculateMetaAndPTETimes( 2503 + mode_lib->vba.NumberOfActivePlanes, 2504 + mode_lib->vba.GPUVMEnable, 2505 + mode_lib->vba.MetaChunkSize, 2506 + mode_lib->vba.MinMetaChunkSizeBytes, 2507 + mode_lib->vba.GPUVMMaxPageTableLevels, 2508 + mode_lib->vba.HTotal, 2509 + mode_lib->vba.VRatio, 2510 + locals->VRatioPrefetchY, 2511 + locals->VRatioPrefetchC, 2512 + locals->DestinationLinesToRequestRowInVBlank, 2513 + locals->DestinationLinesToRequestRowInImmediateFlip, 2514 + locals->DestinationLinesToRequestVMInVBlank, 2515 + locals->DestinationLinesToRequestVMInImmediateFlip, 2516 + mode_lib->vba.DCCEnable, 2517 + mode_lib->vba.PixelClock, 2518 + locals->BytePerPixelDETY, 2519 + locals->BytePerPixelDETC, 2520 + mode_lib->vba.SourceScan, 2521 + locals->dpte_row_height, 2522 + locals->dpte_row_height_chroma, 2523 + locals->meta_row_width, 2524 + locals->meta_row_height, 2525 + locals->meta_req_width, 2526 + locals->meta_req_height, 2527 + locals->dpte_group_bytes, 2528 + locals->PTERequestSizeY, 2529 + locals->PTERequestSizeC, 2530 + locals->PixelPTEReqWidthY, 2531 + locals->PixelPTEReqHeightY, 2532 + 
locals->PixelPTEReqWidthC, 2533 + locals->PixelPTEReqHeightC, 2534 + locals->dpte_row_width_luma_ub, 2535 + locals->dpte_row_width_chroma_ub, 2536 + locals->vm_group_bytes, 2537 + locals->dpde0_bytes_per_frame_ub_l, 2538 + locals->dpde0_bytes_per_frame_ub_c, 2539 + locals->meta_pte_bytes_per_frame_ub_l, 2540 + locals->meta_pte_bytes_per_frame_ub_c, 2541 + locals->DST_Y_PER_PTE_ROW_NOM_L, 2542 + locals->DST_Y_PER_PTE_ROW_NOM_C, 2543 + locals->DST_Y_PER_META_ROW_NOM_L, 2544 + locals->TimePerMetaChunkNominal, 2545 + locals->TimePerMetaChunkVBlank, 2546 + locals->TimePerMetaChunkFlip, 2547 + locals->time_per_pte_group_nom_luma, 2548 + locals->time_per_pte_group_vblank_luma, 2549 + locals->time_per_pte_group_flip_luma, 2550 + locals->time_per_pte_group_nom_chroma, 2551 + locals->time_per_pte_group_vblank_chroma, 2552 + locals->time_per_pte_group_flip_chroma, 2553 + locals->TimePerVMGroupVBlank, 2554 + locals->TimePerVMGroupFlip, 2555 + locals->TimePerVMRequestVBlank, 2556 + locals->TimePerVMRequestFlip); 2557 + 2558 + 2559 + // Min TTUVBlank 2560 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2561 + if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { 2562 + locals->AllowDRAMClockChangeDuringVBlank[k] = true; 2563 + locals->AllowDRAMSelfRefreshDuringVBlank[k] = true; 2564 + locals->MinTTUVBlank[k] = dml_max( 2565 + mode_lib->vba.DRAMClockChangeWatermark, 2566 + dml_max( 2567 + mode_lib->vba.StutterEnterPlusExitWatermark, 2568 + mode_lib->vba.UrgentWatermark)); 2569 + } else if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 1) { 2570 + locals->AllowDRAMClockChangeDuringVBlank[k] = false; 2571 + locals->AllowDRAMSelfRefreshDuringVBlank[k] = true; 2572 + locals->MinTTUVBlank[k] = dml_max( 2573 + mode_lib->vba.StutterEnterPlusExitWatermark, 2574 + mode_lib->vba.UrgentWatermark); 2575 + } else { 2576 + locals->AllowDRAMClockChangeDuringVBlank[k] = false; 2577 + 
locals->AllowDRAMSelfRefreshDuringVBlank[k] = false; 2578 + locals->MinTTUVBlank[k] = mode_lib->vba.UrgentWatermark; 2579 + } 2580 + if (!mode_lib->vba.DynamicMetadataEnable[k]) 2581 + locals->MinTTUVBlank[k] = mode_lib->vba.TCalc 2582 + + locals->MinTTUVBlank[k]; 2583 + } 2584 + 2585 + // DCC Configuration 2586 + mode_lib->vba.ActiveDPPs = 0; 2587 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2588 + locals->MaximumDCCCompressionYSurface[k] = CalculateDCCConfiguration( 2589 + mode_lib->vba.DCCEnable[k], 2590 + false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown, 2591 + mode_lib->vba.ViewportWidth[k], 2592 + mode_lib->vba.ViewportHeight[k], 2593 + mode_lib->vba.DETBufferSizeInKByte * 1024, 2594 + locals->BlockHeight256BytesY[k], 2595 + mode_lib->vba.SwathHeightY[k], 2596 + mode_lib->vba.SurfaceTiling[k], 2597 + locals->BytePerPixelDETY[k], 2598 + mode_lib->vba.SourceScan[k], 2599 + &locals->DCCYMaxUncompressedBlock[k], 2600 + &locals->DCCYMaxCompressedBlock[k], 2601 + &locals->DCCYIndependent64ByteBlock[k]); 2602 + } 2603 + 2604 + //XFC Parameters: 2605 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2606 + if (mode_lib->vba.XFCEnabled[k] == true) { 2607 + double TWait; 2608 + 2609 + locals->XFCSlaveVUpdateOffset[k] = mode_lib->vba.XFCTSlvVupdateOffset; 2610 + locals->XFCSlaveVupdateWidth[k] = mode_lib->vba.XFCTSlvVupdateWidth; 2611 + locals->XFCSlaveVReadyOffset[k] = mode_lib->vba.XFCTSlvVreadyOffset; 2612 + TWait = CalculateTWait( 2613 + mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], 2614 + mode_lib->vba.DRAMClockChangeLatency, 2615 + mode_lib->vba.UrgentLatency, 2616 + mode_lib->vba.SREnterPlusExitTime); 2617 + mode_lib->vba.XFCRemoteSurfaceFlipDelay = CalculateRemoteSurfaceFlipDelay( 2618 + mode_lib, 2619 + mode_lib->vba.VRatio[k], 2620 + locals->SwathWidthY[k], 2621 + dml_ceil(locals->BytePerPixelDETY[k], 1), 2622 + mode_lib->vba.HTotal[k] / 
mode_lib->vba.PixelClock[k], 2623 + mode_lib->vba.XFCTSlvVupdateOffset, 2624 + mode_lib->vba.XFCTSlvVupdateWidth, 2625 + mode_lib->vba.XFCTSlvVreadyOffset, 2626 + mode_lib->vba.XFCXBUFLatencyTolerance, 2627 + mode_lib->vba.XFCFillBWOverhead, 2628 + mode_lib->vba.XFCSlvChunkSize, 2629 + mode_lib->vba.XFCBusTransportTime, 2630 + mode_lib->vba.TCalc, 2631 + TWait, 2632 + &mode_lib->vba.SrcActiveDrainRate, 2633 + &mode_lib->vba.TInitXFill, 2634 + &mode_lib->vba.TslvChk); 2635 + locals->XFCRemoteSurfaceFlipLatency[k] = 2636 + dml_floor( 2637 + mode_lib->vba.XFCRemoteSurfaceFlipDelay 2638 + / (mode_lib->vba.HTotal[k] 2639 + / mode_lib->vba.PixelClock[k]), 2640 + 1); 2641 + locals->XFCTransferDelay[k] = 2642 + dml_ceil( 2643 + mode_lib->vba.XFCBusTransportTime 2644 + / (mode_lib->vba.HTotal[k] 2645 + / mode_lib->vba.PixelClock[k]), 2646 + 1); 2647 + locals->XFCPrechargeDelay[k] = 2648 + dml_ceil( 2649 + (mode_lib->vba.XFCBusTransportTime 2650 + + mode_lib->vba.TInitXFill 2651 + + mode_lib->vba.TslvChk) 2652 + / (mode_lib->vba.HTotal[k] 2653 + / mode_lib->vba.PixelClock[k]), 2654 + 1); 2655 + mode_lib->vba.InitFillLevel = mode_lib->vba.XFCXBUFLatencyTolerance 2656 + * mode_lib->vba.SrcActiveDrainRate; 2657 + mode_lib->vba.FinalFillMargin = 2658 + (locals->DestinationLinesToRequestVMInVBlank[k] 2659 + + locals->DestinationLinesToRequestRowInVBlank[k]) 2660 + * mode_lib->vba.HTotal[k] 2661 + / mode_lib->vba.PixelClock[k] 2662 + * mode_lib->vba.SrcActiveDrainRate 2663 + + mode_lib->vba.XFCFillConstant; 2664 + mode_lib->vba.FinalFillLevel = mode_lib->vba.XFCRemoteSurfaceFlipDelay 2665 + * mode_lib->vba.SrcActiveDrainRate 2666 + + mode_lib->vba.FinalFillMargin; 2667 + mode_lib->vba.RemainingFillLevel = dml_max( 2668 + 0.0, 2669 + mode_lib->vba.FinalFillLevel - mode_lib->vba.InitFillLevel); 2670 + mode_lib->vba.TFinalxFill = mode_lib->vba.RemainingFillLevel 2671 + / (mode_lib->vba.SrcActiveDrainRate 2672 + * mode_lib->vba.XFCFillBWOverhead / 100); 2673 + 
locals->XFCPrefetchMargin[k] = 2674 + mode_lib->vba.XFCRemoteSurfaceFlipDelay 2675 + + mode_lib->vba.TFinalxFill 2676 + + (locals->DestinationLinesToRequestVMInVBlank[k] 2677 + + locals->DestinationLinesToRequestRowInVBlank[k]) 2678 + * mode_lib->vba.HTotal[k] 2679 + / mode_lib->vba.PixelClock[k]; 2680 + } else { 2681 + locals->XFCSlaveVUpdateOffset[k] = 0; 2682 + locals->XFCSlaveVupdateWidth[k] = 0; 2683 + locals->XFCSlaveVReadyOffset[k] = 0; 2684 + locals->XFCRemoteSurfaceFlipLatency[k] = 0; 2685 + locals->XFCPrechargeDelay[k] = 0; 2686 + locals->XFCTransferDelay[k] = 0; 2687 + locals->XFCPrefetchMargin[k] = 0; 2688 + } 2689 + } 2690 + 2691 + // Stutter Efficiency 2692 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2693 + CalculateDETBufferSize( 2694 + mode_lib->vba.DETBufferSizeInKByte, 2695 + mode_lib->vba.SwathHeightY[k], 2696 + mode_lib->vba.SwathHeightC[k], 2697 + &locals->DETBufferSizeY[k], 2698 + &locals->DETBufferSizeC[k]); 2699 + 2700 + locals->LinesInDETY[k] = locals->DETBufferSizeY[k] 2701 + / locals->BytePerPixelDETY[k] / locals->SwathWidthY[k]; 2702 + locals->LinesInDETYRoundedDownToSwath[k] = dml_floor( 2703 + locals->LinesInDETY[k], 2704 + mode_lib->vba.SwathHeightY[k]); 2705 + locals->FullDETBufferingTimeY[k] = 2706 + locals->LinesInDETYRoundedDownToSwath[k] 2707 + * (mode_lib->vba.HTotal[k] 2708 + / mode_lib->vba.PixelClock[k]) 2709 + / mode_lib->vba.VRatio[k]; 2710 + } 2711 + 2712 + mode_lib->vba.StutterPeriod = 999999.0; 2713 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2714 + if (locals->FullDETBufferingTimeY[k] < mode_lib->vba.StutterPeriod) { 2715 + mode_lib->vba.StutterPeriod = locals->FullDETBufferingTimeY[k]; 2716 + mode_lib->vba.FrameTimeForMinFullDETBufferingTime = 2717 + (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k] 2718 + / mode_lib->vba.PixelClock[k]; 2719 + locals->BytePerPixelYCriticalPlane = dml_ceil(locals->BytePerPixelDETY[k], 1); 2720 + locals->SwathWidthYCriticalPlane = 
locals->SwathWidthY[k]; 2721 + locals->LinesToFinishSwathTransferStutterCriticalPlane = 2722 + mode_lib->vba.SwathHeightY[k] - (locals->LinesInDETY[k] - locals->LinesInDETYRoundedDownToSwath[k]); 2723 + } 2724 + } 2725 + 2726 + mode_lib->vba.AverageReadBandwidth = 0.0; 2727 + mode_lib->vba.TotalRowReadBandwidth = 0.0; 2728 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2729 + unsigned int DCCRateLimit; 2730 + 2731 + if (mode_lib->vba.DCCEnable[k]) { 2732 + if (locals->DCCYMaxCompressedBlock[k] == 256) 2733 + DCCRateLimit = 4; 2734 + else 2735 + DCCRateLimit = 2; 2736 + 2737 + mode_lib->vba.AverageReadBandwidth = 2738 + mode_lib->vba.AverageReadBandwidth 2739 + + (locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k]) / 2740 + dml_min(mode_lib->vba.DCCRate[k], DCCRateLimit); 2741 + } else { 2742 + mode_lib->vba.AverageReadBandwidth = 2743 + mode_lib->vba.AverageReadBandwidth 2744 + + locals->ReadBandwidthPlaneLuma[k] 2745 + + locals->ReadBandwidthPlaneChroma[k]; 2746 + } 2747 + mode_lib->vba.TotalRowReadBandwidth = mode_lib->vba.TotalRowReadBandwidth + 2748 + locals->meta_row_bw[k] + locals->dpte_row_bw[k]; 2749 + } 2750 + 2751 + mode_lib->vba.AverageDCCCompressionRate = mode_lib->vba.TotalDataReadBandwidth / mode_lib->vba.AverageReadBandwidth; 2752 + 2753 + mode_lib->vba.PartOfBurstThatFitsInROB = 2754 + dml_min( 2755 + mode_lib->vba.StutterPeriod 2756 + * mode_lib->vba.TotalDataReadBandwidth, 2757 + mode_lib->vba.ROBBufferSizeInKByte * 1024 2758 + * mode_lib->vba.AverageDCCCompressionRate); 2759 + mode_lib->vba.StutterBurstTime = mode_lib->vba.PartOfBurstThatFitsInROB 2760 + / mode_lib->vba.AverageDCCCompressionRate / mode_lib->vba.ReturnBW 2761 + + (mode_lib->vba.StutterPeriod * mode_lib->vba.TotalDataReadBandwidth 2762 + - mode_lib->vba.PartOfBurstThatFitsInROB) 2763 + / (mode_lib->vba.DCFCLK * 64) 2764 + + mode_lib->vba.StutterPeriod * mode_lib->vba.TotalRowReadBandwidth / mode_lib->vba.ReturnBW; 2765 + 
mode_lib->vba.StutterBurstTime = dml_max( 2766 + mode_lib->vba.StutterBurstTime, 2767 + (locals->LinesToFinishSwathTransferStutterCriticalPlane * locals->BytePerPixelYCriticalPlane * 2768 + locals->SwathWidthYCriticalPlane / mode_lib->vba.ReturnBW) 2769 + ); 2770 + 2771 + mode_lib->vba.TotalActiveWriteback = 0; 2772 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2773 + if (mode_lib->vba.WritebackEnable[k] == true) { 2774 + mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1; 2775 + } 2776 + } 2777 + 2778 + if (mode_lib->vba.TotalActiveWriteback == 0) { 2779 + mode_lib->vba.StutterEfficiencyNotIncludingVBlank = (1 2780 + - (mode_lib->vba.SRExitTime + mode_lib->vba.StutterBurstTime) 2781 + / mode_lib->vba.StutterPeriod) * 100; 2782 + } else { 2783 + mode_lib->vba.StutterEfficiencyNotIncludingVBlank = 0; 2784 + } 2785 + 2786 + mode_lib->vba.SmallestVBlank = 999999; 2787 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2788 + if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) { 2789 + mode_lib->vba.VBlankTime = (double) (mode_lib->vba.VTotal[k] 2790 + - mode_lib->vba.VActive[k]) * mode_lib->vba.HTotal[k] 2791 + / mode_lib->vba.PixelClock[k]; 2792 + } else { 2793 + mode_lib->vba.VBlankTime = 0; 2794 + } 2795 + mode_lib->vba.SmallestVBlank = dml_min( 2796 + mode_lib->vba.SmallestVBlank, 2797 + mode_lib->vba.VBlankTime); 2798 + } 2799 + 2800 + mode_lib->vba.StutterEfficiency = (mode_lib->vba.StutterEfficiencyNotIncludingVBlank / 100 2801 + * (mode_lib->vba.FrameTimeForMinFullDETBufferingTime 2802 + - mode_lib->vba.SmallestVBlank) 2803 + + mode_lib->vba.SmallestVBlank) 2804 + / mode_lib->vba.FrameTimeForMinFullDETBufferingTime * 100; 2805 + } 2806 + 2807 + static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib) 2808 + { 2809 + // Display Pipe Configuration 2810 + double BytePerPixDETY; 2811 + double BytePerPixDETC; 2812 + double Read256BytesBlockHeightY; 2813 + double 
Read256BytesBlockHeightC; 2814 + double Read256BytesBlockWidthY; 2815 + double Read256BytesBlockWidthC; 2816 + double MaximumSwathHeightY; 2817 + double MaximumSwathHeightC; 2818 + double MinimumSwathHeightY; 2819 + double MinimumSwathHeightC; 2820 + double SwathWidth; 2821 + double SwathWidthGranularityY; 2822 + double SwathWidthGranularityC; 2823 + double RoundedUpMaxSwathSizeBytesY; 2824 + double RoundedUpMaxSwathSizeBytesC; 2825 + unsigned int j, k; 2826 + 2827 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 2828 + bool MainPlaneDoesODMCombine = false; 2829 + 2830 + if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { 2831 + BytePerPixDETY = 8; 2832 + BytePerPixDETC = 0; 2833 + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { 2834 + BytePerPixDETY = 4; 2835 + BytePerPixDETC = 0; 2836 + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { 2837 + BytePerPixDETY = 2; 2838 + BytePerPixDETC = 0; 2839 + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) { 2840 + BytePerPixDETY = 1; 2841 + BytePerPixDETC = 0; 2842 + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { 2843 + BytePerPixDETY = 1; 2844 + BytePerPixDETC = 2; 2845 + } else { 2846 + BytePerPixDETY = 4.0 / 3.0; 2847 + BytePerPixDETC = 8.0 / 3.0; 2848 + } 2849 + 2850 + if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 2851 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 2852 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 2853 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) { 2854 + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { 2855 + Read256BytesBlockHeightY = 1; 2856 + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { 2857 + Read256BytesBlockHeightY = 4; 2858 + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32 2859 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { 2860 + Read256BytesBlockHeightY = 8; 2861 + } else { 2862 + Read256BytesBlockHeightY = 16; 2863 + } 2864 + Read256BytesBlockWidthY = 256 / 
dml_ceil(BytePerPixDETY, 1) 2865 + / Read256BytesBlockHeightY; 2866 + Read256BytesBlockHeightC = 0; 2867 + Read256BytesBlockWidthC = 0; 2868 + } else { 2869 + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { 2870 + Read256BytesBlockHeightY = 1; 2871 + Read256BytesBlockHeightC = 1; 2872 + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { 2873 + Read256BytesBlockHeightY = 16; 2874 + Read256BytesBlockHeightC = 8; 2875 + } else { 2876 + Read256BytesBlockHeightY = 8; 2877 + Read256BytesBlockHeightC = 8; 2878 + } 2879 + Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1) 2880 + / Read256BytesBlockHeightY; 2881 + Read256BytesBlockWidthC = 256 / dml_ceil(BytePerPixDETC, 2) 2882 + / Read256BytesBlockHeightC; 2883 + } 2884 + 2885 + if (mode_lib->vba.SourceScan[k] == dm_horz) { 2886 + MaximumSwathHeightY = Read256BytesBlockHeightY; 2887 + MaximumSwathHeightC = Read256BytesBlockHeightC; 2888 + } else { 2889 + MaximumSwathHeightY = Read256BytesBlockWidthY; 2890 + MaximumSwathHeightC = Read256BytesBlockWidthC; 2891 + } 2892 + 2893 + if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 2894 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 2895 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 2896 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) { 2897 + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear 2898 + || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64 2899 + && (mode_lib->vba.SurfaceTiling[k] 2900 + == dm_sw_4kb_s 2901 + || mode_lib->vba.SurfaceTiling[k] 2902 + == dm_sw_4kb_s_x 2903 + || mode_lib->vba.SurfaceTiling[k] 2904 + == dm_sw_64kb_s 2905 + || mode_lib->vba.SurfaceTiling[k] 2906 + == dm_sw_64kb_s_t 2907 + || mode_lib->vba.SurfaceTiling[k] 2908 + == dm_sw_64kb_s_x 2909 + || mode_lib->vba.SurfaceTiling[k] 2910 + == dm_sw_var_s 2911 + || mode_lib->vba.SurfaceTiling[k] 2912 + == dm_sw_var_s_x) 2913 + && mode_lib->vba.SourceScan[k] == dm_horz)) { 2914 + MinimumSwathHeightY = MaximumSwathHeightY; 2915 + } else if 
(mode_lib->vba.SourcePixelFormat[k] == dm_444_8 2916 + && mode_lib->vba.SourceScan[k] != dm_horz) { 2917 + MinimumSwathHeightY = MaximumSwathHeightY; 2918 + } else { 2919 + MinimumSwathHeightY = MaximumSwathHeightY / 2.0; 2920 + } 2921 + MinimumSwathHeightC = MaximumSwathHeightC; 2922 + } else { 2923 + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { 2924 + MinimumSwathHeightY = MaximumSwathHeightY; 2925 + MinimumSwathHeightC = MaximumSwathHeightC; 2926 + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 2927 + && mode_lib->vba.SourceScan[k] == dm_horz) { 2928 + MinimumSwathHeightY = MaximumSwathHeightY / 2.0; 2929 + MinimumSwathHeightC = MaximumSwathHeightC; 2930 + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10 2931 + && mode_lib->vba.SourceScan[k] == dm_horz) { 2932 + MinimumSwathHeightC = MaximumSwathHeightC / 2.0; 2933 + MinimumSwathHeightY = MaximumSwathHeightY; 2934 + } else { 2935 + MinimumSwathHeightY = MaximumSwathHeightY; 2936 + MinimumSwathHeightC = MaximumSwathHeightC; 2937 + } 2938 + } 2939 + 2940 + if (mode_lib->vba.SourceScan[k] == dm_horz) { 2941 + SwathWidth = mode_lib->vba.ViewportWidth[k]; 2942 + } else { 2943 + SwathWidth = mode_lib->vba.ViewportHeight[k]; 2944 + } 2945 + 2946 + if (mode_lib->vba.ODMCombineEnabled[k] == true) { 2947 + MainPlaneDoesODMCombine = true; 2948 + } 2949 + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { 2950 + if (mode_lib->vba.BlendingAndTiming[k] == j 2951 + && mode_lib->vba.ODMCombineEnabled[j] == true) { 2952 + MainPlaneDoesODMCombine = true; 2953 + } 2954 + } 2955 + 2956 + if (MainPlaneDoesODMCombine == true) { 2957 + SwathWidth = dml_min( 2958 + SwathWidth, 2959 + mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]); 2960 + } else { 2961 + SwathWidth = SwathWidth / mode_lib->vba.DPPPerPlane[k]; 2962 + } 2963 + 2964 + SwathWidthGranularityY = 256 / dml_ceil(BytePerPixDETY, 1) / MaximumSwathHeightY; 2965 + RoundedUpMaxSwathSizeBytesY = (dml_ceil( 2966 + (double) (SwathWidth - 
1), 2967 + SwathWidthGranularityY) + SwathWidthGranularityY) * BytePerPixDETY 2968 + * MaximumSwathHeightY; 2969 + if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { 2970 + RoundedUpMaxSwathSizeBytesY = dml_ceil(RoundedUpMaxSwathSizeBytesY, 256) 2971 + + 256; 2972 + } 2973 + if (MaximumSwathHeightC > 0) { 2974 + SwathWidthGranularityC = 256.0 / dml_ceil(BytePerPixDETC, 2) 2975 + / MaximumSwathHeightC; 2976 + RoundedUpMaxSwathSizeBytesC = (dml_ceil( 2977 + (double) (SwathWidth / 2.0 - 1), 2978 + SwathWidthGranularityC) + SwathWidthGranularityC) 2979 + * BytePerPixDETC * MaximumSwathHeightC; 2980 + if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { 2981 + RoundedUpMaxSwathSizeBytesC = dml_ceil( 2982 + RoundedUpMaxSwathSizeBytesC, 2983 + 256) + 256; 2984 + } 2985 + } else 2986 + RoundedUpMaxSwathSizeBytesC = 0.0; 2987 + 2988 + if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC 2989 + <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) { 2990 + mode_lib->vba.SwathHeightY[k] = MaximumSwathHeightY; 2991 + mode_lib->vba.SwathHeightC[k] = MaximumSwathHeightC; 2992 + } else { 2993 + mode_lib->vba.SwathHeightY[k] = MinimumSwathHeightY; 2994 + mode_lib->vba.SwathHeightC[k] = MinimumSwathHeightC; 2995 + } 2996 + 2997 + CalculateDETBufferSize( 2998 + mode_lib->vba.DETBufferSizeInKByte, 2999 + mode_lib->vba.SwathHeightY[k], 3000 + mode_lib->vba.SwathHeightC[k], 3001 + &mode_lib->vba.DETBufferSizeY[k], 3002 + &mode_lib->vba.DETBufferSizeC[k]); 3003 + } 3004 + } 3005 + 3006 + static double CalculateTWait( 3007 + unsigned int PrefetchMode, 3008 + double DRAMClockChangeLatency, 3009 + double UrgentLatency, 3010 + double SREnterPlusExitTime) 3011 + { 3012 + if (PrefetchMode == 0) { 3013 + return dml_max( 3014 + DRAMClockChangeLatency + UrgentLatency, 3015 + dml_max(SREnterPlusExitTime, UrgentLatency)); 3016 + } else if (PrefetchMode == 1) { 3017 + return dml_max(SREnterPlusExitTime, UrgentLatency); 3018 + } else { 3019 + return UrgentLatency; 3020 + } 3021 + } 
3022 + 3023 + static double CalculateRemoteSurfaceFlipDelay( 3024 + struct display_mode_lib *mode_lib, 3025 + double VRatio, 3026 + double SwathWidth, 3027 + double Bpp, 3028 + double LineTime, 3029 + double XFCTSlvVupdateOffset, 3030 + double XFCTSlvVupdateWidth, 3031 + double XFCTSlvVreadyOffset, 3032 + double XFCXBUFLatencyTolerance, 3033 + double XFCFillBWOverhead, 3034 + double XFCSlvChunkSize, 3035 + double XFCBusTransportTime, 3036 + double TCalc, 3037 + double TWait, 3038 + double *SrcActiveDrainRate, 3039 + double *TInitXFill, 3040 + double *TslvChk) 3041 + { 3042 + double TSlvSetup, AvgfillRate, result; 3043 + 3044 + *SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime; 3045 + TSlvSetup = XFCTSlvVupdateOffset + XFCTSlvVupdateWidth + XFCTSlvVreadyOffset; 3046 + *TInitXFill = XFCXBUFLatencyTolerance / (1 + XFCFillBWOverhead / 100); 3047 + AvgfillRate = *SrcActiveDrainRate * (1 + XFCFillBWOverhead / 100); 3048 + *TslvChk = XFCSlvChunkSize / AvgfillRate; 3049 + dml_print( 3050 + "DML::CalculateRemoteSurfaceFlipDelay: SrcActiveDrainRate: %f\n", 3051 + *SrcActiveDrainRate); 3052 + dml_print("DML::CalculateRemoteSurfaceFlipDelay: TSlvSetup: %f\n", TSlvSetup); 3053 + dml_print("DML::CalculateRemoteSurfaceFlipDelay: TInitXFill: %f\n", *TInitXFill); 3054 + dml_print("DML::CalculateRemoteSurfaceFlipDelay: AvgfillRate: %f\n", AvgfillRate); 3055 + dml_print("DML::CalculateRemoteSurfaceFlipDelay: TslvChk: %f\n", *TslvChk); 3056 + result = 2 * XFCBusTransportTime + TSlvSetup + TCalc + TWait + *TslvChk + *TInitXFill; // TODO: This doesn't seem to match programming guide 3057 + dml_print("DML::CalculateRemoteSurfaceFlipDelay: RemoteSurfaceFlipDelay: %f\n", result); 3058 + return result; 3059 + } 3060 + 3061 + static double CalculateWriteBackDelay( 3062 + enum source_format_class WritebackPixelFormat, 3063 + double WritebackHRatio, 3064 + double WritebackVRatio, 3065 + unsigned int WritebackLumaHTaps, 3066 + unsigned int WritebackLumaVTaps, 3067 + unsigned int 
WritebackChromaHTaps, 3068 + unsigned int WritebackChromaVTaps, 3069 + unsigned int WritebackDestinationWidth) 3070 + { 3071 + double CalculateWriteBackDelay = 3072 + dml_max( 3073 + dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio, 3074 + WritebackLumaVTaps * dml_ceil(1.0 / WritebackVRatio, 1) 3075 + * dml_ceil( 3076 + WritebackDestinationWidth 3077 + / 4.0, 3078 + 1) 3079 + + dml_ceil(1.0 / WritebackVRatio, 1) 3080 + * (dml_ceil( 3081 + WritebackLumaVTaps 3082 + / 4.0, 3083 + 1) + 4)); 3084 + 3085 + if (WritebackPixelFormat != dm_444_32) { 3086 + CalculateWriteBackDelay = 3087 + dml_max( 3088 + CalculateWriteBackDelay, 3089 + dml_max( 3090 + dml_ceil( 3091 + WritebackChromaHTaps 3092 + / 2.0, 3093 + 1) 3094 + / (2 3095 + * WritebackHRatio), 3096 + WritebackChromaVTaps 3097 + * dml_ceil( 3098 + 1 3099 + / (2 3100 + * WritebackVRatio), 3101 + 1) 3102 + * dml_ceil( 3103 + WritebackDestinationWidth 3104 + / 2.0 3105 + / 2.0, 3106 + 1) 3107 + + dml_ceil( 3108 + 1 3109 + / (2 3110 + * WritebackVRatio), 3111 + 1) 3112 + * (dml_ceil( 3113 + WritebackChromaVTaps 3114 + / 4.0, 3115 + 1) 3116 + + 4))); 3117 + } 3118 + return CalculateWriteBackDelay; 3119 + } 3120 + 3121 + static void CalculateActiveRowBandwidth( 3122 + bool GPUVMEnable, 3123 + enum source_format_class SourcePixelFormat, 3124 + double VRatio, 3125 + bool DCCEnable, 3126 + double LineTime, 3127 + unsigned int MetaRowByteLuma, 3128 + unsigned int MetaRowByteChroma, 3129 + unsigned int meta_row_height_luma, 3130 + unsigned int meta_row_height_chroma, 3131 + unsigned int PixelPTEBytesPerRowLuma, 3132 + unsigned int PixelPTEBytesPerRowChroma, 3133 + unsigned int dpte_row_height_luma, 3134 + unsigned int dpte_row_height_chroma, 3135 + double *meta_row_bw, 3136 + double *dpte_row_bw) 3137 + { 3138 + if (DCCEnable != true) { 3139 + *meta_row_bw = 0; 3140 + } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { 3141 + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * 
LineTime) 3142 + + VRatio / 2 * MetaRowByteChroma 3143 + / (meta_row_height_chroma * LineTime); 3144 + } else { 3145 + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); 3146 + } 3147 + 3148 + if (GPUVMEnable != true) { 3149 + *dpte_row_bw = 0; 3150 + } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { 3151 + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) 3152 + + VRatio / 2 * PixelPTEBytesPerRowChroma 3153 + / (dpte_row_height_chroma * LineTime); 3154 + } else { 3155 + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); 3156 + } 3157 + } 3158 + 3159 + static void CalculateFlipSchedule( 3160 + struct display_mode_lib *mode_lib, 3161 + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, 3162 + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, 3163 + double UrgentExtraLatency, 3164 + double UrgentLatency, 3165 + unsigned int GPUVMMaxPageTableLevels, 3166 + bool HostVMEnable, 3167 + unsigned int HostVMMaxPageTableLevels, 3168 + unsigned int HostVMCachedPageTableLevels, 3169 + bool GPUVMEnable, 3170 + double PDEAndMetaPTEBytesPerFrame, 3171 + double MetaRowBytes, 3172 + double DPTEBytesPerRow, 3173 + double BandwidthAvailableForImmediateFlip, 3174 + unsigned int TotImmediateFlipBytes, 3175 + enum source_format_class SourcePixelFormat, 3176 + double LineTime, 3177 + double VRatio, 3178 + double Tno_bw, 3179 + bool DCCEnable, 3180 + unsigned int dpte_row_height, 3181 + unsigned int meta_row_height, 3182 + unsigned int dpte_row_height_chroma, 3183 + unsigned int meta_row_height_chroma, 3184 + double *DestinationLinesToRequestVMInImmediateFlip, 3185 + double *DestinationLinesToRequestRowInImmediateFlip, 3186 + double *final_flip_bw, 3187 + bool *ImmediateFlipSupportedForPipe) 3188 + { 3189 + double min_row_time = 0.0; 3190 + unsigned int HostVMDynamicLevels; 3191 + double 
TimeForFetchingMetaPTEImmediateFlip; 3192 + double TimeForFetchingRowInVBlankImmediateFlip; 3193 + double ImmediateFlipBW; 3194 + double HostVMInefficiencyFactor; 3195 + 3196 + if (GPUVMEnable == true && HostVMEnable == true) { 3197 + HostVMInefficiencyFactor = 3198 + PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData 3199 + / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly; 3200 + HostVMDynamicLevels = HostVMMaxPageTableLevels - HostVMCachedPageTableLevels; 3201 + } else { 3202 + HostVMInefficiencyFactor = 1; 3203 + HostVMDynamicLevels = 0; 3204 + } 3205 + 3206 + ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) 3207 + * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 3208 + 3209 + if (GPUVMEnable == true) { 3210 + TimeForFetchingMetaPTEImmediateFlip = dml_max3( 3211 + Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, 3212 + UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevels + 1) - 1), 3213 + LineTime / 4.0); 3214 + } else { 3215 + TimeForFetchingMetaPTEImmediateFlip = 0; 3216 + } 3217 + 3218 + *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; 3219 + if ((GPUVMEnable == true || DCCEnable == true)) { 3220 + TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow) * HostVMInefficiencyFactor / ImmediateFlipBW, UrgentLatency * (HostVMDynamicLevels + 1), LineTime / 4); 3221 + } else { 3222 + TimeForFetchingRowInVBlankImmediateFlip = 0; 3223 + } 3224 + 3225 + *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0; 3226 + *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime), (MetaRowBytes + DPTEBytesPerRow) * HostVMInefficiencyFactor / 
(*DestinationLinesToRequestRowInImmediateFlip * LineTime)); 3227 + if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { 3228 + if (GPUVMEnable == true && DCCEnable != true) { 3229 + min_row_time = dml_min( 3230 + dpte_row_height * LineTime / VRatio, 3231 + dpte_row_height_chroma * LineTime / (VRatio / 2)); 3232 + } else if (GPUVMEnable != true && DCCEnable == true) { 3233 + min_row_time = dml_min( 3234 + meta_row_height * LineTime / VRatio, 3235 + meta_row_height_chroma * LineTime / (VRatio / 2)); 3236 + } else { 3237 + min_row_time = dml_min4( 3238 + dpte_row_height * LineTime / VRatio, 3239 + meta_row_height * LineTime / VRatio, 3240 + dpte_row_height_chroma * LineTime / (VRatio / 2), 3241 + meta_row_height_chroma * LineTime / (VRatio / 2)); 3242 + } 3243 + } else { 3244 + if (GPUVMEnable == true && DCCEnable != true) { 3245 + min_row_time = dpte_row_height * LineTime / VRatio; 3246 + } else if (GPUVMEnable != true && DCCEnable == true) { 3247 + min_row_time = meta_row_height * LineTime / VRatio; 3248 + } else { 3249 + min_row_time = dml_min( 3250 + dpte_row_height * LineTime / VRatio, 3251 + meta_row_height * LineTime / VRatio); 3252 + } 3253 + } 3254 + 3255 + if (*DestinationLinesToRequestVMInImmediateFlip >= 32 3256 + || *DestinationLinesToRequestRowInImmediateFlip >= 16 3257 + || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) { 3258 + *ImmediateFlipSupportedForPipe = false; 3259 + } else { 3260 + *ImmediateFlipSupportedForPipe = true; 3261 + } 3262 + } 3263 +

/*
 * Resolve a DSC (compressed) output bpp inside the window [MinBPP, MaxBPP].
 *
 * DecimalBPP: upper bound on the bpp that may be returned.
 * DesiredBPP: 0 lets the helper pick; any other value is accepted only if it
 *             lies inside the window and does not exceed DecimalBPP.
 *
 * With DesiredBPP == 0 the result is DecimalBPP rounded down to a multiple
 * of 1/16, capped at MaxBPP. Returns BPP_INVALID when no value qualifies.
 */
static double TruncToValidDSCBPP(
		double DecimalBPP,
		double DesiredBPP,
		double MinBPP,
		double MaxBPP)
{
	if (DesiredBPP == 0) {
		if (DecimalBPP < MinBPP)
			return BPP_INVALID;
		else if (DecimalBPP >= MaxBPP)
			return MaxBPP;
		else
			return dml_floor(16 * DecimalBPP, 1) / 16.0;
	}

	if (DecimalBPP < MinBPP
			|| DesiredBPP < MinBPP
			|| DesiredBPP > MaxBPP
			|| DecimalBPP < DesiredBPP)
		return BPP_INVALID;

	return DesiredBPP;
}

/*
 * Pick the output bpp to use for one output, or BPP_INVALID if none fits.
 *
 * DecimalBPP:              upper bound on the bpp that may be returned.
 * DesiredBPP:              0 means "highest value that fits"; otherwise only
 *                          this exact value is accepted.
 * DSCEnabled:              selects the compressed (DSC) rules; ignored when
 *                          Output is dm_hdmi.
 * Output:                  encoder class; dm_hdmi has its own bpp ladders.
 * Format:                  output pixel format, selects the ladder/window.
 * DSCInputBitPerComponent: sets the upper end of the DSC window
 *                          (1.5x / 2x / 3x this value, minus 1/16).
 *
 * The return type is double on purpose: the DSC paths yield values on a
 * 1/16 bpp grid and may hand back a fractional DesiredBPP. An integer
 * return type would silently drop that fraction before the caller sees it.
 */
static double TruncToValidBPP(
		double DecimalBPP,
		double DesiredBPP,
		bool DSCEnabled,
		enum output_encoder_class Output,
		enum output_format_class Format,
		unsigned int DSCInputBitPerComponent)
{
	if (Output == dm_hdmi) {
		if (Format == dm_420) {
			if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
				return 18;
			else if (DecimalBPP >= 15 && (DesiredBPP == 0 || DesiredBPP == 15))
				return 15;
			else if (DecimalBPP >= 12 && (DesiredBPP == 0 || DesiredBPP == 12))
				return 12;
			else
				return BPP_INVALID;
		} else if (Format == dm_444) {
			if (DecimalBPP >= 36 && (DesiredBPP == 0 || DesiredBPP == 36))
				return 36;
			else if (DecimalBPP >= 30 && (DesiredBPP == 0 || DesiredBPP == 30))
				return 30;
			else if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
				return 24;
			else if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
				return 18;
			else
				return BPP_INVALID;
		} else {
			/* Remaining formats: the budget is scaled by 1/1.5 before the ladder. */
			if (DecimalBPP / 1.5 >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
				return 24;
			else if (DecimalBPP / 1.5 >= 20 && (DesiredBPP == 0 || DesiredBPP == 20))
				return 20;
			else if (DecimalBPP / 1.5 >= 16 && (DesiredBPP == 0 || DesiredBPP == 16))
				return 16;
			else
				return BPP_INVALID;
		}
	}

	if (DSCEnabled) {
		/* Same clamp for every format; only the window differs. */
		if (Format == dm_420)
			return TruncToValidDSCBPP(DecimalBPP, DesiredBPP, 6,
					1.5 * DSCInputBitPerComponent - 1.0 / 16.0);
		else if (Format == dm_n422)
			return TruncToValidDSCBPP(DecimalBPP, DesiredBPP, 7,
					2 * DSCInputBitPerComponent - 1.0 / 16.0);
		else
			return TruncToValidDSCBPP(DecimalBPP, DesiredBPP, 8,
					3 * DSCInputBitPerComponent - 1.0 / 16.0);
	}

	if (Format == dm_420) {
		if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
			return 18;
		else if (DecimalBPP >= 15 && (DesiredBPP == 0 || DesiredBPP == 15))
			return 15;
		else if (DecimalBPP >= 12 && (DesiredBPP == 0 || DesiredBPP == 12))
			return 12;
		else
			return BPP_INVALID;
	} else if (Format == dm_s422 || Format == dm_n422) {
		if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
			return 24;
		else if (DecimalBPP >= 20 && (DesiredBPP == 0 || DesiredBPP == 20))
			return 20;
		else if (DecimalBPP >= 16 && (DesiredBPP == 0 || DesiredBPP == 16))
			return 16;
		else
			return BPP_INVALID;
	} else {
		if (DecimalBPP >= 36 && (DesiredBPP == 0 || DesiredBPP == 36))
			return 36;
		else if (DecimalBPP >= 30 && (DesiredBPP == 0 || DesiredBPP == 30))
			return 30;
		else if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
			return 24;
		else
			return BPP_INVALID;
	}
}

3390 + 3391 + void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) 3392 + { 3393 + struct vba_vars_st *locals = &mode_lib->vba; 3394 + 3395 + int i; 3396
+ unsigned int j, k, m; 3397 + 3398 + /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ 3399 + 3400 + /*Scale Ratio, taps Support Check*/ 3401 + 3402 + mode_lib->vba.ScaleRatioAndTapsSupport = true; 3403 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 3404 + if (mode_lib->vba.ScalerEnabled[k] == false 3405 + && ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 3406 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 3407 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 3408 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 3409 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) 3410 + || mode_lib->vba.HRatio[k] != 1.0 3411 + || mode_lib->vba.htaps[k] != 1.0 3412 + || mode_lib->vba.VRatio[k] != 1.0 3413 + || mode_lib->vba.vtaps[k] != 1.0)) { 3414 + mode_lib->vba.ScaleRatioAndTapsSupport = false; 3415 + } else if (mode_lib->vba.vtaps[k] < 1.0 || mode_lib->vba.vtaps[k] > 8.0 3416 + || mode_lib->vba.htaps[k] < 1.0 || mode_lib->vba.htaps[k] > 8.0 3417 + || (mode_lib->vba.htaps[k] > 1.0 3418 + && (mode_lib->vba.htaps[k] % 2) == 1) 3419 + || mode_lib->vba.HRatio[k] > mode_lib->vba.MaxHSCLRatio 3420 + || mode_lib->vba.VRatio[k] > mode_lib->vba.MaxVSCLRatio 3421 + || mode_lib->vba.HRatio[k] > mode_lib->vba.htaps[k] 3422 + || mode_lib->vba.VRatio[k] > mode_lib->vba.vtaps[k] 3423 + || (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 3424 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 3425 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 3426 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 3427 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8 3428 + && (mode_lib->vba.HRatio[k] / 2.0 3429 + > mode_lib->vba.HTAPsChroma[k] 3430 + || mode_lib->vba.VRatio[k] / 2.0 3431 + > mode_lib->vba.VTAPsChroma[k]))) { 3432 + mode_lib->vba.ScaleRatioAndTapsSupport = false; 3433 + } 3434 + } 3435 + /*Source Format, Pixel Format and Scan Support Check*/ 3436 + 3437 + mode_lib->vba.SourceFormatPixelAndScanSupport = true; 3438 + for (k = 0; k <= 
mode_lib->vba.NumberOfActivePlanes - 1; k++) { 3439 + if ((mode_lib->vba.SurfaceTiling[k] == dm_sw_linear 3440 + && mode_lib->vba.SourceScan[k] != dm_horz) 3441 + || ((mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d 3442 + || mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d_x 3443 + || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d 3444 + || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_t 3445 + || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_x 3446 + || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d 3447 + || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d_x) 3448 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_64) 3449 + || (mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_r_x 3450 + && (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8 3451 + || mode_lib->vba.SourcePixelFormat[k] 3452 + == dm_420_8 3453 + || mode_lib->vba.SourcePixelFormat[k] 3454 + == dm_420_10)) 3455 + || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl 3456 + || mode_lib->vba.SurfaceTiling[k] 3457 + == dm_sw_gfx7_2d_thin_lvp) 3458 + && !((mode_lib->vba.SourcePixelFormat[k] 3459 + == dm_444_64 3460 + || mode_lib->vba.SourcePixelFormat[k] 3461 + == dm_444_32) 3462 + && mode_lib->vba.SourceScan[k] 3463 + == dm_horz 3464 + && mode_lib->vba.SupportGFX7CompatibleTilingIn32bppAnd64bpp 3465 + == true 3466 + && mode_lib->vba.DCCEnable[k] 3467 + == false)) 3468 + || (mode_lib->vba.DCCEnable[k] == true 3469 + && (mode_lib->vba.SurfaceTiling[k] 3470 + == dm_sw_linear 3471 + || mode_lib->vba.SourcePixelFormat[k] 3472 + == dm_420_8 3473 + || mode_lib->vba.SourcePixelFormat[k] 3474 + == dm_420_10)))) { 3475 + mode_lib->vba.SourceFormatPixelAndScanSupport = false; 3476 + } 3477 + } 3478 + /*Bandwidth Support Check*/ 3479 + 3480 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 3481 + if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { 3482 + locals->BytePerPixelInDETY[k] = 8.0; 3483 + locals->BytePerPixelInDETC[k] = 0.0; 3484 + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { 
3485 + locals->BytePerPixelInDETY[k] = 4.0; 3486 + locals->BytePerPixelInDETC[k] = 0.0; 3487 + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16 3488 + || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) { 3489 + locals->BytePerPixelInDETY[k] = 2.0; 3490 + locals->BytePerPixelInDETC[k] = 0.0; 3491 + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) { 3492 + locals->BytePerPixelInDETY[k] = 1.0; 3493 + locals->BytePerPixelInDETC[k] = 0.0; 3494 + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { 3495 + locals->BytePerPixelInDETY[k] = 1.0; 3496 + locals->BytePerPixelInDETC[k] = 2.0; 3497 + } else { 3498 + locals->BytePerPixelInDETY[k] = 4.0 / 3; 3499 + locals->BytePerPixelInDETC[k] = 8.0 / 3; 3500 + } 3501 + if (mode_lib->vba.SourceScan[k] == dm_horz) { 3502 + locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportWidth[k]; 3503 + } else { 3504 + locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportHeight[k]; 3505 + } 3506 + } 3507 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 3508 + locals->ReadBandwidthLuma[k] = locals->SwathWidthYSingleDPP[k] * dml_ceil(locals->BytePerPixelInDETY[k], 1.0) 3509 + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; 3510 + locals->ReadBandwidthChroma[k] = locals->SwathWidthYSingleDPP[k] / 2 * dml_ceil(locals->BytePerPixelInDETC[k], 2.0) 3511 + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k] / 2.0; 3512 + locals->ReadBandwidth[k] = locals->ReadBandwidthLuma[k] + locals->ReadBandwidthChroma[k]; 3513 + } 3514 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 3515 + if (mode_lib->vba.WritebackEnable[k] == true 3516 + && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { 3517 + locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] 3518 + * mode_lib->vba.WritebackDestinationHeight[k] 3519 + / (mode_lib->vba.WritebackSourceHeight[k] 3520 + * mode_lib->vba.HTotal[k] 3521 + / 
mode_lib->vba.PixelClock[k]) * 4.0; 3522 + } else if (mode_lib->vba.WritebackEnable[k] == true 3523 + && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { 3524 + locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] 3525 + * mode_lib->vba.WritebackDestinationHeight[k] 3526 + / (mode_lib->vba.WritebackSourceHeight[k] 3527 + * mode_lib->vba.HTotal[k] 3528 + / mode_lib->vba.PixelClock[k]) * 3.0; 3529 + } else if (mode_lib->vba.WritebackEnable[k] == true) { 3530 + locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] 3531 + * mode_lib->vba.WritebackDestinationHeight[k] 3532 + / (mode_lib->vba.WritebackSourceHeight[k] 3533 + * mode_lib->vba.HTotal[k] 3534 + / mode_lib->vba.PixelClock[k]) * 1.5; 3535 + } else { 3536 + locals->WriteBandwidth[k] = 0.0; 3537 + } 3538 + } 3539 + mode_lib->vba.DCCEnabledInAnyPlane = false; 3540 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 3541 + if (mode_lib->vba.DCCEnable[k] == true) { 3542 + mode_lib->vba.DCCEnabledInAnyPlane = true; 3543 + } 3544 + } 3545 + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { 3546 + locals->IdealSDPPortBandwidthPerState[i] = dml_min3( 3547 + mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i], 3548 + mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels 3549 + * mode_lib->vba.DRAMChannelWidth, 3550 + mode_lib->vba.FabricClockPerState[i] 3551 + * mode_lib->vba.FabricDatapathToDCNDataReturn); 3552 + if (mode_lib->vba.HostVMEnable == false) { 3553 + locals->ReturnBWPerState[i] = locals->IdealSDPPortBandwidthPerState[i] 3554 + * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100.0; 3555 + } else { 3556 + locals->ReturnBWPerState[i] = locals->IdealSDPPortBandwidthPerState[i] 3557 + * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0; 3558 + } 3559 + } 3560 + /*Writeback Latency support check*/ 3561 + 3562 + 
mode_lib->vba.WritebackLatencySupport = true; 3563 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 3564 + if (mode_lib->vba.WritebackEnable[k] == true) { 3565 + if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { 3566 + if (locals->WriteBandwidth[k] 3567 + > (mode_lib->vba.WritebackInterfaceLumaBufferSize 3568 + + mode_lib->vba.WritebackInterfaceChromaBufferSize) 3569 + / mode_lib->vba.WritebackLatency) { 3570 + mode_lib->vba.WritebackLatencySupport = false; 3571 + } 3572 + } else { 3573 + if (locals->WriteBandwidth[k] 3574 + > 1.5 3575 + * dml_min( 3576 + mode_lib->vba.WritebackInterfaceLumaBufferSize, 3577 + 2.0 3578 + * mode_lib->vba.WritebackInterfaceChromaBufferSize) 3579 + / mode_lib->vba.WritebackLatency) { 3580 + mode_lib->vba.WritebackLatencySupport = false; 3581 + } 3582 + } 3583 + } 3584 + } 3585 + /*Re-ordering Buffer Support Check*/ 3586 + 3587 + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { 3588 + locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] = 3589 + (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i] 3590 + + dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, 3591 + mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 3592 + mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly) 3593 + * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i]; 3594 + if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i] 3595 + > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) { 3596 + locals->ROBSupport[i] = true; 3597 + } else { 3598 + locals->ROBSupport[i] = false; 3599 + } 3600 + } 3601 + /*Writeback Mode Support Check*/ 3602 + 3603 + mode_lib->vba.TotalNumberOfActiveWriteback = 0; 3604 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 3605 + if (mode_lib->vba.WritebackEnable[k] == true) { 3606 + if (mode_lib->vba.ActiveWritebacksPerPlane[k] == 0) 3607 + 
mode_lib->vba.ActiveWritebacksPerPlane[k] = 1; 3608 + mode_lib->vba.TotalNumberOfActiveWriteback = 3609 + mode_lib->vba.TotalNumberOfActiveWriteback 3610 + + mode_lib->vba.ActiveWritebacksPerPlane[k]; 3611 + } 3612 + } 3613 + mode_lib->vba.WritebackModeSupport = true; 3614 + if (mode_lib->vba.TotalNumberOfActiveWriteback > mode_lib->vba.MaxNumWriteback) { 3615 + mode_lib->vba.WritebackModeSupport = false; 3616 + } 3617 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 3618 + if (mode_lib->vba.WritebackEnable[k] == true 3619 + && mode_lib->vba.Writeback10bpc420Supported != true 3620 + && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { 3621 + mode_lib->vba.WritebackModeSupport = false; 3622 + } 3623 + } 3624 + /*Writeback Scale Ratio and Taps Support Check*/ 3625 + 3626 + mode_lib->vba.WritebackScaleRatioAndTapsSupport = true; 3627 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 3628 + if (mode_lib->vba.WritebackEnable[k] == true) { 3629 + if (mode_lib->vba.WritebackLumaAndChromaScalingSupported == false 3630 + && (mode_lib->vba.WritebackHRatio[k] != 1.0 3631 + || mode_lib->vba.WritebackVRatio[k] != 1.0)) { 3632 + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; 3633 + } 3634 + if (mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackMaxHSCLRatio 3635 + || mode_lib->vba.WritebackVRatio[k] 3636 + > mode_lib->vba.WritebackMaxVSCLRatio 3637 + || mode_lib->vba.WritebackHRatio[k] 3638 + < mode_lib->vba.WritebackMinHSCLRatio 3639 + || mode_lib->vba.WritebackVRatio[k] 3640 + < mode_lib->vba.WritebackMinVSCLRatio 3641 + || mode_lib->vba.WritebackLumaHTaps[k] 3642 + > mode_lib->vba.WritebackMaxHSCLTaps 3643 + || mode_lib->vba.WritebackLumaVTaps[k] 3644 + > mode_lib->vba.WritebackMaxVSCLTaps 3645 + || mode_lib->vba.WritebackHRatio[k] 3646 + > mode_lib->vba.WritebackLumaHTaps[k] 3647 + || mode_lib->vba.WritebackVRatio[k] 3648 + > mode_lib->vba.WritebackLumaVTaps[k] 3649 + || (mode_lib->vba.WritebackLumaHTaps[k] > 2.0 3650 + 
&& ((mode_lib->vba.WritebackLumaHTaps[k] % 2) 3651 + == 1)) 3652 + || (mode_lib->vba.WritebackPixelFormat[k] != dm_444_32 3653 + && (mode_lib->vba.WritebackChromaHTaps[k] 3654 + > mode_lib->vba.WritebackMaxHSCLTaps 3655 + || mode_lib->vba.WritebackChromaVTaps[k] 3656 + > mode_lib->vba.WritebackMaxVSCLTaps 3657 + || 2.0 3658 + * mode_lib->vba.WritebackHRatio[k] 3659 + > mode_lib->vba.WritebackChromaHTaps[k] 3660 + || 2.0 3661 + * mode_lib->vba.WritebackVRatio[k] 3662 + > mode_lib->vba.WritebackChromaVTaps[k] 3663 + || (mode_lib->vba.WritebackChromaHTaps[k] > 2.0 3664 + && ((mode_lib->vba.WritebackChromaHTaps[k] % 2) == 1))))) { 3665 + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; 3666 + } 3667 + if (mode_lib->vba.WritebackVRatio[k] < 1.0) { 3668 + mode_lib->vba.WritebackLumaVExtra = 3669 + dml_max(1.0 - 2.0 / dml_ceil(1.0 / mode_lib->vba.WritebackVRatio[k], 1.0), 0.0); 3670 + } else { 3671 + mode_lib->vba.WritebackLumaVExtra = -1; 3672 + } 3673 + if ((mode_lib->vba.WritebackPixelFormat[k] == dm_444_32 3674 + && mode_lib->vba.WritebackLumaVTaps[k] 3675 + > (mode_lib->vba.WritebackLineBufferLumaBufferSize 3676 + + mode_lib->vba.WritebackLineBufferChromaBufferSize) 3677 + / 3.0 3678 + / mode_lib->vba.WritebackDestinationWidth[k] 3679 + - mode_lib->vba.WritebackLumaVExtra) 3680 + || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_8 3681 + && mode_lib->vba.WritebackLumaVTaps[k] 3682 + > mode_lib->vba.WritebackLineBufferLumaBufferSize 3683 + * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k] 3684 + - mode_lib->vba.WritebackLumaVExtra) 3685 + || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10 3686 + && mode_lib->vba.WritebackLumaVTaps[k] 3687 + > mode_lib->vba.WritebackLineBufferLumaBufferSize 3688 + * 8.0 / 10.0 3689 + / mode_lib->vba.WritebackDestinationWidth[k] 3690 + - mode_lib->vba.WritebackLumaVExtra)) { 3691 + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; 3692 + } 3693 + if (2.0 * mode_lib->vba.WritebackVRatio[k] < 1) { 3694 + 
mode_lib->vba.WritebackChromaVExtra = 0.0; 3695 + } else { 3696 + mode_lib->vba.WritebackChromaVExtra = -1; 3697 + } 3698 + if ((mode_lib->vba.WritebackPixelFormat[k] == dm_420_8 3699 + && mode_lib->vba.WritebackChromaVTaps[k] 3700 + > mode_lib->vba.WritebackLineBufferChromaBufferSize 3701 + * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k] 3702 + - mode_lib->vba.WritebackChromaVExtra) 3703 + || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10 3704 + && mode_lib->vba.WritebackChromaVTaps[k] 3705 + > mode_lib->vba.WritebackLineBufferChromaBufferSize 3706 + * 8.0 / 10.0 3707 + / mode_lib->vba.WritebackDestinationWidth[k] 3708 + - mode_lib->vba.WritebackChromaVExtra)) { 3709 + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; 3710 + } 3711 + } 3712 + } 3713 + /*Maximum DISPCLK/DPPCLK Support check*/ 3714 + 3715 + mode_lib->vba.WritebackRequiredDISPCLK = 0.0; 3716 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 3717 + if (mode_lib->vba.WritebackEnable[k] == true) { 3718 + mode_lib->vba.WritebackRequiredDISPCLK = 3719 + dml_max( 3720 + mode_lib->vba.WritebackRequiredDISPCLK, 3721 + CalculateWriteBackDISPCLK( 3722 + mode_lib->vba.WritebackPixelFormat[k], 3723 + mode_lib->vba.PixelClock[k], 3724 + mode_lib->vba.WritebackHRatio[k], 3725 + mode_lib->vba.WritebackVRatio[k], 3726 + mode_lib->vba.WritebackLumaHTaps[k], 3727 + mode_lib->vba.WritebackLumaVTaps[k], 3728 + mode_lib->vba.WritebackChromaHTaps[k], 3729 + mode_lib->vba.WritebackChromaVTaps[k], 3730 + mode_lib->vba.WritebackDestinationWidth[k], 3731 + mode_lib->vba.HTotal[k], 3732 + mode_lib->vba.WritebackChromaLineBufferWidth)); 3733 + } 3734 + } 3735 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 3736 + if (mode_lib->vba.HRatio[k] > 1.0) { 3737 + locals->PSCL_FACTOR[k] = dml_min( 3738 + mode_lib->vba.MaxDCHUBToPSCLThroughput, 3739 + mode_lib->vba.MaxPSCLToLBThroughput 3740 + * mode_lib->vba.HRatio[k] 3741 + / dml_ceil( 3742 + mode_lib->vba.htaps[k] 3743 + / 6.0, 
3744 + 1.0)); 3745 + } else { 3746 + locals->PSCL_FACTOR[k] = dml_min( 3747 + mode_lib->vba.MaxDCHUBToPSCLThroughput, 3748 + mode_lib->vba.MaxPSCLToLBThroughput); 3749 + } 3750 + if (locals->BytePerPixelInDETC[k] == 0.0) { 3751 + locals->PSCL_FACTOR_CHROMA[k] = 0.0; 3752 + locals->MinDPPCLKUsingSingleDPP[k] = 3753 + mode_lib->vba.PixelClock[k] 3754 + * dml_max3( 3755 + mode_lib->vba.vtaps[k] / 6.0 3756 + * dml_min( 3757 + 1.0, 3758 + mode_lib->vba.HRatio[k]), 3759 + mode_lib->vba.HRatio[k] 3760 + * mode_lib->vba.VRatio[k] 3761 + / locals->PSCL_FACTOR[k], 3762 + 1.0); 3763 + if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0) 3764 + && locals->MinDPPCLKUsingSingleDPP[k] 3765 + < 2.0 * mode_lib->vba.PixelClock[k]) { 3766 + locals->MinDPPCLKUsingSingleDPP[k] = 2.0 3767 + * mode_lib->vba.PixelClock[k]; 3768 + } 3769 + } else { 3770 + if (mode_lib->vba.HRatio[k] / 2.0 > 1.0) { 3771 + locals->PSCL_FACTOR_CHROMA[k] = 3772 + dml_min( 3773 + mode_lib->vba.MaxDCHUBToPSCLThroughput, 3774 + mode_lib->vba.MaxPSCLToLBThroughput 3775 + * mode_lib->vba.HRatio[k] 3776 + / 2.0 3777 + / dml_ceil( 3778 + mode_lib->vba.HTAPsChroma[k] 3779 + / 6.0, 3780 + 1.0)); 3781 + } else { 3782 + locals->PSCL_FACTOR_CHROMA[k] = dml_min( 3783 + mode_lib->vba.MaxDCHUBToPSCLThroughput, 3784 + mode_lib->vba.MaxPSCLToLBThroughput); 3785 + } 3786 + locals->MinDPPCLKUsingSingleDPP[k] = 3787 + mode_lib->vba.PixelClock[k] 3788 + * dml_max5( 3789 + mode_lib->vba.vtaps[k] / 6.0 3790 + * dml_min( 3791 + 1.0, 3792 + mode_lib->vba.HRatio[k]), 3793 + mode_lib->vba.HRatio[k] 3794 + * mode_lib->vba.VRatio[k] 3795 + / locals->PSCL_FACTOR[k], 3796 + mode_lib->vba.VTAPsChroma[k] 3797 + / 6.0 3798 + * dml_min( 3799 + 1.0, 3800 + mode_lib->vba.HRatio[k] 3801 + / 2.0), 3802 + mode_lib->vba.HRatio[k] 3803 + * mode_lib->vba.VRatio[k] 3804 + / 4.0 3805 + / locals->PSCL_FACTOR_CHROMA[k], 3806 + 1.0); 3807 + if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0 3808 + || mode_lib->vba.HTAPsChroma[k] 
> 6.0 3809 + || mode_lib->vba.VTAPsChroma[k] > 6.0) 3810 + && locals->MinDPPCLKUsingSingleDPP[k] 3811 + < 2.0 * mode_lib->vba.PixelClock[k]) { 3812 + locals->MinDPPCLKUsingSingleDPP[k] = 2.0 3813 + * mode_lib->vba.PixelClock[k]; 3814 + } 3815 + } 3816 + } 3817 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 3818 + Calculate256BBlockSizes( 3819 + mode_lib->vba.SourcePixelFormat[k], 3820 + mode_lib->vba.SurfaceTiling[k], 3821 + dml_ceil(locals->BytePerPixelInDETY[k], 1.0), 3822 + dml_ceil(locals->BytePerPixelInDETC[k], 2.0), 3823 + &locals->Read256BlockHeightY[k], 3824 + &locals->Read256BlockHeightC[k], 3825 + &locals->Read256BlockWidthY[k], 3826 + &locals->Read256BlockWidthC[k]); 3827 + if (mode_lib->vba.SourceScan[k] == dm_horz) { 3828 + locals->MaxSwathHeightY[k] = locals->Read256BlockHeightY[k]; 3829 + locals->MaxSwathHeightC[k] = locals->Read256BlockHeightC[k]; 3830 + } else { 3831 + locals->MaxSwathHeightY[k] = locals->Read256BlockWidthY[k]; 3832 + locals->MaxSwathHeightC[k] = locals->Read256BlockWidthC[k]; 3833 + } 3834 + if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 3835 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 3836 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 3837 + || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16 3838 + || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8)) { 3839 + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear 3840 + || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64 3841 + && (mode_lib->vba.SurfaceTiling[k] 3842 + == dm_sw_4kb_s 3843 + || mode_lib->vba.SurfaceTiling[k] 3844 + == dm_sw_4kb_s_x 3845 + || mode_lib->vba.SurfaceTiling[k] 3846 + == dm_sw_64kb_s 3847 + || mode_lib->vba.SurfaceTiling[k] 3848 + == dm_sw_64kb_s_t 3849 + || mode_lib->vba.SurfaceTiling[k] 3850 + == dm_sw_64kb_s_x 3851 + || mode_lib->vba.SurfaceTiling[k] 3852 + == dm_sw_var_s 3853 + || mode_lib->vba.SurfaceTiling[k] 3854 + == dm_sw_var_s_x) 3855 + && mode_lib->vba.SourceScan[k] == dm_horz)) { 3856 + 
locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; 3857 + } else { 3858 + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k] 3859 + / 2.0; 3860 + } 3861 + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; 3862 + } else { 3863 + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { 3864 + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; 3865 + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; 3866 + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 3867 + && mode_lib->vba.SourceScan[k] == dm_horz) { 3868 + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k] 3869 + / 2.0; 3870 + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; 3871 + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10 3872 + && mode_lib->vba.SourceScan[k] == dm_horz) { 3873 + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k] 3874 + / 2.0; 3875 + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; 3876 + } else { 3877 + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; 3878 + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; 3879 + } 3880 + } 3881 + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { 3882 + mode_lib->vba.MaximumSwathWidthSupport = 8192.0; 3883 + } else { 3884 + mode_lib->vba.MaximumSwathWidthSupport = 5120.0; 3885 + } 3886 + mode_lib->vba.MaximumSwathWidthInDETBuffer = 3887 + dml_min( 3888 + mode_lib->vba.MaximumSwathWidthSupport, 3889 + mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0 3890 + / (locals->BytePerPixelInDETY[k] 3891 + * locals->MinSwathHeightY[k] 3892 + + locals->BytePerPixelInDETC[k] 3893 + / 2.0 3894 + * locals->MinSwathHeightC[k])); 3895 + if (locals->BytePerPixelInDETC[k] == 0.0) { 3896 + mode_lib->vba.MaximumSwathWidthInLineBuffer = 3897 + mode_lib->vba.LineBufferSize 3898 + * dml_max(mode_lib->vba.HRatio[k], 1.0) 3899 + / mode_lib->vba.LBBitPerPixel[k] 3900 + / (mode_lib->vba.vtaps[k] 3901 + + dml_max( 3902 + dml_ceil( 3903 + mode_lib->vba.VRatio[k], 3904 + 1.0) 3905 + - 2, 3906 
+ 0.0)); 3907 + } else { 3908 + mode_lib->vba.MaximumSwathWidthInLineBuffer = 3909 + dml_min( 3910 + mode_lib->vba.LineBufferSize 3911 + * dml_max( 3912 + mode_lib->vba.HRatio[k], 3913 + 1.0) 3914 + / mode_lib->vba.LBBitPerPixel[k] 3915 + / (mode_lib->vba.vtaps[k] 3916 + + dml_max( 3917 + dml_ceil( 3918 + mode_lib->vba.VRatio[k], 3919 + 1.0) 3920 + - 2, 3921 + 0.0)), 3922 + 2.0 * mode_lib->vba.LineBufferSize 3923 + * dml_max( 3924 + mode_lib->vba.HRatio[k] 3925 + / 2.0, 3926 + 1.0) 3927 + / mode_lib->vba.LBBitPerPixel[k] 3928 + / (mode_lib->vba.VTAPsChroma[k] 3929 + + dml_max( 3930 + dml_ceil( 3931 + mode_lib->vba.VRatio[k] 3932 + / 2.0, 3933 + 1.0) 3934 + - 2, 3935 + 0.0))); 3936 + } 3937 + locals->MaximumSwathWidth[k] = dml_min( 3938 + mode_lib->vba.MaximumSwathWidthInDETBuffer, 3939 + mode_lib->vba.MaximumSwathWidthInLineBuffer); 3940 + } 3941 + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { 3942 + for (j = 0; j < 2; j++) { 3943 + mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( 3944 + mode_lib->vba.MaxDispclk[i], 3945 + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); 3946 + mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( 3947 + mode_lib->vba.MaxDppclk[i], 3948 + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); 3949 + locals->RequiredDISPCLK[i][j] = 0.0; 3950 + locals->DISPCLK_DPPCLK_Support[i][j] = true; 3951 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 3952 + mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = 3953 + mode_lib->vba.PixelClock[k] 3954 + * (1.0 3955 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading 3956 + / 100.0) 3957 + * (1.0 3958 + + mode_lib->vba.DISPCLKRampingMargin 3959 + / 100.0); 3960 + if (mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine >= mode_lib->vba.MaxDispclk[i] 3961 + && i == mode_lib->vba.soc.num_states) 3962 + mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = mode_lib->vba.PixelClock[k] 3963 + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 
100.0); 3964 + 3965 + mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 3966 + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + mode_lib->vba.DISPCLKRampingMargin / 100.0); 3967 + if (mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine >= mode_lib->vba.MaxDispclk[i] 3968 + && i == mode_lib->vba.soc.num_states) 3969 + mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 3970 + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 3971 + if (mode_lib->vba.ODMCapability == false || mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine <= mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) { 3972 + locals->ODMCombineEnablePerState[i][k] = false; 3973 + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine; 3974 + } else { 3975 + locals->ODMCombineEnablePerState[i][k] = true; 3976 + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; 3977 + } 3978 + if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity 3979 + && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] 3980 + && locals->ODMCombineEnablePerState[i][k] == false) { 3981 + locals->NoOfDPP[i][j][k] = 1; 3982 + locals->RequiredDPPCLK[i][j][k] = 3983 + locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 3984 + } else { 3985 + locals->NoOfDPP[i][j][k] = 2; 3986 + locals->RequiredDPPCLK[i][j][k] = 3987 + locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 3988 + } 3989 + locals->RequiredDISPCLK[i][j] = dml_max( 3990 + locals->RequiredDISPCLK[i][j], 3991 + mode_lib->vba.PlaneRequiredDISPCLK); 3992 + if ((locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 3993 + > 
mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity) 3994 + || (mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)) { 3995 + locals->DISPCLK_DPPCLK_Support[i][j] = false; 3996 + } 3997 + } 3998 + locals->TotalNumberOfActiveDPP[i][j] = 0.0; 3999 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) 4000 + locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; 4001 + if (j == 1) { 4002 + while (locals->TotalNumberOfActiveDPP[i][j] < mode_lib->vba.MaxNumDPP 4003 + && locals->TotalNumberOfActiveDPP[i][j] < 2 * mode_lib->vba.NumberOfActivePlanes) { 4004 + double BWOfNonSplitPlaneOfMaximumBandwidth; 4005 + unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; 4006 + 4007 + BWOfNonSplitPlaneOfMaximumBandwidth = 0; 4008 + NumberOfNonSplitPlaneOfMaximumBandwidth = 0; 4009 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 4010 + if (locals->ReadBandwidth[k] > BWOfNonSplitPlaneOfMaximumBandwidth && locals->NoOfDPP[i][j][k] == 1) { 4011 + BWOfNonSplitPlaneOfMaximumBandwidth = locals->ReadBandwidth[k]; 4012 + NumberOfNonSplitPlaneOfMaximumBandwidth = k; 4013 + } 4014 + } 4015 + locals->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; 4016 + locals->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 4017 + locals->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] 4018 + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; 4019 + locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + 1; 4020 + } 4021 + } 4022 + if (locals->TotalNumberOfActiveDPP[i][j] > mode_lib->vba.MaxNumDPP) { 4023 + locals->RequiredDISPCLK[i][j] = 0.0; 4024 + locals->DISPCLK_DPPCLK_Support[i][j] = true; 4025 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4026 + locals->ODMCombineEnablePerState[i][k] = false; 4027 + if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) { 4028 + 
locals->NoOfDPP[i][j][k] = 1; 4029 + locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] 4030 + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4031 + } else { 4032 + locals->NoOfDPP[i][j][k] = 2; 4033 + locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] 4034 + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4035 + } 4036 + if (i != mode_lib->vba.soc.num_states) { 4037 + mode_lib->vba.PlaneRequiredDISPCLK = 4038 + mode_lib->vba.PixelClock[k] 4039 + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4040 + * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0); 4041 + } else { 4042 + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PixelClock[k] 4043 + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4044 + } 4045 + locals->RequiredDISPCLK[i][j] = dml_max( 4046 + locals->RequiredDISPCLK[i][j], 4047 + mode_lib->vba.PlaneRequiredDISPCLK); 4048 + if (locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4049 + > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity 4050 + || mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) 4051 + locals->DISPCLK_DPPCLK_Support[i][j] = false; 4052 + } 4053 + locals->TotalNumberOfActiveDPP[i][j] = 0.0; 4054 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) 4055 + locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; 4056 + } 4057 + locals->RequiredDISPCLK[i][j] = dml_max( 4058 + locals->RequiredDISPCLK[i][j], 4059 + mode_lib->vba.WritebackRequiredDISPCLK); 4060 + if (mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity 4061 + < mode_lib->vba.WritebackRequiredDISPCLK) { 4062 + locals->DISPCLK_DPPCLK_Support[i][j] = false; 4063 + } 4064 + } 4065 + } 4066 + /*Viewport Size Check*/ 4067 + 4068 + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { 4069 + 
locals->ViewportSizeSupport[i] = true; 4070 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4071 + if (locals->ODMCombineEnablePerState[i][k] == true) { 4072 + if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])) 4073 + > locals->MaximumSwathWidth[k]) { 4074 + locals->ViewportSizeSupport[i] = false; 4075 + } 4076 + } else { 4077 + if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) { 4078 + locals->ViewportSizeSupport[i] = false; 4079 + } 4080 + } 4081 + } 4082 + } 4083 + /*Total Available Pipes Support Check*/ 4084 + 4085 + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { 4086 + for (j = 0; j < 2; j++) { 4087 + if (locals->TotalNumberOfActiveDPP[i][j] <= mode_lib->vba.MaxNumDPP) 4088 + locals->TotalAvailablePipesSupport[i][j] = true; 4089 + else 4090 + locals->TotalAvailablePipesSupport[i][j] = false; 4091 + } 4092 + } 4093 + /*Total Available OTG Support Check*/ 4094 + 4095 + mode_lib->vba.TotalNumberOfActiveOTG = 0.0; 4096 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4097 + if (mode_lib->vba.BlendingAndTiming[k] == k) { 4098 + mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG 4099 + + 1.0; 4100 + } 4101 + } 4102 + if (mode_lib->vba.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG) { 4103 + mode_lib->vba.NumberOfOTGSupport = true; 4104 + } else { 4105 + mode_lib->vba.NumberOfOTGSupport = false; 4106 + } 4107 + /*Display IO and DSC Support Check*/ 4108 + 4109 + mode_lib->vba.NonsupportedDSCInputBPC = false; 4110 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4111 + if (!(mode_lib->vba.DSCInputBitPerComponent[k] == 12.0 4112 + || mode_lib->vba.DSCInputBitPerComponent[k] == 10.0 4113 + || mode_lib->vba.DSCInputBitPerComponent[k] == 8.0)) { 4114 + mode_lib->vba.NonsupportedDSCInputBPC = true; 4115 + } 4116 + } 4117 + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { 4118 + for (k = 0; k <= 
mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4119 + locals->RequiresDSC[i][k] = 0; 4120 + locals->RequiresFEC[i][k] = 0; 4121 + if (mode_lib->vba.BlendingAndTiming[k] == k) { 4122 + if (mode_lib->vba.Output[k] == dm_hdmi) { 4123 + locals->RequiresDSC[i][k] = 0; 4124 + locals->RequiresFEC[i][k] = 0; 4125 + locals->OutputBppPerState[i][k] = TruncToValidBPP( 4126 + dml_min(600.0, mode_lib->vba.PHYCLKPerState[i]) / mode_lib->vba.PixelClockBackEnd[k] * 24, 4127 + mode_lib->vba.ForcedOutputLinkBPP[k], 4128 + false, 4129 + mode_lib->vba.Output[k], 4130 + mode_lib->vba.OutputFormat[k], 4131 + mode_lib->vba.DSCInputBitPerComponent[k]); 4132 + } else if (mode_lib->vba.Output[k] == dm_dp 4133 + || mode_lib->vba.Output[k] == dm_edp) { 4134 + if (mode_lib->vba.Output[k] == dm_edp) { 4135 + mode_lib->vba.EffectiveFECOverhead = 0.0; 4136 + } else { 4137 + mode_lib->vba.EffectiveFECOverhead = 4138 + mode_lib->vba.FECOverhead; 4139 + } 4140 + if (mode_lib->vba.PHYCLKPerState[i] >= 270.0) { 4141 + mode_lib->vba.Outbpp = TruncToValidBPP( 4142 + (1.0 - mode_lib->vba.Downspreading / 100.0) * 270.0 4143 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, 4144 + mode_lib->vba.ForcedOutputLinkBPP[k], 4145 + false, 4146 + mode_lib->vba.Output[k], 4147 + mode_lib->vba.OutputFormat[k], 4148 + mode_lib->vba.DSCInputBitPerComponent[k]); 4149 + mode_lib->vba.OutbppDSC = TruncToValidBPP( 4150 + (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 270.0 4151 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, 4152 + mode_lib->vba.ForcedOutputLinkBPP[k], 4153 + true, 4154 + mode_lib->vba.Output[k], 4155 + mode_lib->vba.OutputFormat[k], 4156 + mode_lib->vba.DSCInputBitPerComponent[k]); 4157 + if (mode_lib->vba.DSCEnabled[k] == true) { 4158 + locals->RequiresDSC[i][k] = true; 4159 + if (mode_lib->vba.Output[k] == dm_dp) { 4160 + locals->RequiresFEC[i][k] = true; 4161 + } else { 4162 + 
locals->RequiresFEC[i][k] = false; 4163 + } 4164 + mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; 4165 + } else { 4166 + locals->RequiresDSC[i][k] = false; 4167 + locals->RequiresFEC[i][k] = false; 4168 + } 4169 + locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp; 4170 + } 4171 + if (mode_lib->vba.Outbpp == BPP_INVALID && mode_lib->vba.PHYCLKPerState[i] >= 540.0) { 4172 + mode_lib->vba.Outbpp = TruncToValidBPP( 4173 + (1.0 - mode_lib->vba.Downspreading / 100.0) * 540.0 4174 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, 4175 + mode_lib->vba.ForcedOutputLinkBPP[k], 4176 + false, 4177 + mode_lib->vba.Output[k], 4178 + mode_lib->vba.OutputFormat[k], 4179 + mode_lib->vba.DSCInputBitPerComponent[k]); 4180 + mode_lib->vba.OutbppDSC = TruncToValidBPP( 4181 + (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 540.0 4182 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, 4183 + mode_lib->vba.ForcedOutputLinkBPP[k], 4184 + true, 4185 + mode_lib->vba.Output[k], 4186 + mode_lib->vba.OutputFormat[k], 4187 + mode_lib->vba.DSCInputBitPerComponent[k]); 4188 + if (mode_lib->vba.DSCEnabled[k] == true) { 4189 + locals->RequiresDSC[i][k] = true; 4190 + if (mode_lib->vba.Output[k] == dm_dp) { 4191 + locals->RequiresFEC[i][k] = true; 4192 + } else { 4193 + locals->RequiresFEC[i][k] = false; 4194 + } 4195 + mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; 4196 + } else { 4197 + locals->RequiresDSC[i][k] = false; 4198 + locals->RequiresFEC[i][k] = false; 4199 + } 4200 + locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp; 4201 + } 4202 + if (mode_lib->vba.Outbpp == BPP_INVALID 4203 + && mode_lib->vba.PHYCLKPerState[i] 4204 + >= 810.0) { 4205 + mode_lib->vba.Outbpp = TruncToValidBPP( 4206 + (1.0 - mode_lib->vba.Downspreading / 100.0) * 810.0 4207 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, 4208 + 
mode_lib->vba.ForcedOutputLinkBPP[k], 4209 + false, 4210 + mode_lib->vba.Output[k], 4211 + mode_lib->vba.OutputFormat[k], 4212 + mode_lib->vba.DSCInputBitPerComponent[k]); 4213 + mode_lib->vba.OutbppDSC = TruncToValidBPP( 4214 + (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 810.0 4215 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, 4216 + mode_lib->vba.ForcedOutputLinkBPP[k], 4217 + true, 4218 + mode_lib->vba.Output[k], 4219 + mode_lib->vba.OutputFormat[k], 4220 + mode_lib->vba.DSCInputBitPerComponent[k]); 4221 + if (mode_lib->vba.DSCEnabled[k] == true || mode_lib->vba.Outbpp == BPP_INVALID) { 4222 + locals->RequiresDSC[i][k] = true; 4223 + if (mode_lib->vba.Output[k] == dm_dp) { 4224 + locals->RequiresFEC[i][k] = true; 4225 + } else { 4226 + locals->RequiresFEC[i][k] = false; 4227 + } 4228 + mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; 4229 + } else { 4230 + locals->RequiresDSC[i][k] = false; 4231 + locals->RequiresFEC[i][k] = false; 4232 + } 4233 + locals->OutputBppPerState[i][k] = 4234 + mode_lib->vba.Outbpp; 4235 + } 4236 + } 4237 + } else { 4238 + locals->OutputBppPerState[i][k] = BPP_BLENDED_PIPE; 4239 + } 4240 + } 4241 + } 4242 + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { 4243 + locals->DIOSupport[i] = true; 4244 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4245 + if (locals->OutputBppPerState[i][k] == BPP_INVALID 4246 + || (mode_lib->vba.OutputFormat[k] == dm_420 4247 + && mode_lib->vba.Interlace[k] == true 4248 + && mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true)) { 4249 + locals->DIOSupport[i] = false; 4250 + } 4251 + } 4252 + } 4253 + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { 4254 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4255 + locals->DSCCLKRequiredMoreThanSupported[i] = false; 4256 + if (mode_lib->vba.BlendingAndTiming[k] == k) { 4257 + if ((mode_lib->vba.Output[k] == dm_dp 4258 + || 
mode_lib->vba.Output[k] == dm_edp)) { 4259 + if (mode_lib->vba.OutputFormat[k] == dm_420 4260 + || mode_lib->vba.OutputFormat[k] 4261 + == dm_n422) { 4262 + mode_lib->vba.DSCFormatFactor = 2; 4263 + } else { 4264 + mode_lib->vba.DSCFormatFactor = 1; 4265 + } 4266 + if (locals->RequiresDSC[i][k] == true) { 4267 + if (locals->ODMCombineEnablePerState[i][k] 4268 + == true) { 4269 + if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor 4270 + > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { 4271 + locals->DSCCLKRequiredMoreThanSupported[i] = 4272 + true; 4273 + } 4274 + } else { 4275 + if (mode_lib->vba.PixelClockBackEnd[k] / 3.0 / mode_lib->vba.DSCFormatFactor 4276 + > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { 4277 + locals->DSCCLKRequiredMoreThanSupported[i] = 4278 + true; 4279 + } 4280 + } 4281 + } 4282 + } 4283 + } 4284 + } 4285 + } 4286 + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { 4287 + locals->NotEnoughDSCUnits[i] = false; 4288 + mode_lib->vba.TotalDSCUnitsRequired = 0.0; 4289 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4290 + if (locals->RequiresDSC[i][k] == true) { 4291 + if (locals->ODMCombineEnablePerState[i][k] == true) { 4292 + mode_lib->vba.TotalDSCUnitsRequired = 4293 + mode_lib->vba.TotalDSCUnitsRequired + 2.0; 4294 + } else { 4295 + mode_lib->vba.TotalDSCUnitsRequired = 4296 + mode_lib->vba.TotalDSCUnitsRequired + 1.0; 4297 + } 4298 + } 4299 + } 4300 + if (mode_lib->vba.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) { 4301 + locals->NotEnoughDSCUnits[i] = true; 4302 + } 4303 + } 4304 + /*DSC Delay per state*/ 4305 + 4306 + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { 4307 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4308 + if (mode_lib->vba.BlendingAndTiming[k] != k) { 4309 + mode_lib->vba.slices = 0; 4310 + } else if (locals->RequiresDSC[i][k] == 0 4311 + || 
locals->RequiresDSC[i][k] == false) { 4312 + mode_lib->vba.slices = 0; 4313 + } else if (mode_lib->vba.PixelClockBackEnd[k] > 3200.0) { 4314 + mode_lib->vba.slices = dml_ceil( 4315 + mode_lib->vba.PixelClockBackEnd[k] / 400.0, 4316 + 4.0); 4317 + } else if (mode_lib->vba.PixelClockBackEnd[k] > 1360.0) { 4318 + mode_lib->vba.slices = 8.0; 4319 + } else if (mode_lib->vba.PixelClockBackEnd[k] > 680.0) { 4320 + mode_lib->vba.slices = 4.0; 4321 + } else if (mode_lib->vba.PixelClockBackEnd[k] > 340.0) { 4322 + mode_lib->vba.slices = 2.0; 4323 + } else { 4324 + mode_lib->vba.slices = 1.0; 4325 + } 4326 + if (locals->OutputBppPerState[i][k] == BPP_BLENDED_PIPE 4327 + || locals->OutputBppPerState[i][k] == BPP_INVALID) { 4328 + mode_lib->vba.bpp = 0.0; 4329 + } else { 4330 + mode_lib->vba.bpp = locals->OutputBppPerState[i][k]; 4331 + } 4332 + if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) { 4333 + if (locals->ODMCombineEnablePerState[i][k] == false) { 4334 + locals->DSCDelayPerState[i][k] = 4335 + dscceComputeDelay( 4336 + mode_lib->vba.DSCInputBitPerComponent[k], 4337 + mode_lib->vba.bpp, 4338 + dml_ceil( 4339 + mode_lib->vba.HActive[k] 4340 + / mode_lib->vba.slices, 4341 + 1.0), 4342 + mode_lib->vba.slices, 4343 + mode_lib->vba.OutputFormat[k]) 4344 + + dscComputeDelay( 4345 + mode_lib->vba.OutputFormat[k]); 4346 + } else { 4347 + locals->DSCDelayPerState[i][k] = 4348 + 2.0 * (dscceComputeDelay( 4349 + mode_lib->vba.DSCInputBitPerComponent[k], 4350 + mode_lib->vba.bpp, 4351 + dml_ceil(mode_lib->vba.HActive[k] / mode_lib->vba.slices, 1.0), 4352 + mode_lib->vba.slices / 2, 4353 + mode_lib->vba.OutputFormat[k]) 4354 + + dscComputeDelay(mode_lib->vba.OutputFormat[k])); 4355 + } 4356 + locals->DSCDelayPerState[i][k] = 4357 + locals->DSCDelayPerState[i][k] * mode_lib->vba.PixelClock[k] / mode_lib->vba.PixelClockBackEnd[k]; 4358 + } else { 4359 + locals->DSCDelayPerState[i][k] = 0.0; 4360 + } 4361 + } 4362 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes 
- 1; k++) { 4363 + for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { 4364 + for (j = 0; j <= mode_lib->vba.NumberOfActivePlanes - 1; j++) { 4365 + if (mode_lib->vba.BlendingAndTiming[k] == m && locals->RequiresDSC[i][m] == true) 4366 + locals->DSCDelayPerState[i][k] = locals->DSCDelayPerState[i][m]; 4367 + } 4368 + } 4369 + } 4370 + } 4371 + 4372 + //Prefetch Check 4373 + for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) { 4374 + for (j = 0; j <= 1; ++j) { 4375 + locals->TotalNumberOfDCCActiveDPP[i][j] = 0; 4376 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 4377 + if (mode_lib->vba.DCCEnable[k] == true) 4378 + locals->TotalNumberOfDCCActiveDPP[i][j] = locals->TotalNumberOfDCCActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; 4379 + } 4380 + } 4381 + } 4382 + 4383 + mode_lib->vba.UrgentLatency = dml_max3( 4384 + mode_lib->vba.UrgentLatencyPixelDataOnly, 4385 + mode_lib->vba.UrgentLatencyPixelMixedWithVMData, 4386 + mode_lib->vba.UrgentLatencyVMDataOnly); 4387 + mode_lib->vba.PrefetchERROR = CalculateMinAndMaxPrefetchMode( 4388 + mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, 4389 + &mode_lib->vba.MinPrefetchMode, 4390 + &mode_lib->vba.MaxPrefetchMode); 4391 + 4392 + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { 4393 + for (j = 0; j < 2; j++) { 4394 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4395 + locals->RequiredDPPCLKThisState[k] = locals->RequiredDPPCLK[i][j][k]; 4396 + locals->NoOfDPPThisState[k] = locals->NoOfDPP[i][j][k]; 4397 + if (locals->ODMCombineEnablePerState[i][k] == true) { 4398 + locals->SwathWidthYThisState[k] = 4399 + dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])); 4400 + } else { 4401 + locals->SwathWidthYThisState[k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k]; 4402 + } 4403 + mode_lib->vba.SwathWidthGranularityY = 256.0 4404 + / dml_ceil(locals->BytePerPixelInDETY[k], 1.0) 4405 + / 
locals->MaxSwathHeightY[k]; 4406 + mode_lib->vba.RoundedUpMaxSwathSizeBytesY = 4407 + (dml_ceil(locals->SwathWidthYThisState[k] - 1.0, mode_lib->vba.SwathWidthGranularityY) 4408 + + mode_lib->vba.SwathWidthGranularityY) * locals->BytePerPixelInDETY[k] * locals->MaxSwathHeightY[k]; 4409 + if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { 4410 + mode_lib->vba.RoundedUpMaxSwathSizeBytesY = dml_ceil( 4411 + mode_lib->vba.RoundedUpMaxSwathSizeBytesY, 4412 + 256.0) + 256; 4413 + } 4414 + if (locals->MaxSwathHeightC[k] > 0.0) { 4415 + mode_lib->vba.SwathWidthGranularityC = 256.0 / dml_ceil(locals->BytePerPixelInDETC[k], 2.0) / locals->MaxSwathHeightC[k]; 4416 + mode_lib->vba.RoundedUpMaxSwathSizeBytesC = (dml_ceil(locals->SwathWidthYThisState[k] / 2.0 - 1.0, mode_lib->vba.SwathWidthGranularityC) 4417 + + mode_lib->vba.SwathWidthGranularityC) * locals->BytePerPixelInDETC[k] * locals->MaxSwathHeightC[k]; 4418 + if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { 4419 + mode_lib->vba.RoundedUpMaxSwathSizeBytesC = dml_ceil(mode_lib->vba.RoundedUpMaxSwathSizeBytesC, 256.0) + 256; 4420 + } 4421 + } else { 4422 + mode_lib->vba.RoundedUpMaxSwathSizeBytesC = 0.0; 4423 + } 4424 + if (mode_lib->vba.RoundedUpMaxSwathSizeBytesY + mode_lib->vba.RoundedUpMaxSwathSizeBytesC 4425 + <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) { 4426 + locals->SwathHeightYThisState[k] = locals->MaxSwathHeightY[k]; 4427 + locals->SwathHeightCThisState[k] = locals->MaxSwathHeightC[k]; 4428 + } else { 4429 + locals->SwathHeightYThisState[k] = 4430 + locals->MinSwathHeightY[k]; 4431 + locals->SwathHeightCThisState[k] = 4432 + locals->MinSwathHeightC[k]; 4433 + } 4434 + } 4435 + 4436 + CalculateDCFCLKDeepSleep( 4437 + mode_lib, 4438 + mode_lib->vba.NumberOfActivePlanes, 4439 + locals->BytePerPixelInDETY, 4440 + locals->BytePerPixelInDETC, 4441 + mode_lib->vba.VRatio, 4442 + locals->SwathWidthYThisState, 4443 + locals->NoOfDPPThisState, 4444 + mode_lib->vba.HRatio, 4445 + 
mode_lib->vba.PixelClock, 4446 + locals->PSCL_FACTOR, 4447 + locals->PSCL_FACTOR_CHROMA, 4448 + locals->RequiredDPPCLKThisState, 4449 + &mode_lib->vba.ProjectedDCFCLKDeepSleep); 4450 + 4451 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4452 + if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 4453 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 4454 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 4455 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 4456 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)) { 4457 + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( 4458 + mode_lib, 4459 + mode_lib->vba.DCCEnable[k], 4460 + locals->Read256BlockHeightC[k], 4461 + locals->Read256BlockWidthC[k], 4462 + mode_lib->vba.SourcePixelFormat[k], 4463 + mode_lib->vba.SurfaceTiling[k], 4464 + dml_ceil(locals->BytePerPixelInDETC[k], 2.0), 4465 + mode_lib->vba.SourceScan[k], 4466 + mode_lib->vba.ViewportWidth[k] / 2.0, 4467 + mode_lib->vba.ViewportHeight[k] / 2.0, 4468 + locals->SwathWidthYThisState[k] / 2.0, 4469 + mode_lib->vba.GPUVMEnable, 4470 + mode_lib->vba.HostVMEnable, 4471 + mode_lib->vba.HostVMMaxPageTableLevels, 4472 + mode_lib->vba.HostVMCachedPageTableLevels, 4473 + mode_lib->vba.VMMPageSize, 4474 + mode_lib->vba.PTEBufferSizeInRequestsChroma, 4475 + mode_lib->vba.PitchC[k], 4476 + 0.0, 4477 + &locals->MacroTileWidthC[k], 4478 + &mode_lib->vba.MetaRowBytesC, 4479 + &mode_lib->vba.DPTEBytesPerRowC, 4480 + &locals->PTEBufferSizeNotExceededC[i][j][k], 4481 + locals->dpte_row_width_chroma_ub, 4482 + &locals->dpte_row_height_chroma[k], 4483 + &locals->meta_req_width_chroma[k], 4484 + &locals->meta_req_height_chroma[k], 4485 + &locals->meta_row_width_chroma[k], 4486 + &locals->meta_row_height_chroma[k], 4487 + &locals->vm_group_bytes_chroma, 4488 + &locals->dpte_group_bytes_chroma, 4489 + locals->PixelPTEReqWidthC, 4490 + locals->PixelPTEReqHeightC, 4491 + locals->PTERequestSizeC, 4492 + 
locals->dpde0_bytes_per_frame_ub_c, 4493 + locals->meta_pte_bytes_per_frame_ub_c); 4494 + locals->PrefetchLinesC[k] = CalculatePrefetchSourceLines( 4495 + mode_lib, 4496 + mode_lib->vba.VRatio[k]/2, 4497 + mode_lib->vba.VTAPsChroma[k], 4498 + mode_lib->vba.Interlace[k], 4499 + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, 4500 + locals->SwathHeightCThisState[k], 4501 + mode_lib->vba.ViewportYStartC[k], 4502 + &locals->PrefillC[k], 4503 + &locals->MaxNumSwC[k]); 4504 + locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma; 4505 + } else { 4506 + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0; 4507 + mode_lib->vba.MetaRowBytesC = 0.0; 4508 + mode_lib->vba.DPTEBytesPerRowC = 0.0; 4509 + locals->PrefetchLinesC[k] = 0.0; 4510 + locals->PTEBufferSizeNotExceededC[i][j][k] = true; 4511 + locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; 4512 + } 4513 + mode_lib->vba.PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( 4514 + mode_lib, 4515 + mode_lib->vba.DCCEnable[k], 4516 + locals->Read256BlockHeightY[k], 4517 + locals->Read256BlockWidthY[k], 4518 + mode_lib->vba.SourcePixelFormat[k], 4519 + mode_lib->vba.SurfaceTiling[k], 4520 + dml_ceil(locals->BytePerPixelInDETY[k], 1.0), 4521 + mode_lib->vba.SourceScan[k], 4522 + mode_lib->vba.ViewportWidth[k], 4523 + mode_lib->vba.ViewportHeight[k], 4524 + locals->SwathWidthYThisState[k], 4525 + mode_lib->vba.GPUVMEnable, 4526 + mode_lib->vba.HostVMEnable, 4527 + mode_lib->vba.HostVMMaxPageTableLevels, 4528 + mode_lib->vba.HostVMCachedPageTableLevels, 4529 + mode_lib->vba.VMMPageSize, 4530 + locals->PTEBufferSizeInRequestsForLuma, 4531 + mode_lib->vba.PitchY[k], 4532 + mode_lib->vba.DCCMetaPitchY[k], 4533 + &locals->MacroTileWidthY[k], 4534 + &mode_lib->vba.MetaRowBytesY, 4535 + &mode_lib->vba.DPTEBytesPerRowY, 4536 + &locals->PTEBufferSizeNotExceededY[i][j][k], 4537 + locals->dpte_row_width_luma_ub, 4538 + 
&locals->dpte_row_height[k], 4539 + &locals->meta_req_width[k], 4540 + &locals->meta_req_height[k], 4541 + &locals->meta_row_width[k], 4542 + &locals->meta_row_height[k], 4543 + &locals->vm_group_bytes[k], 4544 + &locals->dpte_group_bytes[k], 4545 + locals->PixelPTEReqWidthY, 4546 + locals->PixelPTEReqHeightY, 4547 + locals->PTERequestSizeY, 4548 + locals->dpde0_bytes_per_frame_ub_l, 4549 + locals->meta_pte_bytes_per_frame_ub_l); 4550 + locals->PrefetchLinesY[k] = CalculatePrefetchSourceLines( 4551 + mode_lib, 4552 + mode_lib->vba.VRatio[k], 4553 + mode_lib->vba.vtaps[k], 4554 + mode_lib->vba.Interlace[k], 4555 + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, 4556 + locals->SwathHeightYThisState[k], 4557 + mode_lib->vba.ViewportYStartY[k], 4558 + &locals->PrefillY[k], 4559 + &locals->MaxNumSwY[k]); 4560 + locals->PDEAndMetaPTEBytesPerFrame[k] = 4561 + mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC; 4562 + locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; 4563 + locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; 4564 + 4565 + CalculateActiveRowBandwidth( 4566 + mode_lib->vba.GPUVMEnable, 4567 + mode_lib->vba.SourcePixelFormat[k], 4568 + mode_lib->vba.VRatio[k], 4569 + mode_lib->vba.DCCEnable[k], 4570 + mode_lib->vba.HTotal[k] / 4571 + mode_lib->vba.PixelClock[k], 4572 + mode_lib->vba.MetaRowBytesY, 4573 + mode_lib->vba.MetaRowBytesC, 4574 + locals->meta_row_height[k], 4575 + locals->meta_row_height_chroma[k], 4576 + mode_lib->vba.DPTEBytesPerRowY, 4577 + mode_lib->vba.DPTEBytesPerRowC, 4578 + locals->dpte_row_height[k], 4579 + locals->dpte_row_height_chroma[k], 4580 + &locals->meta_row_bw[k], 4581 + &locals->dpte_row_bw[k]); 4582 + } 4583 + mode_lib->vba.ExtraLatency = CalculateExtraLatency( 4584 + locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i], 4585 + locals->TotalNumberOfActiveDPP[i][j], 4586 + mode_lib->vba.PixelChunkSizeInKByte, 4587 + 
locals->TotalNumberOfDCCActiveDPP[i][j], 4588 + mode_lib->vba.MetaChunkSize, 4589 + locals->ReturnBWPerState[i], 4590 + mode_lib->vba.GPUVMEnable, 4591 + mode_lib->vba.HostVMEnable, 4592 + mode_lib->vba.NumberOfActivePlanes, 4593 + locals->NoOfDPPThisState, 4594 + locals->dpte_group_bytes, 4595 + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, 4596 + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, 4597 + mode_lib->vba.HostVMMaxPageTableLevels, 4598 + mode_lib->vba.HostVMCachedPageTableLevels); 4599 + 4600 + mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep; 4601 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4602 + if (mode_lib->vba.BlendingAndTiming[k] == k) { 4603 + if (mode_lib->vba.WritebackEnable[k] == true) { 4604 + locals->WritebackDelay[i][k] = mode_lib->vba.WritebackLatency 4605 + + CalculateWriteBackDelay( 4606 + mode_lib->vba.WritebackPixelFormat[k], 4607 + mode_lib->vba.WritebackHRatio[k], 4608 + mode_lib->vba.WritebackVRatio[k], 4609 + mode_lib->vba.WritebackLumaHTaps[k], 4610 + mode_lib->vba.WritebackLumaVTaps[k], 4611 + mode_lib->vba.WritebackChromaHTaps[k], 4612 + mode_lib->vba.WritebackChromaVTaps[k], 4613 + mode_lib->vba.WritebackDestinationWidth[k]) / locals->RequiredDISPCLK[i][j]; 4614 + } else { 4615 + locals->WritebackDelay[i][k] = 0.0; 4616 + } 4617 + for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { 4618 + if (mode_lib->vba.BlendingAndTiming[m] == k 4619 + && mode_lib->vba.WritebackEnable[m] 4620 + == true) { 4621 + locals->WritebackDelay[i][k] = dml_max(locals->WritebackDelay[i][k], 4622 + mode_lib->vba.WritebackLatency + CalculateWriteBackDelay( 4623 + mode_lib->vba.WritebackPixelFormat[m], 4624 + mode_lib->vba.WritebackHRatio[m], 4625 + mode_lib->vba.WritebackVRatio[m], 4626 + mode_lib->vba.WritebackLumaHTaps[m], 4627 + mode_lib->vba.WritebackLumaVTaps[m], 4628 + mode_lib->vba.WritebackChromaHTaps[m], 
4629 + mode_lib->vba.WritebackChromaVTaps[m], 4630 + mode_lib->vba.WritebackDestinationWidth[m]) / locals->RequiredDISPCLK[i][j]); 4631 + } 4632 + } 4633 + } 4634 + } 4635 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4636 + for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { 4637 + if (mode_lib->vba.BlendingAndTiming[k] == m) { 4638 + locals->WritebackDelay[i][k] = locals->WritebackDelay[i][m]; 4639 + } 4640 + } 4641 + } 4642 + mode_lib->vba.MaxMaxVStartup = 0; 4643 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4644 + locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] 4645 + - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0)); 4646 + mode_lib->vba.MaxMaxVStartup = dml_max(mode_lib->vba.MaxMaxVStartup, locals->MaximumVStartup[k]); 4647 + } 4648 + 4649 + mode_lib->vba.NextPrefetchMode = mode_lib->vba.MinPrefetchMode; 4650 + mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup; 4651 + do { 4652 + mode_lib->vba.PrefetchMode[i][j] = mode_lib->vba.NextPrefetchMode; 4653 + mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup; 4654 + 4655 + mode_lib->vba.TWait = CalculateTWait( 4656 + mode_lib->vba.PrefetchMode[i][j], 4657 + mode_lib->vba.DRAMClockChangeLatency, 4658 + mode_lib->vba.UrgentLatency, 4659 + mode_lib->vba.SREnterPlusExitTime); 4660 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4661 + Pipe myPipe; 4662 + HostVM myHostVM; 4663 + 4664 + if (mode_lib->vba.XFCEnabled[k] == true) { 4665 + mode_lib->vba.XFCRemoteSurfaceFlipDelay = 4666 + CalculateRemoteSurfaceFlipDelay( 4667 + mode_lib, 4668 + mode_lib->vba.VRatio[k], 4669 + locals->SwathWidthYThisState[k], 4670 + dml_ceil(locals->BytePerPixelInDETY[k], 1.0), 4671 + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], 4672 + mode_lib->vba.XFCTSlvVupdateOffset, 4673 + mode_lib->vba.XFCTSlvVupdateWidth, 4674 + 
mode_lib->vba.XFCTSlvVreadyOffset, 4675 + mode_lib->vba.XFCXBUFLatencyTolerance, 4676 + mode_lib->vba.XFCFillBWOverhead, 4677 + mode_lib->vba.XFCSlvChunkSize, 4678 + mode_lib->vba.XFCBusTransportTime, 4679 + mode_lib->vba.TimeCalc, 4680 + mode_lib->vba.TWait, 4681 + &mode_lib->vba.SrcActiveDrainRate, 4682 + &mode_lib->vba.TInitXFill, 4683 + &mode_lib->vba.TslvChk); 4684 + } else { 4685 + mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0; 4686 + } 4687 + 4688 + myPipe.DPPCLK = locals->RequiredDPPCLK[i][j][k]; 4689 + myPipe.DISPCLK = locals->RequiredDISPCLK[i][j]; 4690 + myPipe.PixelClock = mode_lib->vba.PixelClock[k]; 4691 + myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep; 4692 + myPipe.DPPPerPlane = locals->NoOfDPP[i][j][k]; 4693 + myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; 4694 + myPipe.SourceScan = mode_lib->vba.SourceScan[k]; 4695 + myPipe.BlockWidth256BytesY = locals->Read256BlockWidthY[k]; 4696 + myPipe.BlockHeight256BytesY = locals->Read256BlockHeightY[k]; 4697 + myPipe.BlockWidth256BytesC = locals->Read256BlockWidthC[k]; 4698 + myPipe.BlockHeight256BytesC = locals->Read256BlockHeightC[k]; 4699 + myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; 4700 + myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; 4701 + myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; 4702 + myPipe.HTotal = mode_lib->vba.HTotal[k]; 4703 + 4704 + 4705 + myHostVM.Enable = mode_lib->vba.HostVMEnable; 4706 + myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels; 4707 + myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels; 4708 + 4709 + 4710 + mode_lib->vba.IsErrorResult[i][j][k] = CalculatePrefetchSchedule( 4711 + mode_lib, 4712 + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, 4713 + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, 4714 + &myPipe, 4715 + locals->DSCDelayPerState[i][k], 4716 + 
mode_lib->vba.DPPCLKDelaySubtotal, 4717 + mode_lib->vba.DPPCLKDelaySCL, 4718 + mode_lib->vba.DPPCLKDelaySCLLBOnly, 4719 + mode_lib->vba.DPPCLKDelayCNVCFormater, 4720 + mode_lib->vba.DPPCLKDelayCNVCCursor, 4721 + mode_lib->vba.DISPCLKDelaySubtotal, 4722 + locals->SwathWidthYThisState[k] / mode_lib->vba.HRatio[k], 4723 + mode_lib->vba.OutputFormat[k], 4724 + mode_lib->vba.MaxInterDCNTileRepeaters, 4725 + dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[k]), 4726 + locals->MaximumVStartup[k], 4727 + mode_lib->vba.GPUVMMaxPageTableLevels, 4728 + mode_lib->vba.GPUVMEnable, 4729 + &myHostVM, 4730 + mode_lib->vba.DynamicMetadataEnable[k], 4731 + mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], 4732 + mode_lib->vba.DynamicMetadataTransmittedBytes[k], 4733 + mode_lib->vba.DCCEnable[k], 4734 + mode_lib->vba.UrgentLatency, 4735 + mode_lib->vba.ExtraLatency, 4736 + mode_lib->vba.TimeCalc, 4737 + locals->PDEAndMetaPTEBytesPerFrame[k], 4738 + locals->MetaRowBytes[k], 4739 + locals->DPTEBytesPerRow[k], 4740 + locals->PrefetchLinesY[k], 4741 + locals->SwathWidthYThisState[k], 4742 + locals->BytePerPixelInDETY[k], 4743 + locals->PrefillY[k], 4744 + locals->MaxNumSwY[k], 4745 + locals->PrefetchLinesC[k], 4746 + locals->BytePerPixelInDETC[k], 4747 + locals->PrefillC[k], 4748 + locals->MaxNumSwC[k], 4749 + locals->SwathHeightYThisState[k], 4750 + locals->SwathHeightCThisState[k], 4751 + mode_lib->vba.TWait, 4752 + mode_lib->vba.XFCEnabled[k], 4753 + mode_lib->vba.XFCRemoteSurfaceFlipDelay, 4754 + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, 4755 + &locals->dst_x_after_scaler, 4756 + &locals->dst_y_after_scaler, 4757 + &locals->LineTimesForPrefetch[k], 4758 + &locals->PrefetchBW[k], 4759 + &locals->LinesForMetaPTE[k], 4760 + &locals->LinesForMetaAndDPTERow[k], 4761 + &locals->VRatioPreY[i][j][k], 4762 + &locals->VRatioPreC[i][j][k], 4763 + &locals->RequiredPrefetchPixelDataBWLuma[i][j][k], 4764 + &locals->RequiredPrefetchPixelDataBWChroma[i][j][k], 4765 + 
&locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, 4766 + &locals->Tno_bw[k], 4767 + &locals->prefetch_vmrow_bw[k], 4768 + locals->swath_width_luma_ub, 4769 + locals->swath_width_chroma_ub, 4770 + &mode_lib->vba.VUpdateOffsetPix[k], 4771 + &mode_lib->vba.VUpdateWidthPix[k], 4772 + &mode_lib->vba.VReadyOffsetPix[k]); 4773 + } 4774 + mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0; 4775 + mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0; 4776 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4777 + uint m; 4778 + 4779 + locals->cursor_bw[k] = 0; 4780 + locals->cursor_bw_pre[k] = 0; 4781 + for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) { 4782 + locals->cursor_bw[k] = mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] 4783 + / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; 4784 + locals->cursor_bw_pre[k] = mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] 4785 + / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * locals->VRatioPreY[i][j][k]; 4786 + } 4787 + 4788 + CalculateUrgentBurstFactor( 4789 + mode_lib->vba.DETBufferSizeInKByte, 4790 + locals->SwathHeightYThisState[k], 4791 + locals->SwathHeightCThisState[k], 4792 + locals->SwathWidthYThisState[k], 4793 + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], 4794 + mode_lib->vba.UrgentLatency, 4795 + mode_lib->vba.CursorBufferSize, 4796 + mode_lib->vba.CursorWidth[k][0] + mode_lib->vba.CursorWidth[k][1], 4797 + dml_max(mode_lib->vba.CursorBPP[k][0], mode_lib->vba.CursorBPP[k][1]), 4798 + mode_lib->vba.VRatio[k], 4799 + locals->VRatioPreY[i][j][k], 4800 + locals->VRatioPreC[i][j][k], 4801 + locals->BytePerPixelInDETY[k], 4802 + locals->BytePerPixelInDETC[k], 4803 + &locals->UrgentBurstFactorCursor[k], 4804 + &locals->UrgentBurstFactorCursorPre[k], 4805 + &locals->UrgentBurstFactorLuma[k], 4806 + &locals->UrgentBurstFactorLumaPre[k], 4807 + &locals->UrgentBurstFactorChroma[k], 4808 + 
&locals->UrgentBurstFactorChromaPre[k], 4809 + &locals->NotEnoughUrgentLatencyHiding, 4810 + &locals->NotEnoughUrgentLatencyHidingPre); 4811 + 4812 + if (mode_lib->vba.UseUrgentBurstBandwidth == false) { 4813 + locals->UrgentBurstFactorCursor[k] = 1; 4814 + locals->UrgentBurstFactorCursorPre[k] = 1; 4815 + locals->UrgentBurstFactorLuma[k] = 1; 4816 + locals->UrgentBurstFactorLumaPre[k] = 1; 4817 + locals->UrgentBurstFactorChroma[k] = 1; 4818 + locals->UrgentBurstFactorChromaPre[k] = 1; 4819 + } 4820 + 4821 + mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = mode_lib->vba.MaximumReadBandwidthWithoutPrefetch 4822 + + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k] + locals->ReadBandwidthLuma[k] 4823 + * locals->UrgentBurstFactorLuma[k] + locals->ReadBandwidthChroma[k] 4824 + * locals->UrgentBurstFactorChroma[k] + locals->meta_row_bw[k] + locals->dpte_row_bw[k]; 4825 + mode_lib->vba.MaximumReadBandwidthWithPrefetch = mode_lib->vba.MaximumReadBandwidthWithPrefetch 4826 + + dml_max3(locals->prefetch_vmrow_bw[k], 4827 + locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] + locals->ReadBandwidthChroma[k] 4828 + * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k] 4829 + + locals->meta_row_bw[k] + locals->dpte_row_bw[k], 4830 + locals->RequiredPrefetchPixelDataBWLuma[i][j][k] * locals->UrgentBurstFactorLumaPre[k] 4831 + + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k] 4832 + + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); 4833 + } 4834 + locals->BandwidthWithoutPrefetchSupported[i] = true; 4835 + if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i] 4836 + || locals->NotEnoughUrgentLatencyHiding == 1) { 4837 + locals->BandwidthWithoutPrefetchSupported[i] = false; 4838 + } 4839 + 4840 + locals->PrefetchSupported[i][j] = true; 4841 + if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i] 
4842 + || locals->NotEnoughUrgentLatencyHiding == 1 4843 + || locals->NotEnoughUrgentLatencyHidingPre == 1) { 4844 + locals->PrefetchSupported[i][j] = false; 4845 + } 4846 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4847 + if (locals->LineTimesForPrefetch[k] < 2.0 4848 + || locals->LinesForMetaPTE[k] >= 32.0 4849 + || locals->LinesForMetaAndDPTERow[k] >= 16.0 4850 + || mode_lib->vba.IsErrorResult[i][j][k] == true) { 4851 + locals->PrefetchSupported[i][j] = false; 4852 + } 4853 + } 4854 + locals->VRatioInPrefetchSupported[i][j] = true; 4855 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4856 + if (locals->VRatioPreY[i][j][k] > 4.0 4857 + || locals->VRatioPreC[i][j][k] > 4.0 4858 + || mode_lib->vba.IsErrorResult[i][j][k] == true) { 4859 + locals->VRatioInPrefetchSupported[i][j] = false; 4860 + } 4861 + } 4862 + mode_lib->vba.AnyLinesForVMOrRowTooLarge = false; 4863 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 4864 + if (locals->LinesForMetaAndDPTERow[k] >= 16 || locals->LinesForMetaPTE[k] >= 32) { 4865 + mode_lib->vba.AnyLinesForVMOrRowTooLarge = true; 4866 + } 4867 + } 4868 + 4869 + if (mode_lib->vba.MaxVStartup <= 13 || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) { 4870 + mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup; 4871 + mode_lib->vba.NextPrefetchMode = mode_lib->vba.NextPrefetchMode + 1; 4872 + } else { 4873 + mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1; 4874 + } 4875 + } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true) 4876 + && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup 4877 + || mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode)); 4878 + 4879 + if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) { 4880 + mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i]; 4881 + for (k = 0; k <= 
mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4882 + mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.BandwidthAvailableForImmediateFlip 4883 + - dml_max(locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] 4884 + + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k] 4885 + + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k], 4886 + locals->RequiredPrefetchPixelDataBWLuma[i][j][k] * locals->UrgentBurstFactorLumaPre[k] 4887 + + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k] 4888 + + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); 4889 + } 4890 + mode_lib->vba.TotImmediateFlipBytes = 0.0; 4891 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4892 + mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes 4893 + + locals->PDEAndMetaPTEBytesPerFrame[k] + locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k]; 4894 + } 4895 + 4896 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4897 + CalculateFlipSchedule( 4898 + mode_lib, 4899 + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, 4900 + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, 4901 + mode_lib->vba.ExtraLatency, 4902 + mode_lib->vba.UrgentLatency, 4903 + mode_lib->vba.GPUVMMaxPageTableLevels, 4904 + mode_lib->vba.HostVMEnable, 4905 + mode_lib->vba.HostVMMaxPageTableLevels, 4906 + mode_lib->vba.HostVMCachedPageTableLevels, 4907 + mode_lib->vba.GPUVMEnable, 4908 + locals->PDEAndMetaPTEBytesPerFrame[k], 4909 + locals->MetaRowBytes[k], 4910 + locals->DPTEBytesPerRow[k], 4911 + mode_lib->vba.BandwidthAvailableForImmediateFlip, 4912 + mode_lib->vba.TotImmediateFlipBytes, 4913 + mode_lib->vba.SourcePixelFormat[k], 4914 + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], 4915 + mode_lib->vba.VRatio[k], 4916 + locals->Tno_bw[k], 4917 + mode_lib->vba.DCCEnable[k], 4918 + 
locals->dpte_row_height[k], 4919 + locals->meta_row_height[k], 4920 + locals->dpte_row_height_chroma[k], 4921 + locals->meta_row_height_chroma[k], 4922 + &locals->DestinationLinesToRequestVMInImmediateFlip[k], 4923 + &locals->DestinationLinesToRequestRowInImmediateFlip[k], 4924 + &locals->final_flip_bw[k], 4925 + &locals->ImmediateFlipSupportedForPipe[k]); 4926 + } 4927 + mode_lib->vba.total_dcn_read_bw_with_flip = 0.0; 4928 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4929 + mode_lib->vba.total_dcn_read_bw_with_flip = mode_lib->vba.total_dcn_read_bw_with_flip + dml_max3( 4930 + locals->prefetch_vmrow_bw[k], 4931 + locals->final_flip_bw[k] + locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] 4932 + + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k] 4933 + + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k], 4934 + locals->final_flip_bw[k] + locals->RequiredPrefetchPixelDataBWLuma[i][j][k] 4935 + * locals->UrgentBurstFactorLumaPre[k] + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] 4936 + * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k] 4937 + * locals->UrgentBurstFactorCursorPre[k]); 4938 + } 4939 + locals->ImmediateFlipSupportedForState[i][j] = true; 4940 + if (mode_lib->vba.total_dcn_read_bw_with_flip 4941 + > locals->ReturnBWPerState[i]) { 4942 + locals->ImmediateFlipSupportedForState[i][j] = false; 4943 + } 4944 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 4945 + if (locals->ImmediateFlipSupportedForPipe[k] == false) { 4946 + locals->ImmediateFlipSupportedForState[i][j] = false; 4947 + } 4948 + } 4949 + } else { 4950 + locals->ImmediateFlipSupportedForState[i][j] = false; 4951 + } 4952 + mode_lib->vba.UrgentOutOfOrderReturnPerChannel = dml_max3( 4953 + mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, 4954 + mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 4955 + mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly); 4956 + 
CalculateWatermarksAndDRAMSpeedChangeSupport( 4957 + mode_lib, 4958 + mode_lib->vba.PrefetchMode[i][j], 4959 + mode_lib->vba.NumberOfActivePlanes, 4960 + mode_lib->vba.MaxLineBufferLines, 4961 + mode_lib->vba.LineBufferSize, 4962 + mode_lib->vba.DPPOutputBufferPixels, 4963 + mode_lib->vba.DETBufferSizeInKByte, 4964 + mode_lib->vba.WritebackInterfaceLumaBufferSize, 4965 + mode_lib->vba.WritebackInterfaceChromaBufferSize, 4966 + mode_lib->vba.DCFCLKPerState[i], 4967 + mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels, 4968 + locals->ReturnBWPerState[i], 4969 + mode_lib->vba.GPUVMEnable, 4970 + locals->dpte_group_bytes, 4971 + mode_lib->vba.MetaChunkSize, 4972 + mode_lib->vba.UrgentLatency, 4973 + mode_lib->vba.ExtraLatency, 4974 + mode_lib->vba.WritebackLatency, 4975 + mode_lib->vba.WritebackChunkSize, 4976 + mode_lib->vba.SOCCLKPerState[i], 4977 + mode_lib->vba.DRAMClockChangeLatency, 4978 + mode_lib->vba.SRExitTime, 4979 + mode_lib->vba.SREnterPlusExitTime, 4980 + mode_lib->vba.ProjectedDCFCLKDeepSleep, 4981 + locals->NoOfDPPThisState, 4982 + mode_lib->vba.DCCEnable, 4983 + locals->RequiredDPPCLKThisState, 4984 + locals->SwathWidthYSingleDPP, 4985 + locals->SwathHeightYThisState, 4986 + locals->ReadBandwidthLuma, 4987 + locals->SwathHeightCThisState, 4988 + locals->ReadBandwidthChroma, 4989 + mode_lib->vba.LBBitPerPixel, 4990 + locals->SwathWidthYThisState, 4991 + mode_lib->vba.HRatio, 4992 + mode_lib->vba.vtaps, 4993 + mode_lib->vba.VTAPsChroma, 4994 + mode_lib->vba.VRatio, 4995 + mode_lib->vba.HTotal, 4996 + mode_lib->vba.PixelClock, 4997 + mode_lib->vba.BlendingAndTiming, 4998 + locals->BytePerPixelInDETY, 4999 + locals->BytePerPixelInDETC, 5000 + mode_lib->vba.WritebackEnable, 5001 + mode_lib->vba.WritebackPixelFormat, 5002 + mode_lib->vba.WritebackDestinationWidth, 5003 + mode_lib->vba.WritebackDestinationHeight, 5004 + mode_lib->vba.WritebackSourceHeight, 5005 + &locals->DRAMClockChangeSupport[i][j], 5006 + 
&mode_lib->vba.UrgentWatermark, 5007 + &mode_lib->vba.WritebackUrgentWatermark, 5008 + &mode_lib->vba.DRAMClockChangeWatermark, 5009 + &mode_lib->vba.WritebackDRAMClockChangeWatermark, 5010 + &mode_lib->vba.StutterExitWatermark, 5011 + &mode_lib->vba.StutterEnterPlusExitWatermark, 5012 + &mode_lib->vba.MinActiveDRAMClockChangeLatencySupported); 5013 + } 5014 + } 5015 + 5016 + /*Vertical Active BW support*/ 5017 + { 5018 + double MaxTotalVActiveRDBandwidth = 0.0; 5019 + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { 5020 + MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + locals->ReadBandwidth[k]; 5021 + } 5022 + for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) { 5023 + locals->MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min( 5024 + locals->IdealSDPPortBandwidthPerState[i] * 5025 + mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation 5026 + / 100.0, mode_lib->vba.DRAMSpeedPerState[i] * 5027 + mode_lib->vba.NumberOfChannels * 5028 + mode_lib->vba.DRAMChannelWidth * 5029 + mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation 5030 + / 100.0); 5031 + 5032 + if (MaxTotalVActiveRDBandwidth <= locals->MaxTotalVerticalActiveAvailableBandwidth[i]) { 5033 + locals->TotalVerticalActiveBandwidthSupport[i] = true; 5034 + } else { 5035 + locals->TotalVerticalActiveBandwidthSupport[i] = false; 5036 + } 5037 + } 5038 + } 5039 + 5040 + /*PTE Buffer Size Check*/ 5041 + 5042 + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { 5043 + for (j = 0; j < 2; j++) { 5044 + locals->PTEBufferSizeNotExceeded[i][j] = true; 5045 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 5046 + if (locals->PTEBufferSizeNotExceededY[i][j][k] == false 5047 + || locals->PTEBufferSizeNotExceededC[i][j][k] == false) { 5048 + locals->PTEBufferSizeNotExceeded[i][j] = false; 5049 + } 5050 + } 5051 + } 5052 + } 5053 + /*Cursor Support Check*/ 5054 + 5055 + mode_lib->vba.CursorSupport = true; 5056 + for (k = 0; 
k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 5057 + if (mode_lib->vba.CursorWidth[k][0] > 0.0) { 5058 + for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) { 5059 + if (mode_lib->vba.CursorBPP[k][m] == 64 && mode_lib->vba.Cursor64BppSupport == false) { 5060 + mode_lib->vba.CursorSupport = false; 5061 + } 5062 + } 5063 + } 5064 + } 5065 + /*Valid Pitch Check*/ 5066 + 5067 + mode_lib->vba.PitchSupport = true; 5068 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 5069 + locals->AlignedYPitch[k] = dml_ceil( 5070 + dml_max(mode_lib->vba.PitchY[k], mode_lib->vba.ViewportWidth[k]), 5071 + locals->MacroTileWidthY[k]); 5072 + if (locals->AlignedYPitch[k] > mode_lib->vba.PitchY[k]) { 5073 + mode_lib->vba.PitchSupport = false; 5074 + } 5075 + if (mode_lib->vba.DCCEnable[k] == true) { 5076 + locals->AlignedDCCMetaPitch[k] = dml_ceil( 5077 + dml_max( 5078 + mode_lib->vba.DCCMetaPitchY[k], 5079 + mode_lib->vba.ViewportWidth[k]), 5080 + 64.0 * locals->Read256BlockWidthY[k]); 5081 + } else { 5082 + locals->AlignedDCCMetaPitch[k] = mode_lib->vba.DCCMetaPitchY[k]; 5083 + } 5084 + if (locals->AlignedDCCMetaPitch[k] > mode_lib->vba.DCCMetaPitchY[k]) { 5085 + mode_lib->vba.PitchSupport = false; 5086 + } 5087 + if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 5088 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 5089 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 5090 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 5091 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) { 5092 + locals->AlignedCPitch[k] = dml_ceil( 5093 + dml_max( 5094 + mode_lib->vba.PitchC[k], 5095 + mode_lib->vba.ViewportWidth[k] / 2.0), 5096 + locals->MacroTileWidthC[k]); 5097 + } else { 5098 + locals->AlignedCPitch[k] = mode_lib->vba.PitchC[k]; 5099 + } 5100 + if (locals->AlignedCPitch[k] > mode_lib->vba.PitchC[k]) { 5101 + mode_lib->vba.PitchSupport = false; 5102 + } 5103 + } 5104 + /*Mode Support, Voltage State and SOC Configuration*/ 5105 + 5106 + for (i = 
mode_lib->vba.soc.num_states; i >= 0; i--) { 5107 + for (j = 0; j < 2; j++) { 5108 + enum dm_validation_status status = DML_VALIDATION_OK; 5109 + 5110 + if (mode_lib->vba.ScaleRatioAndTapsSupport != true) { 5111 + status = DML_FAIL_SCALE_RATIO_TAP; 5112 + } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) { 5113 + status = DML_FAIL_SOURCE_PIXEL_FORMAT; 5114 + } else if (locals->ViewportSizeSupport[i] != true) { 5115 + status = DML_FAIL_VIEWPORT_SIZE; 5116 + } else if (locals->DIOSupport[i] != true) { 5117 + status = DML_FAIL_DIO_SUPPORT; 5118 + } else if (locals->NotEnoughDSCUnits[i] != false) { 5119 + status = DML_FAIL_NOT_ENOUGH_DSC; 5120 + } else if (locals->DSCCLKRequiredMoreThanSupported[i] != false) { 5121 + status = DML_FAIL_DSC_CLK_REQUIRED; 5122 + } else if (locals->ROBSupport[i] != true) { 5123 + status = DML_FAIL_REORDERING_BUFFER; 5124 + } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) { 5125 + status = DML_FAIL_DISPCLK_DPPCLK; 5126 + } else if (locals->TotalAvailablePipesSupport[i][j] != true) { 5127 + status = DML_FAIL_TOTAL_AVAILABLE_PIPES; 5128 + } else if (mode_lib->vba.NumberOfOTGSupport != true) { 5129 + status = DML_FAIL_NUM_OTG; 5130 + } else if (mode_lib->vba.WritebackModeSupport != true) { 5131 + status = DML_FAIL_WRITEBACK_MODE; 5132 + } else if (mode_lib->vba.WritebackLatencySupport != true) { 5133 + status = DML_FAIL_WRITEBACK_LATENCY; 5134 + } else if (mode_lib->vba.WritebackScaleRatioAndTapsSupport != true) { 5135 + status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP; 5136 + } else if (mode_lib->vba.CursorSupport != true) { 5137 + status = DML_FAIL_CURSOR_SUPPORT; 5138 + } else if (mode_lib->vba.PitchSupport != true) { 5139 + status = DML_FAIL_PITCH_SUPPORT; 5140 + } else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) { 5141 + status = DML_FAIL_TOTAL_V_ACTIVE_BW; 5142 + } else if (locals->PTEBufferSizeNotExceeded[i][j] != true) { 5143 + status = DML_FAIL_PTE_BUFFER_SIZE; 5144 + } else if 
(mode_lib->vba.NonsupportedDSCInputBPC != false) { 5145 + status = DML_FAIL_DSC_INPUT_BPC; 5146 + } else if ((mode_lib->vba.HostVMEnable != false 5147 + && locals->ImmediateFlipSupportedForState[i][j] != true)) { 5148 + status = DML_FAIL_HOST_VM_IMMEDIATE_FLIP; 5149 + } else if (locals->PrefetchSupported[i][j] != true) { 5150 + status = DML_FAIL_PREFETCH_SUPPORT; 5151 + } else if (locals->VRatioInPrefetchSupported[i][j] != true) { 5152 + status = DML_FAIL_V_RATIO_PREFETCH; 5153 + } 5154 + 5155 + if (status == DML_VALIDATION_OK) { 5156 + locals->ModeSupport[i][j] = true; 5157 + } else { 5158 + locals->ModeSupport[i][j] = false; 5159 + } 5160 + locals->ValidationStatus[i] = status; 5161 + } 5162 + } 5163 + { 5164 + unsigned int MaximumMPCCombine = 0; 5165 + mode_lib->vba.VoltageLevel = mode_lib->vba.soc.num_states + 1; 5166 + for (i = mode_lib->vba.VoltageOverrideLevel; i <= mode_lib->vba.soc.num_states; i++) { 5167 + if (locals->ModeSupport[i][0] == true || locals->ModeSupport[i][1] == true) { 5168 + mode_lib->vba.VoltageLevel = i; 5169 + if (locals->ModeSupport[i][1] == true && (locals->ModeSupport[i][0] == false 5170 + || mode_lib->vba.WhenToDoMPCCombine == dm_mpc_always_when_possible 5171 + || (mode_lib->vba.WhenToDoMPCCombine == dm_mpc_reduce_voltage_and_clocks 5172 + && ((locals->DRAMClockChangeSupport[i][1] == dm_dram_clock_change_vactive 5173 + && locals->DRAMClockChangeSupport[i][0] != dm_dram_clock_change_vactive) 5174 + || (locals->DRAMClockChangeSupport[i][1] == dm_dram_clock_change_vblank 5175 + && locals->DRAMClockChangeSupport[i][0] == dm_dram_clock_change_unsupported))))) { 5176 + MaximumMPCCombine = 1; 5177 + } else { 5178 + MaximumMPCCombine = 0; 5179 + } 5180 + break; 5181 + } 5182 + } 5183 + mode_lib->vba.ImmediateFlipSupport = 5184 + locals->ImmediateFlipSupportedForState[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; 5185 + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { 5186 + mode_lib->vba.DPPPerPlane[k] = 
locals->NoOfDPP[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
			locals->DPPCLK[k] = locals->RequiredDPPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
		}
		mode_lib->vba.DISPCLK = locals->RequiredDISPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
		mode_lib->vba.maxMpcComb = MaximumMPCCombine;
	}
	mode_lib->vba.DCFCLK = mode_lib->vba.DCFCLKPerState[mode_lib->vba.VoltageLevel];
	mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel];
	mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel];
	mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel];
	mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel];
	for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
		if (mode_lib->vba.BlendingAndTiming[k] == k) {
			mode_lib->vba.ODMCombineEnabled[k] =
					locals->ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k];
		} else {
			mode_lib->vba.ODMCombineEnabled[k] = 0;
		}
		mode_lib->vba.DSCEnabled[k] =
				locals->RequiresDSC[mode_lib->vba.VoltageLevel][k];
		mode_lib->vba.OutputBpp[k] =
				locals->OutputBppPerState[mode_lib->vba.VoltageLevel][k];
	}
}

/*
 * CalculateWatermarksAndDRAMSpeedChangeSupport - compute the urgent, DRAM
 * clock change and stutter watermarks, and classify DRAM clock change support.
 *
 * Results written through the pointer parameters:
 *   *UrgentWatermark          = UrgentLatency + ExtraLatency
 *   *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark
 *   *WritebackUrgentWatermark = WritebackLatency, plus
 *        WritebackChunkSize * 1024.0 / 32.0 / SOCCLK when more than one plane
 *        has WritebackEnable[] set
 *   *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency plus the same
 *        expression as *WritebackUrgentWatermark
 *   *MinActiveDRAMClockChangeLatencySupported = smallest per-plane margin
 *        + DRAMClockChangeLatency
 *   *DRAMClockChangeSupport   = dm_dram_clock_change_vactive, _vblank or
 *        _unsupported (see the decision near the end of the function)
 *   *StutterExitWatermark, *StutterEnterPlusExitWatermark
 *
 * Fields of mode_lib->vba written as scratch/side results:
 *   TotalActiveDPP, TotalDCCActiveDPP, TotalDataReadBandwidth,
 *   TotalActiveWriteback, LBLatencyHidingSourceLinesY/C,
 *   ActiveDRAMClockChangeLatencyMargin[], MinActiveDRAMClockChangeMargin,
 *   TotalNumberOfActiveOTG.
 * Fields of mode_lib->vba read: SynchronizedVBlank,
 *   LastPixelOfLineExtraWatermark.
 *
 * All array parameters are indexed by plane, 0 .. NumberOfActivePlanes - 1.
 *
 * The parameters DCFCLK, UrgentOutOfOrderReturn, ReturnBW, GPUVMEnable,
 * dpte_group_bytes, MetaChunkSize, DPPCLK and SwathWidthSingleDPPY are
 * accepted but not referenced anywhere in this function body.
 *
 * NOTE(review): the local per-plane arrays are sized DC__NUM_DPP__MAX and
 * indexed up to NumberOfActivePlanes - 1 with no bound check here; this
 * assumes NumberOfActivePlanes <= DC__NUM_DPP__MAX - confirm against callers.
 */
static void CalculateWatermarksAndDRAMSpeedChangeSupport(
		struct display_mode_lib *mode_lib,
		unsigned int PrefetchMode,
		unsigned int NumberOfActivePlanes,
		unsigned int MaxLineBufferLines,
		unsigned int LineBufferSize,
		unsigned int DPPOutputBufferPixels,
		double DETBufferSizeInKByte,
		unsigned int WritebackInterfaceLumaBufferSize,
		unsigned int WritebackInterfaceChromaBufferSize,
		double DCFCLK,
		double UrgentOutOfOrderReturn,
		double ReturnBW,
		bool GPUVMEnable,
		long dpte_group_bytes[],
		unsigned int MetaChunkSize,
		double UrgentLatency,
		double ExtraLatency,
		double WritebackLatency,
		double WritebackChunkSize,
		double SOCCLK,
		double DRAMClockChangeLatency,
		double SRExitTime,
		double SREnterPlusExitTime,
		double DCFCLKDeepSleep,
		int DPPPerPlane[],
		bool DCCEnable[],
		double DPPCLK[],
		unsigned int SwathWidthSingleDPPY[],
		unsigned int SwathHeightY[],
		double ReadBandwidthPlaneLuma[],
		unsigned int SwathHeightC[],
		double ReadBandwidthPlaneChroma[],
		unsigned int LBBitPerPixel[],
		unsigned int SwathWidthY[],
		double HRatio[],
		unsigned int vtaps[],
		unsigned int VTAPsChroma[],
		double VRatio[],
		unsigned int HTotal[],
		double PixelClock[],
		unsigned int BlendingAndTiming[],
		double BytePerPixelDETY[],
		double BytePerPixelDETC[],
		bool WritebackEnable[],
		enum source_format_class WritebackPixelFormat[],
		double WritebackDestinationWidth[],
		double WritebackDestinationHeight[],
		double WritebackSourceHeight[],
		enum clock_change_support *DRAMClockChangeSupport,
		double *UrgentWatermark,
		double *WritebackUrgentWatermark,
		double *DRAMClockChangeWatermark,
		double *WritebackDRAMClockChangeWatermark,
		double *StutterExitWatermark,
		double *StutterEnterPlusExitWatermark,
		double *MinActiveDRAMClockChangeLatencySupported)
{
	double EffectiveLBLatencyHidingY;
	double EffectiveLBLatencyHidingC;
	double DPPOutputBufferLinesY;
	double DPPOutputBufferLinesC;
	double DETBufferSizeY;
	double DETBufferSizeC;
	double LinesInDETY[DC__NUM_DPP__MAX];
	double LinesInDETC;
	unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
	unsigned int LinesInDETCRoundedDownToSwath;
	double FullDETBufferingTimeY[DC__NUM_DPP__MAX];
	double FullDETBufferingTimeC;
	double ActiveDRAMClockChangeLatencyMarginY;
	double ActiveDRAMClockChangeLatencyMarginC;
	double WritebackDRAMClockChangeLatencyMargin;
	/* Holds a plane index even though it is declared as double. */
	double PlaneWithMinActiveDRAMClockChangeMargin;
	double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
	double FullDETBufferingTimeYStutterCriticalPlane = 0;
	double TimeToFinishSwathTransferStutterCriticalPlane = 0;
	uint k, j;

	/* Sum DPPs over all planes, and separately over planes with DCC enabled. */
	mode_lib->vba.TotalActiveDPP = 0;
	mode_lib->vba.TotalDCCActiveDPP = 0;
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k];
		if (DCCEnable[k] == true) {
			mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k];
		}
	}

	/* Total luma + chroma read bandwidth over all planes. */
	mode_lib->vba.TotalDataReadBandwidth = 0;
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		mode_lib->vba.TotalDataReadBandwidth = mode_lib->vba.TotalDataReadBandwidth
				+ ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
	}

	*UrgentWatermark = UrgentLatency + ExtraLatency;

	*DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;

	/* Count planes with writeback enabled. */
	mode_lib->vba.TotalActiveWriteback = 0;
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		if (WritebackEnable[k] == true) {
			mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
		}
	}

	/* With more than one active writeback an extra chunk term is added. */
	if (mode_lib->vba.TotalActiveWriteback <= 1) {
		*WritebackUrgentWatermark = WritebackLatency;
	} else {
		*WritebackUrgentWatermark = WritebackLatency
				+ WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
	}

	if (mode_lib->vba.TotalActiveWriteback <= 1) {
		*WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
	} else {
		*WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency
				+ WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
	}

	/*
	 * Per plane: compute how far the line buffer, DPP output buffer and DET
	 * buffer exceed *DRAMClockChangeWatermark and store the resulting
	 * margin in mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k].
	 */
	for (k = 0; k < NumberOfActivePlanes; ++k) {

		/*
		 * LineBufferSize / LBBitPerPixel[k] is an unsigned integer
		 * division (both operands are unsigned int); only the following
		 * division by the dml_max() result is done in double.
		 */
		mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines,
			dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1))
				- (vtaps[k] - 1);

		/* Here SwathWidthY[k] / 2 is also an unsigned integer division. */
		mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines,
			dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / 2 / dml_max(HRatio[k] / 2, 1.0)), 1))
				- (VTAPsChroma[k] - 1);

		/* Convert source lines to time: lines / VRatio * (HTotal / PixelClock). */
		EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k]
				* (HTotal[k] / PixelClock[k]);

		EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC
				/ (VRatio[k] / 2) * (HTotal[k] / PixelClock[k]);

		/*
		 * Lines held by the DPP output buffer: a fraction when the swath
		 * is wider than twice the buffer, 0.5 when wider than the buffer,
		 * otherwise 1.
		 */
		if (SwathWidthY[k] > 2 * DPPOutputBufferPixels) {
			DPPOutputBufferLinesY = (double) DPPOutputBufferPixels / SwathWidthY[k];
		} else if (SwathWidthY[k] > DPPOutputBufferPixels) {
			DPPOutputBufferLinesY = 0.5;
		} else {
			DPPOutputBufferLinesY = 1;
		}

		/* Same classification for chroma, using half the luma swath width. */
		if (SwathWidthY[k] / 2.0 > 2 * DPPOutputBufferPixels) {
			DPPOutputBufferLinesC = (double) DPPOutputBufferPixels
					/ (SwathWidthY[k] / 2.0);
		} else if (SwathWidthY[k] / 2.0 > DPPOutputBufferPixels) {
			DPPOutputBufferLinesC = 0.5;
		} else {
			DPPOutputBufferLinesC = 1;
		}

		CalculateDETBufferSize(
				DETBufferSizeInKByte,
				SwathHeightY[k],
				SwathHeightC[k],
				&DETBufferSizeY,
				&DETBufferSizeC);

		/*
		 * dml_floor() is defined elsewhere; presumably it rounds down to
		 * a multiple of its second argument (here the swath height) -
		 * confirm against its definition.
		 */
		LinesInDETY[k] = DETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
		LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
		FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k]
				* (HTotal[k] / PixelClock[k]) / VRatio[k];
		if (BytePerPixelDETC[k] > 0) {
			LinesInDETC = DETBufferSizeC / BytePerPixelDETC[k] / (SwathWidthY[k] / 2.0);
			LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
			FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath
					* (HTotal[k] / PixelClock[k]) / (VRatio[k] / 2);
		} else {
			/*
			 * No chroma: these values are not read again for this
			 * plane, because every later use of FullDETBufferingTimeC
			 * is guarded by BytePerPixelDETC[k] > 0.
			 */
			LinesInDETC = 0;
			FullDETBufferingTimeC = 999999;
		}

		ActiveDRAMClockChangeLatencyMarginY = HTotal[k] / PixelClock[k]
				* DPPOutputBufferLinesY + EffectiveLBLatencyHidingY
				+ FullDETBufferingTimeY[k] - *DRAMClockChangeWatermark;

		/* With several planes, subtract a (1 - 1/N) share of one swath time. */
		if (NumberOfActivePlanes > 1) {
			ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
				- (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
		}

		if (BytePerPixelDETC[k] > 0) {
			ActiveDRAMClockChangeLatencyMarginC = HTotal[k] / PixelClock[k]
					* DPPOutputBufferLinesC + EffectiveLBLatencyHidingC
					+ FullDETBufferingTimeC - *DRAMClockChangeWatermark;
			if (NumberOfActivePlanes > 1) {
				ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
					- (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / (VRatio[k] / 2);
			}
			/* The plane margin is the tighter of the luma and chroma margins. */
			mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(
					ActiveDRAMClockChangeLatencyMarginY,
					ActiveDRAMClockChangeLatencyMarginC);
		} else {
			mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
		}

		/*
		 * A plane with writeback enabled is additionally limited by the
		 * writeback margin; dm_444_32 uses a different buffer-size and
		 * rate expression from every other writeback pixel format.
		 */
		if (WritebackEnable[k] == true) {
			if (WritebackPixelFormat[k] == dm_444_32) {
				WritebackDRAMClockChangeLatencyMargin = (WritebackInterfaceLumaBufferSize
					+ WritebackInterfaceChromaBufferSize) / (WritebackDestinationWidth[k]
					* WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k]
					/ PixelClock[k]) * 4) - *WritebackDRAMClockChangeWatermark;
			} else {
				WritebackDRAMClockChangeLatencyMargin = dml_min(
						WritebackInterfaceLumaBufferSize * 8.0 / 10,
						2 * WritebackInterfaceChromaBufferSize * 8.0 / 10) / (WritebackDestinationWidth[k]
						* WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]))
						- *WritebackDRAMClockChangeWatermark;
			}
			mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(
					mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k],
					WritebackDRAMClockChangeLatencyMargin);
		}
	}

	/*
	 * Find the smallest per-plane margin. The plane recorded alongside it
	 * is k itself when BlendingAndTiming[k] == k, otherwise the plane index
	 * j that BlendingAndTiming[k] refers to.
	 */
	mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999;
	PlaneWithMinActiveDRAMClockChangeMargin = 0;
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]
				< mode_lib->vba.MinActiveDRAMClockChangeMargin) {
			mode_lib->vba.MinActiveDRAMClockChangeMargin =
					mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
			if (BlendingAndTiming[k] == k) {
				PlaneWithMinActiveDRAMClockChangeMargin = k;
			} else {
				for (j = 0; j < NumberOfActivePlanes; ++j) {
					if (BlendingAndTiming[k] == j) {
						PlaneWithMinActiveDRAMClockChangeMargin = j;
					}
				}
			}
		}
	}

	*MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;

	/*
	 * Smallest margin among the remaining planes: skip the plane recorded
	 * above and every plane whose BlendingAndTiming[] entry points at it.
	 */
	SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k))
				&& !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
				&& mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]
						< SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
			SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank =
					mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
		}
	}

	/* Planes with BlendingAndTiming[k] == k are the ones counted as active OTGs. */
	mode_lib->vba.TotalNumberOfActiveOTG = 0;
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		if (BlendingAndTiming[k] == k) {
			mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1;
		}
	}

	/*
	 * Classification:
	 *  - vactive when the smallest margin is positive;
	 *  - vblank when PrefetchMode == 0 and any of: SynchronizedVBlank is
	 *    set, exactly one OTG is active, or the second-smallest margin
	 *    computed above is positive;
	 *  - unsupported otherwise.
	 */
	if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) {
		*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
	} else if (((mode_lib->vba.SynchronizedVBlank == true
			|| mode_lib->vba.TotalNumberOfActiveOTG == 1
			|| SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0)
			&& PrefetchMode == 0)) {
		*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
	} else {
		*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
	}

	/*
	 * FullDETBufferingTimeYStutterCriticalPlane is set once from plane 0
	 * and is not updated inside the loop, so every plane is compared
	 * against plane 0's value and the last plane that passes the test
	 * supplies TimeToFinishSwathTransferStutterCriticalPlane.
	 *
	 * NOTE(review): if the intent is to use the plane with the minimum
	 * buffering time, the running minimum would have to be updated in the
	 * loop - confirm against the HW formula before changing anything.
	 * NOTE(review): FullDETBufferingTimeY[0] is read even if
	 * NumberOfActivePlanes is 0, in which case it was never written.
	 */
	FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0];
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) {
			TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k]
					- (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k]))
					* (HTotal[k] / PixelClock[k]) / VRatio[k];
		}
	}

	*StutterExitWatermark = SRExitTime + mode_lib->vba.LastPixelOfLineExtraWatermark
			+ ExtraLatency + 10 / DCFCLKDeepSleep;
	/* Enter+exit watermark is never below the swath-transfer time found above. */
	*StutterEnterPlusExitWatermark = dml_max(
			SREnterPlusExitTime + mode_lib->vba.LastPixelOfLineExtraWatermark
					+ ExtraLatency + 10 / DCFCLKDeepSleep,
			TimeToFinishSwathTransferStutterCriticalPlane);

}

/*
 * CalculateDCFCLKDeepSleep - compute a per-plane value into
 * mode_lib->vba.DCFCLKDeepSleepPerPlane[] and return, through
 * *DCFCLKDeepSleep, the largest of those values, with a lower bound of 8.
 *
 * For each plane k in 0 .. NumberOfActivePlanes - 1:
 *  - a luma (and, when BytePerPixelDETC[k] != 0, chroma) line delivery time
 *    is derived from the pixel clock when the vertical ratio is <= 1, and
 *    from the scaler throughput and DPPCLK otherwise;
 *  - the per-plane value is 1.1 * bytes-per-line / delivery time, divided
 *    by 32.0 when chroma is present (taking the larger of luma and chroma)
 *    and by 64.0 for luma only;
 *  - the per-plane value is then raised to at least PixelClock[k] / 16.
 */
static void CalculateDCFCLKDeepSleep(
		struct display_mode_lib *mode_lib,
		unsigned int NumberOfActivePlanes,
		double BytePerPixelDETY[],
		double BytePerPixelDETC[],
		double VRatio[],
		unsigned int SwathWidthY[],
		int DPPPerPlane[],
		double HRatio[],
		double PixelClock[],
		double PSCL_THROUGHPUT[],
		double PSCL_THROUGHPUT_CHROMA[],
		double DPPCLK[],
		double *DCFCLKDeepSleep)
{
	uint k;
	double DisplayPipeLineDeliveryTimeLuma;
	double DisplayPipeLineDeliveryTimeChroma;
	//double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		/*
		 * SwathWidthY[k] * DPPPerPlane[k] multiplies an unsigned int by
		 * an int in integer arithmetic before the division by the double
		 * HRatio[k].
		 */
		if (VRatio[k] <= 1) {
			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k]
					/ HRatio[k] / PixelClock[k];
		} else {
			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k]
					/ DPPCLK[k];
		}
		if (BytePerPixelDETC[k] == 0) {
			/* No chroma; this 0 is not used as a divisor below. */
			DisplayPipeLineDeliveryTimeChroma = 0;
		} else {
			if (VRatio[k] / 2 <= 1) {
				DisplayPipeLineDeliveryTimeChroma = SwathWidthY[k] / 2.0
						* DPPPerPlane[k] / (HRatio[k] / 2) / PixelClock[k];
			} else {
				DisplayPipeLineDeliveryTimeChroma = SwathWidthY[k] / 2.0
						/ PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
			}
		}

		if (BytePerPixelDETC[k] > 0) {
			/* Larger of the luma and chroma requirements. */
			mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(
					1.1 * SwathWidthY[k] * dml_ceil(BytePerPixelDETY[k], 1)
							/ 32.0 / DisplayPipeLineDeliveryTimeLuma,
					1.1 * SwathWidthY[k] / 2.0
							* dml_ceil(BytePerPixelDETC[k], 2) / 32.0
							/ DisplayPipeLineDeliveryTimeChroma);
		} else {
			/* Luma only: note the divisor is 64.0 here, not 32.0. */
			mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * SwathWidthY[k]
					* dml_ceil(BytePerPixelDETY[k], 1) / 64.0
					/ DisplayPipeLineDeliveryTimeLuma;
		}
		/* Per-plane lower bound of PixelClock / 16. */
		mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(
				mode_lib->vba.DCFCLKDeepSleepPerPlane[k],
				PixelClock[k] / 16);

	}

	/* Result is the maximum over all planes, and never less than 8. */
	*DCFCLKDeepSleep = 8;
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		*DCFCLKDeepSleep = dml_max(
				*DCFCLKDeepSleep,
				mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
	}
}

/*
 * CalculateDETBufferSize - split the DET buffer between luma and chroma.
 *
 * The total is DETBufferSizeInKByte * 1024. It is divided as follows:
 *  - SwathHeightC == 0:            all of it to *DETBufferSizeY, 0 to chroma;
 *  - SwathHeightY <= SwathHeightC: one half each;
 *  - otherwise:                    two thirds to luma, one third to chroma.
 */
static void CalculateDETBufferSize(
		double DETBufferSizeInKByte,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double *DETBufferSizeY,
		double *DETBufferSizeC)
{
	if (SwathHeightC == 0) {
		*DETBufferSizeY = DETBufferSizeInKByte * 1024;
		*DETBufferSizeC = 0;
	} else if (SwathHeightY <= SwathHeightC) {
		*DETBufferSizeY = DETBufferSizeInKByte * 1024 / 2;
		*DETBufferSizeC = DETBufferSizeInKByte * 1024 / 2;
	} else {
		*DETBufferSizeY = DETBufferSizeInKByte * 1024 * 2 / 3;
		*DETBufferSizeC = DETBufferSizeInKByte * 1024 / 3;
	}
}

static void CalculateUrgentBurstFactor(
		unsigned int DETBufferSizeInKByte,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		unsigned int SwathWidthY,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioPreY,
		double VRatioPreC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorCursorPre,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorLumaPre,
		double *UrgentBurstFactorChroma,
		double *UrgentBurstFactorChromaPre,
		unsigned int *NotEnoughUrgentLatencyHiding,
		unsigned int *NotEnoughUrgentLatencyHidingPre)
{
	double LinesInDETLuma;
	double LinesInDETChroma;
	unsigned int LinesInCursorBuffer;
	double CursorBufferSizeInTime;
	double CursorBufferSizeInTimePre;
	double DETBufferSizeInTimeLuma;
	double DETBufferSizeInTimeLumaPre;
	double DETBufferSizeInTimeChroma;
	double DETBufferSizeInTimeChromaPre;
	double DETBufferSizeY;
	double DETBufferSizeC;

	*NotEnoughUrgentLatencyHiding = 0;
	*NotEnoughUrgentLatencyHidingPre = 0;

	if (CursorWidth > 0) {
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(
			dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
		if (CursorBufferSizeInTime - UrgentLatency <= 0)
{ 5624 + *NotEnoughUrgentLatencyHiding = 1; 5625 + *UrgentBurstFactorCursor = 0; 5626 + } else { 5627 + *UrgentBurstFactorCursor = CursorBufferSizeInTime 5628 + / (CursorBufferSizeInTime - UrgentLatency); 5629 + } 5630 + if (VRatioPreY > 0) { 5631 + CursorBufferSizeInTimePre = LinesInCursorBuffer * LineTime / VRatioPreY; 5632 + if (CursorBufferSizeInTimePre - UrgentLatency <= 0) { 5633 + *NotEnoughUrgentLatencyHidingPre = 1; 5634 + *UrgentBurstFactorCursorPre = 0; 5635 + } else { 5636 + *UrgentBurstFactorCursorPre = CursorBufferSizeInTimePre 5637 + / (CursorBufferSizeInTimePre - UrgentLatency); 5638 + } 5639 + } else { 5640 + *UrgentBurstFactorCursorPre = 1; 5641 + } 5642 + } 5643 + 5644 + CalculateDETBufferSize( 5645 + DETBufferSizeInKByte, 5646 + SwathHeightY, 5647 + SwathHeightC, 5648 + &DETBufferSizeY, 5649 + &DETBufferSizeC); 5650 + 5651 + LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / SwathWidthY; 5652 + DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; 5653 + if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { 5654 + *NotEnoughUrgentLatencyHiding = 1; 5655 + *UrgentBurstFactorLuma = 0; 5656 + } else { 5657 + *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma 5658 + / (DETBufferSizeInTimeLuma - UrgentLatency); 5659 + } 5660 + if (VRatioPreY > 0) { 5661 + DETBufferSizeInTimeLumaPre = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime 5662 + / VRatioPreY; 5663 + if (DETBufferSizeInTimeLumaPre - UrgentLatency <= 0) { 5664 + *NotEnoughUrgentLatencyHidingPre = 1; 5665 + *UrgentBurstFactorLumaPre = 0; 5666 + } else { 5667 + *UrgentBurstFactorLumaPre = DETBufferSizeInTimeLumaPre 5668 + / (DETBufferSizeInTimeLumaPre - UrgentLatency); 5669 + } 5670 + } else { 5671 + *UrgentBurstFactorLumaPre = 1; 5672 + } 5673 + 5674 + if (BytePerPixelInDETC > 0) { 5675 + LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / (SwathWidthY / 2); 5676 + DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime 5677 + / 
(VRatio / 2); 5678 + if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { 5679 + *NotEnoughUrgentLatencyHiding = 1; 5680 + *UrgentBurstFactorChroma = 0; 5681 + } else { 5682 + *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma 5683 + / (DETBufferSizeInTimeChroma - UrgentLatency); 5684 + } 5685 + if (VRatioPreC > 0) { 5686 + DETBufferSizeInTimeChromaPre = dml_floor(LinesInDETChroma, SwathHeightC) 5687 + * LineTime / VRatioPreC; 5688 + if (DETBufferSizeInTimeChromaPre - UrgentLatency <= 0) { 5689 + *NotEnoughUrgentLatencyHidingPre = 1; 5690 + *UrgentBurstFactorChromaPre = 0; 5691 + } else { 5692 + *UrgentBurstFactorChromaPre = DETBufferSizeInTimeChromaPre 5693 + / (DETBufferSizeInTimeChromaPre - UrgentLatency); 5694 + } 5695 + } else { 5696 + *UrgentBurstFactorChromaPre = 1; 5697 + } 5698 + } 5699 + } 5700 + 5701 + static void CalculatePixelDeliveryTimes( 5702 + unsigned int NumberOfActivePlanes, 5703 + double VRatio[], 5704 + double VRatioPrefetchY[], 5705 + double VRatioPrefetchC[], 5706 + unsigned int swath_width_luma_ub[], 5707 + unsigned int swath_width_chroma_ub[], 5708 + int DPPPerPlane[], 5709 + double HRatio[], 5710 + double PixelClock[], 5711 + double PSCL_THROUGHPUT[], 5712 + double PSCL_THROUGHPUT_CHROMA[], 5713 + double DPPCLK[], 5714 + double BytePerPixelDETC[], 5715 + enum scan_direction_class SourceScan[], 5716 + unsigned int BlockWidth256BytesY[], 5717 + unsigned int BlockHeight256BytesY[], 5718 + unsigned int BlockWidth256BytesC[], 5719 + unsigned int BlockHeight256BytesC[], 5720 + double DisplayPipeLineDeliveryTimeLuma[], 5721 + double DisplayPipeLineDeliveryTimeChroma[], 5722 + double DisplayPipeLineDeliveryTimeLumaPrefetch[], 5723 + double DisplayPipeLineDeliveryTimeChromaPrefetch[], 5724 + double DisplayPipeRequestDeliveryTimeLuma[], 5725 + double DisplayPipeRequestDeliveryTimeChroma[], 5726 + double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 5727 + double DisplayPipeRequestDeliveryTimeChromaPrefetch[]) 5728 + { 5729 + double 
req_per_swath_ub; 5730 + uint k; 5731 + 5732 + for (k = 0; k < NumberOfActivePlanes; ++k) { 5733 + if (VRatio[k] <= 1) { 5734 + DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] 5735 + / HRatio[k] / PixelClock[k]; 5736 + } else { 5737 + DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] 5738 + / PSCL_THROUGHPUT[k] / DPPCLK[k]; 5739 + } 5740 + 5741 + if (BytePerPixelDETC[k] == 0) { 5742 + DisplayPipeLineDeliveryTimeChroma[k] = 0; 5743 + } else { 5744 + if (VRatio[k] / 2 <= 1) { 5745 + DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] 5746 + * DPPPerPlane[k] / (HRatio[k] / 2) / PixelClock[k]; 5747 + } else { 5748 + DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] 5749 + / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 5750 + } 5751 + } 5752 + 5753 + if (VRatioPrefetchY[k] <= 1) { 5754 + DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] 5755 + * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 5756 + } else { 5757 + DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] 5758 + / PSCL_THROUGHPUT[k] / DPPCLK[k]; 5759 + } 5760 + 5761 + if (BytePerPixelDETC[k] == 0) { 5762 + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; 5763 + } else { 5764 + if (VRatioPrefetchC[k] <= 1) { 5765 + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 5766 + swath_width_chroma_ub[k] * DPPPerPlane[k] 5767 + / (HRatio[k] / 2) / PixelClock[k]; 5768 + } else { 5769 + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 5770 + swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 5771 + } 5772 + } 5773 + } 5774 + 5775 + for (k = 0; k < NumberOfActivePlanes; ++k) { 5776 + if (SourceScan[k] == dm_horz) { 5777 + req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; 5778 + } else { 5779 + req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; 5780 + } 5781 + DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] 5782 + / req_per_swath_ub; 5783 + 
DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = 5784 + DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; 5785 + if (BytePerPixelDETC[k] == 0) { 5786 + DisplayPipeRequestDeliveryTimeChroma[k] = 0; 5787 + DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; 5788 + } else { 5789 + if (SourceScan[k] == dm_horz) { 5790 + req_per_swath_ub = swath_width_chroma_ub[k] 5791 + / BlockWidth256BytesC[k]; 5792 + } else { 5793 + req_per_swath_ub = swath_width_chroma_ub[k] 5794 + / BlockHeight256BytesC[k]; 5795 + } 5796 + DisplayPipeRequestDeliveryTimeChroma[k] = 5797 + DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; 5798 + DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 5799 + DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; 5800 + } 5801 + } 5802 + } 5803 + 5804 + static void CalculateMetaAndPTETimes( 5805 + unsigned int NumberOfActivePlanes, 5806 + bool GPUVMEnable, 5807 + unsigned int MetaChunkSize, 5808 + unsigned int MinMetaChunkSizeBytes, 5809 + unsigned int GPUVMMaxPageTableLevels, 5810 + unsigned int HTotal[], 5811 + double VRatio[], 5812 + double VRatioPrefetchY[], 5813 + double VRatioPrefetchC[], 5814 + double DestinationLinesToRequestRowInVBlank[], 5815 + double DestinationLinesToRequestRowInImmediateFlip[], 5816 + double DestinationLinesToRequestVMInVBlank[], 5817 + double DestinationLinesToRequestVMInImmediateFlip[], 5818 + bool DCCEnable[], 5819 + double PixelClock[], 5820 + double BytePerPixelDETY[], 5821 + double BytePerPixelDETC[], 5822 + enum scan_direction_class SourceScan[], 5823 + unsigned int dpte_row_height[], 5824 + unsigned int dpte_row_height_chroma[], 5825 + unsigned int meta_row_width[], 5826 + unsigned int meta_row_height[], 5827 + unsigned int meta_req_width[], 5828 + unsigned int meta_req_height[], 5829 + long dpte_group_bytes[], 5830 + unsigned int PTERequestSizeY[], 5831 + unsigned int PTERequestSizeC[], 5832 + unsigned int PixelPTEReqWidthY[], 5833 + unsigned int PixelPTEReqHeightY[], 5834 + unsigned int 
PixelPTEReqWidthC[], 5835 + unsigned int PixelPTEReqHeightC[], 5836 + unsigned int dpte_row_width_luma_ub[], 5837 + unsigned int dpte_row_width_chroma_ub[], 5838 + unsigned int vm_group_bytes[], 5839 + unsigned int dpde0_bytes_per_frame_ub_l[], 5840 + unsigned int dpde0_bytes_per_frame_ub_c[], 5841 + unsigned int meta_pte_bytes_per_frame_ub_l[], 5842 + unsigned int meta_pte_bytes_per_frame_ub_c[], 5843 + double DST_Y_PER_PTE_ROW_NOM_L[], 5844 + double DST_Y_PER_PTE_ROW_NOM_C[], 5845 + double DST_Y_PER_META_ROW_NOM_L[], 5846 + double TimePerMetaChunkNominal[], 5847 + double TimePerMetaChunkVBlank[], 5848 + double TimePerMetaChunkFlip[], 5849 + double time_per_pte_group_nom_luma[], 5850 + double time_per_pte_group_vblank_luma[], 5851 + double time_per_pte_group_flip_luma[], 5852 + double time_per_pte_group_nom_chroma[], 5853 + double time_per_pte_group_vblank_chroma[], 5854 + double time_per_pte_group_flip_chroma[], 5855 + double TimePerVMGroupVBlank[], 5856 + double TimePerVMGroupFlip[], 5857 + double TimePerVMRequestVBlank[], 5858 + double TimePerVMRequestFlip[]) 5859 + { 5860 + unsigned int meta_chunk_width; 5861 + unsigned int min_meta_chunk_width; 5862 + unsigned int meta_chunk_per_row_int; 5863 + unsigned int meta_row_remainder; 5864 + unsigned int meta_chunk_threshold; 5865 + unsigned int meta_chunks_per_row_ub; 5866 + unsigned int dpte_group_width_luma; 5867 + unsigned int dpte_group_width_chroma; 5868 + unsigned int dpte_groups_per_row_luma_ub; 5869 + unsigned int dpte_groups_per_row_chroma_ub; 5870 + unsigned int num_group_per_lower_vm_stage; 5871 + unsigned int num_req_per_lower_vm_stage; 5872 + uint k; 5873 + 5874 + for (k = 0; k < NumberOfActivePlanes; ++k) { 5875 + if (GPUVMEnable == true) { 5876 + DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; 5877 + if (BytePerPixelDETC[k] == 0) { 5878 + DST_Y_PER_PTE_ROW_NOM_C[k] = 0; 5879 + } else { 5880 + DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / (VRatio[k] / 2); 5881 + } 5882 + } else 
{ 5883 + DST_Y_PER_PTE_ROW_NOM_L[k] = 0; 5884 + DST_Y_PER_PTE_ROW_NOM_C[k] = 0; 5885 + } 5886 + if (DCCEnable[k] == true) { 5887 + DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; 5888 + } else { 5889 + DST_Y_PER_META_ROW_NOM_L[k] = 0; 5890 + } 5891 + } 5892 + 5893 + for (k = 0; k < NumberOfActivePlanes; ++k) { 5894 + if (DCCEnable[k] == true) { 5895 + meta_chunk_width = MetaChunkSize * 1024 * 256 5896 + / dml_ceil(BytePerPixelDETY[k], 1) / meta_row_height[k]; 5897 + min_meta_chunk_width = MinMetaChunkSizeBytes * 256 5898 + / dml_ceil(BytePerPixelDETY[k], 1) / meta_row_height[k]; 5899 + meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; 5900 + meta_row_remainder = meta_row_width[k] % meta_chunk_width; 5901 + if (SourceScan[k] == dm_horz) { 5902 + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; 5903 + } else { 5904 + meta_chunk_threshold = 2 * min_meta_chunk_width 5905 + - meta_req_height[k]; 5906 + } 5907 + if (meta_row_remainder <= meta_chunk_threshold) { 5908 + meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 5909 + } else { 5910 + meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 5911 + } 5912 + TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] 5913 + / PixelClock[k] / meta_chunks_per_row_ub; 5914 + TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] 5915 + * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 5916 + TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] 5917 + * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 5918 + } else { 5919 + TimePerMetaChunkNominal[k] = 0; 5920 + TimePerMetaChunkVBlank[k] = 0; 5921 + TimePerMetaChunkFlip[k] = 0; 5922 + } 5923 + } 5924 + 5925 + for (k = 0; k < NumberOfActivePlanes; ++k) { 5926 + if (GPUVMEnable == true) { 5927 + if (SourceScan[k] == dm_horz) { 5928 + dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] 5929 + * PixelPTEReqWidthY[k]; 5930 + } else { 5931 + dpte_group_width_luma = 
dpte_group_bytes[k] / PTERequestSizeY[k] 5932 + * PixelPTEReqHeightY[k]; 5933 + } 5934 + dpte_groups_per_row_luma_ub = dml_ceil( 5935 + dpte_row_width_luma_ub[k] / dpte_group_width_luma, 5936 + 1); 5937 + time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] 5938 + / PixelClock[k] / dpte_groups_per_row_luma_ub; 5939 + time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] 5940 + * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 5941 + time_per_pte_group_flip_luma[k] = 5942 + DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] 5943 + / PixelClock[k] 5944 + / dpte_groups_per_row_luma_ub; 5945 + if (BytePerPixelDETC[k] == 0) { 5946 + time_per_pte_group_nom_chroma[k] = 0; 5947 + time_per_pte_group_vblank_chroma[k] = 0; 5948 + time_per_pte_group_flip_chroma[k] = 0; 5949 + } else { 5950 + if (SourceScan[k] == dm_horz) { 5951 + dpte_group_width_chroma = dpte_group_bytes[k] 5952 + / PTERequestSizeC[k] * PixelPTEReqWidthC[k]; 5953 + } else { 5954 + dpte_group_width_chroma = dpte_group_bytes[k] 5955 + / PTERequestSizeC[k] 5956 + * PixelPTEReqHeightC[k]; 5957 + } 5958 + dpte_groups_per_row_chroma_ub = dml_ceil( 5959 + dpte_row_width_chroma_ub[k] 5960 + / dpte_group_width_chroma, 5961 + 1); 5962 + time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] 5963 + * HTotal[k] / PixelClock[k] 5964 + / dpte_groups_per_row_chroma_ub; 5965 + time_per_pte_group_vblank_chroma[k] = 5966 + DestinationLinesToRequestRowInVBlank[k] * HTotal[k] 5967 + / PixelClock[k] 5968 + / dpte_groups_per_row_chroma_ub; 5969 + time_per_pte_group_flip_chroma[k] = 5970 + DestinationLinesToRequestRowInImmediateFlip[k] 5971 + * HTotal[k] / PixelClock[k] 5972 + / dpte_groups_per_row_chroma_ub; 5973 + } 5974 + } else { 5975 + time_per_pte_group_nom_luma[k] = 0; 5976 + time_per_pte_group_vblank_luma[k] = 0; 5977 + time_per_pte_group_flip_luma[k] = 0; 5978 + time_per_pte_group_nom_chroma[k] = 0; 5979 + time_per_pte_group_vblank_chroma[k] = 0; 5980 + 
time_per_pte_group_flip_chroma[k] = 0; 5981 + } 5982 + } 5983 + 5984 + for (k = 0; k < NumberOfActivePlanes; ++k) { 5985 + if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { 5986 + if (DCCEnable[k] == false) { 5987 + if (BytePerPixelDETC[k] > 0) { 5988 + num_group_per_lower_vm_stage = 5989 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 5990 + + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 5991 + } else { 5992 + num_group_per_lower_vm_stage = 5993 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 5994 + } 5995 + } else { 5996 + if (GPUVMMaxPageTableLevels == 1) { 5997 + if (BytePerPixelDETC[k] > 0) { 5998 + num_group_per_lower_vm_stage = 5999 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6000 + + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6001 + } else { 6002 + num_group_per_lower_vm_stage = 6003 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6004 + } 6005 + } else { 6006 + if (BytePerPixelDETC[k] > 0) { 6007 + num_group_per_lower_vm_stage = 6008 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6009 + + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) 6010 + + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6011 + + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6012 + } else { 6013 + num_group_per_lower_vm_stage = 6014 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6015 + + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6016 + } 6017 + } 6018 + } 6019 + 6020 + if (DCCEnable[k] == false) { 6021 + if (BytePerPixelDETC[k] > 0) { 6022 + 
num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] 6023 + / 64 + dpde0_bytes_per_frame_ub_c[k] / 64; 6024 + } else { 6025 + num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] 6026 + / 64; 6027 + } 6028 + } else { 6029 + if (GPUVMMaxPageTableLevels == 1) { 6030 + if (BytePerPixelDETC[k] > 0) { 6031 + num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 6032 + + meta_pte_bytes_per_frame_ub_c[k] / 64; 6033 + } else { 6034 + num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64; 6035 + } 6036 + } else { 6037 + if (BytePerPixelDETC[k] > 0) { 6038 + num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 6039 + + dpde0_bytes_per_frame_ub_c[k] / 64 6040 + + meta_pte_bytes_per_frame_ub_l[k] / 64 6041 + + meta_pte_bytes_per_frame_ub_c[k] / 64; 6042 + } else { 6043 + num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 6044 + + meta_pte_bytes_per_frame_ub_l[k] / 64; 6045 + } 6046 + } 6047 + } 6048 + 6049 + TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] 6050 + / PixelClock[k] / num_group_per_lower_vm_stage; 6051 + TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] 6052 + * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 6053 + TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] 6054 + * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 6055 + TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] 6056 + * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 6057 + 6058 + if (GPUVMMaxPageTableLevels > 2) { 6059 + TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; 6060 + TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; 6061 + TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; 6062 + TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; 6063 + } 6064 + 6065 + } else { 6066 + TimePerVMGroupVBlank[k] = 0; 6067 + TimePerVMGroupFlip[k] = 0; 6068 + TimePerVMRequestVBlank[k] = 0; 6069 + TimePerVMRequestFlip[k] 
= 0; 6070 + } 6071 + } 6072 + } 6073 + 6074 + static double CalculateExtraLatency( 6075 + double UrgentRoundTripAndOutOfOrderLatency, 6076 + int TotalNumberOfActiveDPP, 6077 + int PixelChunkSizeInKByte, 6078 + int TotalNumberOfDCCActiveDPP, 6079 + int MetaChunkSize, 6080 + double ReturnBW, 6081 + bool GPUVMEnable, 6082 + bool HostVMEnable, 6083 + int NumberOfActivePlanes, 6084 + int NumberOfDPP[], 6085 + long dpte_group_bytes[], 6086 + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, 6087 + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, 6088 + int HostVMMaxPageTableLevels, 6089 + int HostVMCachedPageTableLevels) 6090 + { 6091 + double CalculateExtraLatency; 6092 + double HostVMInefficiencyFactor; 6093 + int HostVMDynamicLevels; 6094 + 6095 + if (GPUVMEnable && HostVMEnable) { 6096 + HostVMInefficiencyFactor = 6097 + PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData 6098 + / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly; 6099 + HostVMDynamicLevels = HostVMMaxPageTableLevels - HostVMCachedPageTableLevels; 6100 + } else { 6101 + HostVMInefficiencyFactor = 1; 6102 + HostVMDynamicLevels = 0; 6103 + } 6104 + 6105 + CalculateExtraLatency = UrgentRoundTripAndOutOfOrderLatency 6106 + + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte 6107 + + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0 6108 + / ReturnBW; 6109 + 6110 + if (GPUVMEnable) { 6111 + int k; 6112 + 6113 + for (k = 0; k < NumberOfActivePlanes; k++) { 6114 + CalculateExtraLatency = CalculateExtraLatency 6115 + + NumberOfDPP[k] * dpte_group_bytes[k] 6116 + * (1 + 8 * HostVMDynamicLevels) 6117 + * HostVMInefficiencyFactor / ReturnBW; 6118 + } 6119 + } 6120 + return CalculateExtraLatency; 6121 + } 6122 + 6123 + #endif
+32
drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.h
/*
 * Copyright 2017 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: AMD
 *
 */

#ifndef __DML21_DISPLAY_MODE_VBA_H__
#define __DML21_DISPLAY_MODE_VBA_H__

/*
 * Forward declaration so this header can be included on its own; without it
 * the struct tag below would only have prototype scope.
 */
struct display_mode_lib;

/* Entry points of the DCN 2.1 display mode library; both operate on mode_lib. */
void dml21_recalculate(struct display_mode_lib *mode_lib);
void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib);

#endif /* __DML21_DISPLAY_MODE_VBA_H__ */
+1823
drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
··· 1 + /* 2 + * Copyright 2017 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 21 + * 22 + * Authors: AMD 23 + * 24 + */ 25 + 26 + #ifdef CONFIG_DRM_AMD_DC_DCN2_0 27 + 28 + #include "../display_mode_lib.h" 29 + #include "../display_mode_vba.h" 30 + #include "../dml_inline_defs.h" 31 + #include "display_rq_dlg_calc_21.h" 32 + 33 + /* 34 + * NOTE: 35 + * This file is gcc-parseable HW gospel, coming straight from HW engineers. 36 + * 37 + * It doesn't adhere to Linux kernel style and sometimes will do things in odd 38 + * ways. Unless there is something clearly wrong with it the code should 39 + * remain as-is as it provides us with a guarantee from HW that it is correct. 
40 + */ 41 + 42 + static void calculate_ttu_cursor( 43 + struct display_mode_lib *mode_lib, 44 + double *refcyc_per_req_delivery_pre_cur, 45 + double *refcyc_per_req_delivery_cur, 46 + double refclk_freq_in_mhz, 47 + double ref_freq_to_pix_freq, 48 + double hscale_pixel_rate_l, 49 + double hscl_ratio, 50 + double vratio_pre_l, 51 + double vratio_l, 52 + unsigned int cur_width, 53 + enum cursor_bpp cur_bpp); 54 + 55 + static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) 56 + { 57 + unsigned int ret_val = 0; 58 + 59 + if (source_format == dm_444_16) { 60 + if (!is_chroma) 61 + ret_val = 2; 62 + } else if (source_format == dm_444_32) { 63 + if (!is_chroma) 64 + ret_val = 4; 65 + } else if (source_format == dm_444_64) { 66 + if (!is_chroma) 67 + ret_val = 8; 68 + } else if (source_format == dm_420_8) { 69 + if (is_chroma) 70 + ret_val = 2; 71 + else 72 + ret_val = 1; 73 + } else if (source_format == dm_420_10) { 74 + if (is_chroma) 75 + ret_val = 4; 76 + else 77 + ret_val = 2; 78 + } else if (source_format == dm_444_8) { 79 + ret_val = 1; 80 + } 81 + return ret_val; 82 + } 83 + 84 + static bool is_dual_plane(enum source_format_class source_format) 85 + { 86 + bool ret_val = 0; 87 + 88 + if ((source_format == dm_420_8) || (source_format == dm_420_10)) 89 + ret_val = 1; 90 + 91 + return ret_val; 92 + } 93 + 94 + static double get_refcyc_per_delivery( 95 + struct display_mode_lib *mode_lib, 96 + double refclk_freq_in_mhz, 97 + double pclk_freq_in_mhz, 98 + bool odm_combine, 99 + unsigned int recout_width, 100 + unsigned int hactive, 101 + double vratio, 102 + double hscale_pixel_rate, 103 + unsigned int delivery_width, 104 + unsigned int req_per_swath_ub) 105 + { 106 + double refcyc_per_delivery = 0.0; 107 + 108 + if (vratio <= 1.0) { 109 + if (odm_combine) 110 + refcyc_per_delivery = (double) refclk_freq_in_mhz 111 + * dml_min((double) recout_width, (double) hactive / 2.0) 112 + / pclk_freq_in_mhz / (double) req_per_swath_ub; 
113 + else 114 + refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) recout_width 115 + / pclk_freq_in_mhz / (double) req_per_swath_ub; 116 + } else { 117 + refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) delivery_width 118 + / (double) hscale_pixel_rate / (double) req_per_swath_ub; 119 + } 120 + 121 + dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); 122 + dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); 123 + dml_print("DML_DLG: %s: recout_width = %d\n", __func__, recout_width); 124 + dml_print("DML_DLG: %s: vratio = %3.2f\n", __func__, vratio); 125 + dml_print("DML_DLG: %s: req_per_swath_ub = %d\n", __func__, req_per_swath_ub); 126 + dml_print("DML_DLG: %s: refcyc_per_delivery= %3.2f\n", __func__, refcyc_per_delivery); 127 + 128 + return refcyc_per_delivery; 129 + 130 + } 131 + 132 + static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size) 133 + { 134 + if (tile_size == dm_256k_tile) 135 + return (256 * 1024); 136 + else if (tile_size == dm_64k_tile) 137 + return (64 * 1024); 138 + else 139 + return (4 * 1024); 140 + } 141 + 142 + static void extract_rq_sizing_regs( 143 + struct display_mode_lib *mode_lib, 144 + display_data_rq_regs_st *rq_regs, 145 + const display_data_rq_sizing_params_st rq_sizing) 146 + { 147 + dml_print("DML_DLG: %s: rq_sizing param\n", __func__); 148 + print__data_rq_sizing_params_st(mode_lib, rq_sizing); 149 + 150 + rq_regs->chunk_size = dml_log2(rq_sizing.chunk_bytes) - 10; 151 + 152 + if (rq_sizing.min_chunk_bytes == 0) 153 + rq_regs->min_chunk_size = 0; 154 + else 155 + rq_regs->min_chunk_size = dml_log2(rq_sizing.min_chunk_bytes) - 8 + 1; 156 + 157 + rq_regs->meta_chunk_size = dml_log2(rq_sizing.meta_chunk_bytes) - 10; 158 + if (rq_sizing.min_meta_chunk_bytes == 0) 159 + rq_regs->min_meta_chunk_size = 0; 160 + else 161 + rq_regs->min_meta_chunk_size = dml_log2(rq_sizing.min_meta_chunk_bytes) - 6 + 1; 162 + 163 + 
rq_regs->dpte_group_size = dml_log2(rq_sizing.dpte_group_bytes) - 6; 164 + rq_regs->mpte_group_size = dml_log2(rq_sizing.mpte_group_bytes) - 6; 165 + } 166 + 167 + static void extract_rq_regs( 168 + struct display_mode_lib *mode_lib, 169 + display_rq_regs_st *rq_regs, 170 + const display_rq_params_st rq_param) 171 + { 172 + unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; 173 + unsigned int detile_buf_plane1_addr = 0; 174 + 175 + extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), rq_param.sizing.rq_l); 176 + 177 + rq_regs->rq_regs_l.pte_row_height_linear = dml_floor( 178 + dml_log2(rq_param.dlg.rq_l.dpte_row_height), 179 + 1) - 3; 180 + 181 + if (rq_param.yuv420) { 182 + extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), rq_param.sizing.rq_c); 183 + rq_regs->rq_regs_c.pte_row_height_linear = dml_floor( 184 + dml_log2(rq_param.dlg.rq_c.dpte_row_height), 185 + 1) - 3; 186 + } 187 + 188 + rq_regs->rq_regs_l.swath_height = dml_log2(rq_param.dlg.rq_l.swath_height); 189 + rq_regs->rq_regs_c.swath_height = dml_log2(rq_param.dlg.rq_c.swath_height); 190 + 191 + // FIXME: take the max between luma, chroma chunk size? 
192 + // okay for now, as we are setting chunk_bytes to 8kb anyways 193 + if (rq_param.sizing.rq_l.chunk_bytes >= 32 * 1024) { //32kb 194 + rq_regs->drq_expansion_mode = 0; 195 + } else { 196 + rq_regs->drq_expansion_mode = 2; 197 + } 198 + rq_regs->prq_expansion_mode = 1; 199 + rq_regs->mrq_expansion_mode = 1; 200 + rq_regs->crq_expansion_mode = 1; 201 + 202 + if (rq_param.yuv420) { 203 + if ((double) rq_param.misc.rq_l.stored_swath_bytes 204 + / (double) rq_param.misc.rq_c.stored_swath_bytes <= 1.5) { 205 + detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 64.0); // half to chroma 206 + } else { 207 + detile_buf_plane1_addr = dml_round_to_multiple( 208 + (unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0), 209 + 256, 210 + 0) / 64.0; // 2/3 to chroma 211 + } 212 + } 213 + rq_regs->plane1_base_address = detile_buf_plane1_addr; 214 + } 215 + 216 + static void handle_det_buf_split( 217 + struct display_mode_lib *mode_lib, 218 + display_rq_params_st *rq_param, 219 + const display_pipe_source_params_st pipe_src_param) 220 + { 221 + unsigned int total_swath_bytes = 0; 222 + unsigned int swath_bytes_l = 0; 223 + unsigned int swath_bytes_c = 0; 224 + unsigned int full_swath_bytes_packed_l = 0; 225 + unsigned int full_swath_bytes_packed_c = 0; 226 + bool req128_l = 0; 227 + bool req128_c = 0; 228 + bool surf_linear = (pipe_src_param.sw_mode == dm_sw_linear); 229 + bool surf_vert = (pipe_src_param.source_scan == dm_vert); 230 + unsigned int log2_swath_height_l = 0; 231 + unsigned int log2_swath_height_c = 0; 232 + unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; 233 + 234 + full_swath_bytes_packed_l = rq_param->misc.rq_l.full_swath_bytes; 235 + full_swath_bytes_packed_c = rq_param->misc.rq_c.full_swath_bytes; 236 + 237 + if (rq_param->yuv420_10bpc) { 238 + full_swath_bytes_packed_l = dml_round_to_multiple( 239 + rq_param->misc.rq_l.full_swath_bytes * 2 / 3, 240 + 256, 241 + 1) + 256; 242 + full_swath_bytes_packed_c = 
dml_round_to_multiple( 243 + rq_param->misc.rq_c.full_swath_bytes * 2 / 3, 244 + 256, 245 + 1) + 256; 246 + } 247 + 248 + if (rq_param->yuv420) { 249 + total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c; 250 + 251 + if (total_swath_bytes <= detile_buf_size_in_bytes) { //full 256b request 252 + req128_l = 0; 253 + req128_c = 0; 254 + swath_bytes_l = full_swath_bytes_packed_l; 255 + swath_bytes_c = full_swath_bytes_packed_c; 256 + } else { //128b request (for luma only for yuv420 8bpc) 257 + req128_l = 1; 258 + req128_c = 0; 259 + swath_bytes_l = full_swath_bytes_packed_l / 2; 260 + swath_bytes_c = full_swath_bytes_packed_c; 261 + } 262 + // Note: assumption, the config that pass in will fit into 263 + // the detiled buffer. 264 + } else { 265 + total_swath_bytes = 2 * full_swath_bytes_packed_l; 266 + 267 + if (total_swath_bytes <= detile_buf_size_in_bytes) 268 + req128_l = 0; 269 + else 270 + req128_l = 1; 271 + 272 + swath_bytes_l = total_swath_bytes; 273 + swath_bytes_c = 0; 274 + } 275 + rq_param->misc.rq_l.stored_swath_bytes = swath_bytes_l; 276 + rq_param->misc.rq_c.stored_swath_bytes = swath_bytes_c; 277 + 278 + if (surf_linear) { 279 + log2_swath_height_l = 0; 280 + log2_swath_height_c = 0; 281 + } else if (!surf_vert) { 282 + log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_height) - req128_l; 283 + log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_height) - req128_c; 284 + } else { 285 + log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_width) - req128_l; 286 + log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_width) - req128_c; 287 + } 288 + rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l; 289 + rq_param->dlg.rq_c.swath_height = 1 << log2_swath_height_c; 290 + 291 + dml_print("DML_DLG: %s: req128_l = %0d\n", __func__, req128_l); 292 + dml_print("DML_DLG: %s: req128_c = %0d\n", __func__, req128_c); 293 + dml_print( 294 + "DML_DLG: %s: full_swath_bytes_packed_l = %0d\n", 295 + 
__func__, 296 + full_swath_bytes_packed_l); 297 + dml_print( 298 + "DML_DLG: %s: full_swath_bytes_packed_c = %0d\n", 299 + __func__, 300 + full_swath_bytes_packed_c); 301 + } 302 + 303 + static void get_meta_and_pte_attr( 304 + struct display_mode_lib *mode_lib, 305 + display_data_rq_dlg_params_st *rq_dlg_param, 306 + display_data_rq_misc_params_st *rq_misc_param, 307 + display_data_rq_sizing_params_st *rq_sizing_param, 308 + unsigned int vp_width, 309 + unsigned int vp_height, 310 + unsigned int data_pitch, 311 + unsigned int meta_pitch, 312 + unsigned int source_format, 313 + unsigned int tiling, 314 + unsigned int macro_tile_size, 315 + unsigned int source_scan, 316 + unsigned int hostvm_enable, 317 + unsigned int is_chroma) 318 + { 319 + bool surf_linear = (tiling == dm_sw_linear); 320 + bool surf_vert = (source_scan == dm_vert); 321 + 322 + unsigned int bytes_per_element; 323 + unsigned int bytes_per_element_y = get_bytes_per_element( 324 + (enum source_format_class) (source_format), 325 + false); 326 + unsigned int bytes_per_element_c = get_bytes_per_element( 327 + (enum source_format_class) (source_format), 328 + true); 329 + 330 + unsigned int blk256_width = 0; 331 + unsigned int blk256_height = 0; 332 + 333 + unsigned int blk256_width_y = 0; 334 + unsigned int blk256_height_y = 0; 335 + unsigned int blk256_width_c = 0; 336 + unsigned int blk256_height_c = 0; 337 + unsigned int log2_bytes_per_element; 338 + unsigned int log2_blk256_width; 339 + unsigned int log2_blk256_height; 340 + unsigned int blk_bytes; 341 + unsigned int log2_blk_bytes; 342 + unsigned int log2_blk_height; 343 + unsigned int log2_blk_width; 344 + unsigned int log2_meta_req_bytes; 345 + unsigned int log2_meta_req_height; 346 + unsigned int log2_meta_req_width; 347 + unsigned int meta_req_width; 348 + unsigned int meta_req_height; 349 + unsigned int log2_meta_row_height; 350 + unsigned int meta_row_width_ub; 351 + unsigned int log2_meta_chunk_bytes; 352 + unsigned int 
log2_meta_chunk_height; 353 + 354 + //full sized meta chunk width in unit of data elements 355 + unsigned int log2_meta_chunk_width; 356 + unsigned int log2_min_meta_chunk_bytes; 357 + unsigned int min_meta_chunk_width; 358 + unsigned int meta_chunk_width; 359 + unsigned int meta_chunk_per_row_int; 360 + unsigned int meta_row_remainder; 361 + unsigned int meta_chunk_threshold; 362 + unsigned int meta_blk_bytes; 363 + unsigned int meta_blk_height; 364 + unsigned int meta_blk_width; 365 + unsigned int meta_surface_bytes; 366 + unsigned int vmpg_bytes; 367 + unsigned int meta_pte_req_per_frame_ub; 368 + unsigned int meta_pte_bytes_per_frame_ub; 369 + const unsigned int log2_vmpg_bytes = dml_log2(mode_lib->soc.vmm_page_size_bytes); 370 + const unsigned int dpte_buf_in_pte_reqs = 371 + mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma + mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma; 372 + const unsigned int pde_proc_buffer_size_64k_reqs = 373 + mode_lib->ip.pde_proc_buffer_size_64k_reqs; 374 + 375 + unsigned int log2_vmpg_height = 0; 376 + unsigned int log2_vmpg_width = 0; 377 + unsigned int log2_dpte_req_height_ptes = 0; 378 + unsigned int log2_dpte_req_height = 0; 379 + unsigned int log2_dpte_req_width = 0; 380 + unsigned int log2_dpte_row_height_linear = 0; 381 + unsigned int log2_dpte_row_height = 0; 382 + unsigned int log2_dpte_group_width = 0; 383 + unsigned int dpte_row_width_ub = 0; 384 + unsigned int dpte_req_height = 0; 385 + unsigned int dpte_req_width = 0; 386 + unsigned int dpte_group_width = 0; 387 + unsigned int log2_dpte_group_bytes = 0; 388 + unsigned int log2_dpte_group_length = 0; 389 + unsigned int pde_buf_entries; 390 + bool yuv420 = (source_format == dm_420_8 || source_format == dm_420_10); 391 + 392 + Calculate256BBlockSizes( 393 + (enum source_format_class) (source_format), 394 + (enum dm_swizzle_mode) (tiling), 395 + bytes_per_element_y, 396 + bytes_per_element_c, 397 + &blk256_height_y, 398 + &blk256_height_c, 399 + &blk256_width_y, 400 + 
&blk256_width_c); 401 + 402 + if (!is_chroma) { 403 + blk256_width = blk256_width_y; 404 + blk256_height = blk256_height_y; 405 + bytes_per_element = bytes_per_element_y; 406 + } else { 407 + blk256_width = blk256_width_c; 408 + blk256_height = blk256_height_c; 409 + bytes_per_element = bytes_per_element_c; 410 + } 411 + 412 + log2_bytes_per_element = dml_log2(bytes_per_element); 413 + 414 + dml_print("DML_DLG: %s: surf_linear = %d\n", __func__, surf_linear); 415 + dml_print("DML_DLG: %s: surf_vert = %d\n", __func__, surf_vert); 416 + dml_print("DML_DLG: %s: blk256_width = %d\n", __func__, blk256_width); 417 + dml_print("DML_DLG: %s: blk256_height = %d\n", __func__, blk256_height); 418 + 419 + log2_blk256_width = dml_log2((double) blk256_width); 420 + log2_blk256_height = dml_log2((double) blk256_height); 421 + blk_bytes = surf_linear ? 422 + 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size); 423 + log2_blk_bytes = dml_log2((double) blk_bytes); 424 + log2_blk_height = 0; 425 + log2_blk_width = 0; 426 + 427 + // remember log rule 428 + // "+" in log is multiply 429 + // "-" in log is divide 430 + // "/2" is like square root 431 + // blk is vertical biased 432 + if (tiling != dm_sw_linear) 433 + log2_blk_height = log2_blk256_height 434 + + dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1); 435 + else 436 + log2_blk_height = 0; // blk height of 1 437 + 438 + log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height; 439 + 440 + if (!surf_vert) { 441 + rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_width - 1, blk256_width, 1) 442 + + blk256_width; 443 + rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_width; 444 + } else { 445 + rq_dlg_param->swath_width_ub = dml_round_to_multiple( 446 + vp_height - 1, 447 + blk256_height, 448 + 1) + blk256_height; 449 + rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_height; 450 + } 451 + 452 + if (!surf_vert) 453 + 
rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height 454 + * bytes_per_element; 455 + else 456 + rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width 457 + * bytes_per_element; 458 + 459 + rq_misc_param->blk256_height = blk256_height; 460 + rq_misc_param->blk256_width = blk256_width; 461 + 462 + // ------- 463 + // meta 464 + // ------- 465 + log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element 466 + 467 + // each 64b meta request for dcn is 8x8 meta elements and 468 + // a meta element covers one 256b block of the the data surface. 469 + log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256 470 + log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element 471 + - log2_meta_req_height; 472 + meta_req_width = 1 << log2_meta_req_width; 473 + meta_req_height = 1 << log2_meta_req_height; 474 + log2_meta_row_height = 0; 475 + meta_row_width_ub = 0; 476 + 477 + // the dimensions of a meta row are meta_row_width x meta_row_height in elements. 
478 + // calculate upper bound of the meta_row_width 479 + if (!surf_vert) { 480 + log2_meta_row_height = log2_meta_req_height; 481 + meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1) 482 + + meta_req_width; 483 + rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width; 484 + } else { 485 + log2_meta_row_height = log2_meta_req_width; 486 + meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1) 487 + + meta_req_height; 488 + rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height; 489 + } 490 + rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64; 491 + 492 + rq_dlg_param->meta_row_height = 1 << log2_meta_row_height; 493 + 494 + log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes); 495 + log2_meta_chunk_height = log2_meta_row_height; 496 + 497 + //full sized meta chunk width in unit of data elements 498 + log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element 499 + - log2_meta_chunk_height; 500 + log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes); 501 + min_meta_chunk_width = 1 502 + << (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element 503 + - log2_meta_chunk_height); 504 + meta_chunk_width = 1 << log2_meta_chunk_width; 505 + meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width); 506 + meta_row_remainder = meta_row_width_ub % meta_chunk_width; 507 + meta_chunk_threshold = 0; 508 + meta_blk_bytes = 4096; 509 + meta_blk_height = blk256_height * 64; 510 + meta_blk_width = meta_blk_bytes * 256 / bytes_per_element / meta_blk_height; 511 + meta_surface_bytes = meta_pitch 512 + * (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1) 513 + + meta_blk_height) * bytes_per_element / 256; 514 + vmpg_bytes = mode_lib->soc.vmm_page_size_bytes; 515 + meta_pte_req_per_frame_ub = (dml_round_to_multiple( 516 + meta_surface_bytes - vmpg_bytes, 517 + 8 * vmpg_bytes, 518 + 1) + 8 * vmpg_bytes) 
/ (8 * vmpg_bytes); 519 + meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; //64B mpte request 520 + rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub; 521 + 522 + dml_print("DML_DLG: %s: meta_blk_height = %d\n", __func__, meta_blk_height); 523 + dml_print("DML_DLG: %s: meta_blk_width = %d\n", __func__, meta_blk_width); 524 + dml_print("DML_DLG: %s: meta_surface_bytes = %d\n", __func__, meta_surface_bytes); 525 + dml_print( 526 + "DML_DLG: %s: meta_pte_req_per_frame_ub = %d\n", 527 + __func__, 528 + meta_pte_req_per_frame_ub); 529 + dml_print( 530 + "DML_DLG: %s: meta_pte_bytes_per_frame_ub = %d\n", 531 + __func__, 532 + meta_pte_bytes_per_frame_ub); 533 + 534 + if (!surf_vert) 535 + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width; 536 + else 537 + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height; 538 + 539 + if (meta_row_remainder <= meta_chunk_threshold) 540 + rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 541 + else 542 + rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 543 + 544 + // ------ 545 + // dpte 546 + // ------ 547 + if (surf_linear) { 548 + log2_vmpg_height = 0; // one line high 549 + } else { 550 + log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height; 551 + } 552 + log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height; 553 + 554 + // only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4. 
555 + if (surf_linear) { //one 64B PTE request returns 8 PTEs 556 + log2_dpte_req_height_ptes = 0; 557 + log2_dpte_req_width = log2_vmpg_width + 3; 558 + log2_dpte_req_height = 0; 559 + } else if (log2_blk_bytes == 12) { //4KB tile means 4kB page size 560 + //one 64B req gives 8x1 PTEs for 4KB tile 561 + log2_dpte_req_height_ptes = 0; 562 + log2_dpte_req_width = log2_blk_width + 3; 563 + log2_dpte_req_height = log2_blk_height + 0; 564 + } else if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) { // tile block >= 64KB 565 + //two 64B reqs of 2x4 PTEs give 16 PTEs to cover 64KB 566 + log2_dpte_req_height_ptes = 4; 567 + log2_dpte_req_width = log2_blk256_width + 4; // log2_64KB_width 568 + log2_dpte_req_height = log2_blk256_height + 4; // log2_64KB_height 569 + } else { //64KB page size and must 64KB tile block 570 + //one 64B req gives 8x1 PTEs for 64KB tile 571 + log2_dpte_req_height_ptes = 0; 572 + log2_dpte_req_width = log2_blk_width + 3; 573 + log2_dpte_req_height = log2_blk_height + 0; 574 + } 575 + 576 + // The dpte request dimensions in data elements is dpte_req_width x dpte_req_height 577 + // log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent 578 + // That depends on the pte shape (i.e. 8x1, 4x2, 2x4) 579 + //log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes; 580 + //log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes; 581 + dpte_req_height = 1 << log2_dpte_req_height; 582 + dpte_req_width = 1 << log2_dpte_req_width; 583 + 584 + // calculate pitch dpte row buffer can hold 585 + // round the result down to a power of two. 586 + pde_buf_entries = 587 + yuv420 ? 
(pde_proc_buffer_size_64k_reqs >> 1) : pde_proc_buffer_size_64k_reqs; 588 + if (surf_linear) { 589 + unsigned int dpte_row_height; 590 + 591 + log2_dpte_row_height_linear = dml_floor( 592 + dml_log2( 593 + dml_min( 594 + 64 * 1024 * pde_buf_entries 595 + / bytes_per_element, 596 + dpte_buf_in_pte_reqs 597 + * dpte_req_width) 598 + / data_pitch), 599 + 1); 600 + 601 + ASSERT(log2_dpte_row_height_linear >= 3); 602 + 603 + if (log2_dpte_row_height_linear > 7) 604 + log2_dpte_row_height_linear = 7; 605 + 606 + log2_dpte_row_height = log2_dpte_row_height_linear; 607 + // For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary. 608 + // the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering. 609 + dpte_row_height = 1 << log2_dpte_row_height; 610 + dpte_row_width_ub = dml_round_to_multiple( 611 + data_pitch * dpte_row_height - 1, 612 + dpte_req_width, 613 + 1) + dpte_req_width; 614 + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; 615 + } else { 616 + // the upper bound of the dpte_row_width without dependency on viewport position follows. 617 + // for tiled mode, row height is the same as req height and row store up to vp size upper bound 618 + if (!surf_vert) { 619 + log2_dpte_row_height = log2_dpte_req_height; 620 + dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1) 621 + + dpte_req_width; 622 + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; 623 + } else { 624 + log2_dpte_row_height = 625 + (log2_blk_width < log2_dpte_req_width) ? 
626 + log2_blk_width : log2_dpte_req_width; 627 + dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1) 628 + + dpte_req_height; 629 + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height; 630 + } 631 + } 632 + if (log2_blk_bytes >= 16 && log2_vmpg_bytes == 12) // tile block >= 64KB 633 + rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 128; //2*64B dpte request 634 + else 635 + rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64; //64B dpte request 636 + 637 + rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height; 638 + 639 + // the dpte_group_bytes is reduced for the specific case of vertical 640 + // access of a tile surface that has dpte request of 8x1 ptes. 641 + 642 + if (hostvm_enable) 643 + rq_sizing_param->dpte_group_bytes = 512; 644 + else { 645 + if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group 646 + rq_sizing_param->dpte_group_bytes = 512; 647 + else 648 + //full size 649 + rq_sizing_param->dpte_group_bytes = 2048; 650 + } 651 + 652 + //since pte request size is 64byte, the number of data pte requests per full sized group is as follows. 
653 + log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes); 654 + log2_dpte_group_length = log2_dpte_group_bytes - 6; //length in 64b requests 655 + 656 + // full sized data pte group width in elements 657 + if (!surf_vert) 658 + log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width; 659 + else 660 + log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height; 661 + 662 + //But if the tile block >=64KB and the page size is 4KB, then each dPTE request is 2*64B 663 + if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) // tile block >= 64KB 664 + log2_dpte_group_width = log2_dpte_group_width - 1; 665 + 666 + dpte_group_width = 1 << log2_dpte_group_width; 667 + 668 + // since dpte groups are only aligned to dpte_req_width and not dpte_group_width, 669 + // the upper bound for the dpte groups per row is as follows. 670 + rq_dlg_param->dpte_groups_per_row_ub = dml_ceil( 671 + (double) dpte_row_width_ub / dpte_group_width, 672 + 1); 673 + } 674 + 675 + static void get_surf_rq_param( 676 + struct display_mode_lib *mode_lib, 677 + display_data_rq_sizing_params_st *rq_sizing_param, 678 + display_data_rq_dlg_params_st *rq_dlg_param, 679 + display_data_rq_misc_params_st *rq_misc_param, 680 + const display_pipe_params_st pipe_param, 681 + bool is_chroma) 682 + { 683 + bool mode_422 = 0; 684 + unsigned int vp_width = 0; 685 + unsigned int vp_height = 0; 686 + unsigned int data_pitch = 0; 687 + unsigned int meta_pitch = 0; 688 + unsigned int ppe = mode_422 ? 
2 : 1; 689 + 690 + // FIXME check if ppe apply for both luma and chroma in 422 case 691 + if (is_chroma) { 692 + vp_width = pipe_param.src.viewport_width_c / ppe; 693 + vp_height = pipe_param.src.viewport_height_c; 694 + data_pitch = pipe_param.src.data_pitch_c; 695 + meta_pitch = pipe_param.src.meta_pitch_c; 696 + } else { 697 + vp_width = pipe_param.src.viewport_width / ppe; 698 + vp_height = pipe_param.src.viewport_height; 699 + data_pitch = pipe_param.src.data_pitch; 700 + meta_pitch = pipe_param.src.meta_pitch; 701 + } 702 + 703 + if (pipe_param.dest.odm_combine) { 704 + unsigned int access_dir; 705 + unsigned int full_src_vp_width; 706 + unsigned int hactive_half; 707 + unsigned int src_hactive_half; 708 + access_dir = (pipe_param.src.source_scan == dm_vert); // vp access direction: horizontal or vertical accessed 709 + hactive_half = pipe_param.dest.hactive / 2; 710 + if (is_chroma) { 711 + full_src_vp_width = pipe_param.scale_ratio_depth.hscl_ratio_c * pipe_param.dest.full_recout_width; 712 + src_hactive_half = pipe_param.scale_ratio_depth.hscl_ratio_c * hactive_half; 713 + } else { 714 + full_src_vp_width = pipe_param.scale_ratio_depth.hscl_ratio * pipe_param.dest.full_recout_width; 715 + src_hactive_half = pipe_param.scale_ratio_depth.hscl_ratio * hactive_half; 716 + } 717 + 718 + if (access_dir == 0) { 719 + vp_width = dml_min(full_src_vp_width, src_hactive_half); 720 + dml_print("DML_DLG: %s: vp_width = %d\n", __func__, vp_width); 721 + } else { 722 + vp_height = dml_min(full_src_vp_width, src_hactive_half); 723 + dml_print("DML_DLG: %s: vp_height = %d\n", __func__, vp_height); 724 + 725 + } 726 + dml_print("DML_DLG: %s: full_src_vp_width = %d\n", __func__, full_src_vp_width); 727 + dml_print("DML_DLG: %s: hactive_half = %d\n", __func__, hactive_half); 728 + dml_print("DML_DLG: %s: src_hactive_half = %d\n", __func__, src_hactive_half); 729 + } 730 + rq_sizing_param->chunk_bytes = 8192; 731 + 732 + if (rq_sizing_param->chunk_bytes == 64 * 1024) 733 + 
rq_sizing_param->min_chunk_bytes = 0; 734 + else 735 + rq_sizing_param->min_chunk_bytes = 1024; 736 + 737 + rq_sizing_param->meta_chunk_bytes = 2048; 738 + rq_sizing_param->min_meta_chunk_bytes = 256; 739 + 740 + if (pipe_param.src.hostvm) 741 + rq_sizing_param->mpte_group_bytes = 512; 742 + else 743 + rq_sizing_param->mpte_group_bytes = 2048; 744 + 745 + get_meta_and_pte_attr( 746 + mode_lib, 747 + rq_dlg_param, 748 + rq_misc_param, 749 + rq_sizing_param, 750 + vp_width, 751 + vp_height, 752 + data_pitch, 753 + meta_pitch, 754 + pipe_param.src.source_format, 755 + pipe_param.src.sw_mode, 756 + pipe_param.src.macro_tile_size, 757 + pipe_param.src.source_scan, 758 + pipe_param.src.hostvm, 759 + is_chroma); 760 + } 761 + 762 + static void dml_rq_dlg_get_rq_params( 763 + struct display_mode_lib *mode_lib, 764 + display_rq_params_st *rq_param, 765 + const display_pipe_params_st pipe_param) 766 + { 767 + // get param for luma surface 768 + rq_param->yuv420 = pipe_param.src.source_format == dm_420_8 769 + || pipe_param.src.source_format == dm_420_10; 770 + rq_param->yuv420_10bpc = pipe_param.src.source_format == dm_420_10; 771 + 772 + get_surf_rq_param( 773 + mode_lib, 774 + &(rq_param->sizing.rq_l), 775 + &(rq_param->dlg.rq_l), 776 + &(rq_param->misc.rq_l), 777 + pipe_param, 778 + 0); 779 + 780 + if (is_dual_plane((enum source_format_class) (pipe_param.src.source_format))) { 781 + // get param for chroma surface 782 + get_surf_rq_param( 783 + mode_lib, 784 + &(rq_param->sizing.rq_c), 785 + &(rq_param->dlg.rq_c), 786 + &(rq_param->misc.rq_c), 787 + pipe_param, 788 + 1); 789 + } 790 + 791 + // calculate how to split the det buffer space between luma and chroma 792 + handle_det_buf_split(mode_lib, rq_param, pipe_param.src); 793 + print__rq_params_st(mode_lib, *rq_param); 794 + } 795 + 796 + void dml21_rq_dlg_get_rq_reg( 797 + struct display_mode_lib *mode_lib, 798 + display_rq_regs_st *rq_regs, 799 + const display_pipe_params_st pipe_param) 800 + { 801 + 
display_rq_params_st rq_param = {0}; 802 + 803 + memset(rq_regs, 0, sizeof(*rq_regs)); 804 + dml_rq_dlg_get_rq_params(mode_lib, &rq_param, pipe_param); 805 + extract_rq_regs(mode_lib, rq_regs, rq_param); 806 + 807 + print__rq_regs_st(mode_lib, *rq_regs); 808 + } 809 + 810 + // Note: currently taken in as is. 811 + // Nice to decouple code from hw register implement and extract code that are repeated for luma and chroma. 812 + static void dml_rq_dlg_get_dlg_params( 813 + struct display_mode_lib *mode_lib, 814 + const display_e2e_pipe_params_st *e2e_pipe_param, 815 + const unsigned int num_pipes, 816 + const unsigned int pipe_idx, 817 + display_dlg_regs_st *disp_dlg_regs, 818 + display_ttu_regs_st *disp_ttu_regs, 819 + const display_rq_dlg_params_st rq_dlg_param, 820 + const display_dlg_sys_params_st dlg_sys_param, 821 + const bool cstate_en, 822 + const bool pstate_en) 823 + { 824 + const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src; 825 + const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest; 826 + const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout; 827 + const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg; 828 + const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth; 829 + const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps; 830 + 831 + // ------------------------- 832 + // Section 1.15.2.1: OTG dependent Params 833 + // ------------------------- 834 + // Timing 835 + unsigned int htotal = dst->htotal; 836 + // unsigned int hblank_start = dst.hblank_start; // TODO: Remove 837 + unsigned int hblank_end = dst->hblank_end; 838 + unsigned int vblank_start = dst->vblank_start; 839 + unsigned int vblank_end = dst->vblank_end; 840 + unsigned int min_vblank = mode_lib->ip.min_vblank_lines; 841 + 842 + double dppclk_freq_in_mhz = clks->dppclk_mhz; 843 + double dispclk_freq_in_mhz = clks->dispclk_mhz; 844 + double refclk_freq_in_mhz = 
clks->refclk_mhz; 845 + double pclk_freq_in_mhz = dst->pixel_rate_mhz; 846 + bool interlaced = dst->interlaced; 847 + 848 + double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz; 849 + 850 + double min_dcfclk_mhz; 851 + double t_calc_us; 852 + double min_ttu_vblank; 853 + 854 + double min_dst_y_ttu_vblank; 855 + unsigned int dlg_vblank_start; 856 + bool dual_plane; 857 + bool mode_422; 858 + unsigned int access_dir; 859 + unsigned int vp_height_l; 860 + unsigned int vp_width_l; 861 + unsigned int vp_height_c; 862 + unsigned int vp_width_c; 863 + 864 + // Scaling 865 + unsigned int htaps_l; 866 + unsigned int htaps_c; 867 + double hratio_l; 868 + double hratio_c; 869 + double vratio_l; 870 + double vratio_c; 871 + bool scl_enable; 872 + 873 + double line_time_in_us; 874 + // double vinit_l; 875 + // double vinit_c; 876 + // double vinit_bot_l; 877 + // double vinit_bot_c; 878 + 879 + // unsigned int swath_height_l; 880 + unsigned int swath_width_ub_l; 881 + // unsigned int dpte_bytes_per_row_ub_l; 882 + unsigned int dpte_groups_per_row_ub_l; 883 + // unsigned int meta_pte_bytes_per_frame_ub_l; 884 + // unsigned int meta_bytes_per_row_ub_l; 885 + 886 + // unsigned int swath_height_c; 887 + unsigned int swath_width_ub_c; 888 + // unsigned int dpte_bytes_per_row_ub_c; 889 + unsigned int dpte_groups_per_row_ub_c; 890 + 891 + unsigned int meta_chunks_per_row_ub_l; 892 + unsigned int meta_chunks_per_row_ub_c; 893 + unsigned int vupdate_offset; 894 + unsigned int vupdate_width; 895 + unsigned int vready_offset; 896 + 897 + unsigned int dppclk_delay_subtotal; 898 + unsigned int dispclk_delay_subtotal; 899 + unsigned int pixel_rate_delay_subtotal; 900 + 901 + unsigned int vstartup_start; 902 + unsigned int dst_x_after_scaler; 903 + unsigned int dst_y_after_scaler; 904 + double line_wait; 905 + double dst_y_prefetch; 906 + double dst_y_per_vm_vblank; 907 + double dst_y_per_row_vblank; 908 + double dst_y_per_vm_flip; 909 + double dst_y_per_row_flip; 910 + double 
max_dst_y_per_vm_vblank; 911 + double max_dst_y_per_row_vblank; 912 + double lsw; 913 + double vratio_pre_l; 914 + double vratio_pre_c; 915 + unsigned int req_per_swath_ub_l; 916 + unsigned int req_per_swath_ub_c; 917 + unsigned int meta_row_height_l; 918 + unsigned int meta_row_height_c; 919 + unsigned int swath_width_pixels_ub_l; 920 + unsigned int swath_width_pixels_ub_c; 921 + unsigned int scaler_rec_in_width_l; 922 + unsigned int scaler_rec_in_width_c; 923 + unsigned int dpte_row_height_l; 924 + unsigned int dpte_row_height_c; 925 + double hscale_pixel_rate_l; 926 + double hscale_pixel_rate_c; 927 + double min_hratio_fact_l; 928 + double min_hratio_fact_c; 929 + double refcyc_per_line_delivery_pre_l; 930 + double refcyc_per_line_delivery_pre_c; 931 + double refcyc_per_line_delivery_l; 932 + double refcyc_per_line_delivery_c; 933 + 934 + double refcyc_per_req_delivery_pre_l; 935 + double refcyc_per_req_delivery_pre_c; 936 + double refcyc_per_req_delivery_l; 937 + double refcyc_per_req_delivery_c; 938 + 939 + unsigned int full_recout_width; 940 + double xfc_transfer_delay; 941 + double xfc_precharge_delay; 942 + double xfc_remote_surface_flip_latency; 943 + double xfc_dst_y_delta_drq_limit; 944 + double xfc_prefetch_margin; 945 + double refcyc_per_req_delivery_pre_cur0; 946 + double refcyc_per_req_delivery_cur0; 947 + double refcyc_per_req_delivery_pre_cur1; 948 + double refcyc_per_req_delivery_cur1; 949 + 950 + memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); 951 + memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); 952 + 953 + dml_print("DML_DLG: %s: cstate_en = %d\n", __func__, cstate_en); 954 + dml_print("DML_DLG: %s: pstate_en = %d\n", __func__, pstate_en); 955 + 956 + dml_print("DML_DLG: %s: dppclk_freq_in_mhz = %3.2f\n", __func__, dppclk_freq_in_mhz); 957 + dml_print("DML_DLG: %s: dispclk_freq_in_mhz = %3.2f\n", __func__, dispclk_freq_in_mhz); 958 + dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); 959 + 
dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); 960 + dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced); 961 + ASSERT(ref_freq_to_pix_freq < 4.0); 962 + 963 + disp_dlg_regs->ref_freq_to_pix_freq = 964 + (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19)); 965 + disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal 966 + * dml_pow(2, 8)); 967 + disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits 968 + disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end 969 + * (double) ref_freq_to_pix_freq); 970 + ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)dml_pow(2, 13)); 971 + 972 + min_dcfclk_mhz = dlg_sys_param.deepsleep_dcfclk_mhz; 973 + t_calc_us = get_tcalc(mode_lib, e2e_pipe_param, num_pipes); 974 + min_ttu_vblank = get_min_ttu_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); 975 + 976 + min_dst_y_ttu_vblank = min_ttu_vblank * pclk_freq_in_mhz / (double) htotal; 977 + dlg_vblank_start = interlaced ? 
(vblank_start / 2) : vblank_start; 978 + 979 + disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start) * dml_pow(2, 2)); 980 + ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18)); 981 + 982 + dml_print( 983 + "DML_DLG: %s: min_dcfclk_mhz = %3.2f\n", 984 + __func__, 985 + min_dcfclk_mhz); 986 + dml_print( 987 + "DML_DLG: %s: min_ttu_vblank = %3.2f\n", 988 + __func__, 989 + min_ttu_vblank); 990 + dml_print( 991 + "DML_DLG: %s: min_dst_y_ttu_vblank = %3.2f\n", 992 + __func__, 993 + min_dst_y_ttu_vblank); 994 + dml_print( 995 + "DML_DLG: %s: t_calc_us = %3.2f\n", 996 + __func__, 997 + t_calc_us); 998 + dml_print( 999 + "DML_DLG: %s: disp_dlg_regs->min_dst_y_next_start = 0x%0x\n", 1000 + __func__, 1001 + disp_dlg_regs->min_dst_y_next_start); 1002 + dml_print( 1003 + "DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", 1004 + __func__, 1005 + ref_freq_to_pix_freq); 1006 + 1007 + // ------------------------- 1008 + // Section 1.15.2.2: Prefetch, Active and TTU 1009 + // ------------------------- 1010 + // Prefetch Calc 1011 + // Source 1012 + // dcc_en = src.dcc; 1013 + dual_plane = is_dual_plane((enum source_format_class) (src->source_format)); 1014 + mode_422 = 0; // FIXME 1015 + access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed 1016 + // bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0); 1017 + // bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1); 1018 + vp_height_l = src->viewport_height; 1019 + vp_width_l = src->viewport_width; 1020 + vp_height_c = src->viewport_height_c; 1021 + vp_width_c = src->viewport_width_c; 1022 + 1023 + // Scaling 1024 + htaps_l = taps->htaps; 1025 + htaps_c = taps->htaps_c; 1026 + hratio_l = scl->hscl_ratio; 1027 + hratio_c = scl->hscl_ratio_c; 1028 + vratio_l = scl->vscl_ratio; 1029 + vratio_c = scl->vscl_ratio_c; 1030 + scl_enable = scl->scl_enable; 1031 + 1032 + 
line_time_in_us = (htotal / pclk_freq_in_mhz); 1033 + swath_width_ub_l = rq_dlg_param.rq_l.swath_width_ub; 1034 + dpte_groups_per_row_ub_l = rq_dlg_param.rq_l.dpte_groups_per_row_ub; 1035 + swath_width_ub_c = rq_dlg_param.rq_c.swath_width_ub; 1036 + dpte_groups_per_row_ub_c = rq_dlg_param.rq_c.dpte_groups_per_row_ub; 1037 + 1038 + meta_chunks_per_row_ub_l = rq_dlg_param.rq_l.meta_chunks_per_row_ub; 1039 + meta_chunks_per_row_ub_c = rq_dlg_param.rq_c.meta_chunks_per_row_ub; 1040 + vupdate_offset = dst->vupdate_offset; 1041 + vupdate_width = dst->vupdate_width; 1042 + vready_offset = dst->vready_offset; 1043 + 1044 + dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal; 1045 + dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal; 1046 + 1047 + if (scl_enable) 1048 + dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl; 1049 + else 1050 + dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only; 1051 + 1052 + dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter 1053 + + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor; 1054 + 1055 + if (dout->dsc_enable) { 1056 + double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); 1057 + 1058 + dispclk_delay_subtotal += dsc_delay; 1059 + } 1060 + 1061 + pixel_rate_delay_subtotal = dppclk_delay_subtotal * pclk_freq_in_mhz / dppclk_freq_in_mhz 1062 + + dispclk_delay_subtotal * pclk_freq_in_mhz / dispclk_freq_in_mhz; 1063 + 1064 + vstartup_start = dst->vstartup_start; 1065 + if (interlaced) { 1066 + if (vstartup_start / 2.0 1067 + - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal 1068 + <= vblank_end / 2.0) 1069 + disp_dlg_regs->vready_after_vcount0 = 1; 1070 + else 1071 + disp_dlg_regs->vready_after_vcount0 = 0; 1072 + } else { 1073 + if (vstartup_start 1074 + - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal 1075 + <= vblank_end) 1076 + disp_dlg_regs->vready_after_vcount0 = 1; 1077 + else 1078 + 
disp_dlg_regs->vready_after_vcount0 = 0; 1079 + } 1080 + 1081 + // TODO: Where is this coming from? 1082 + if (interlaced) 1083 + vstartup_start = vstartup_start / 2; 1084 + 1085 + // TODO: What if this min_vblank doesn't match the value in the dml_config_settings.cpp? 1086 + if (vstartup_start >= min_vblank) { 1087 + dml_print( 1088 + "WARNING: DML_DLG: %s: vblank_start=%d vblank_end=%d\n", 1089 + __func__, 1090 + vblank_start, 1091 + vblank_end); 1092 + dml_print( 1093 + "WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n", 1094 + __func__, 1095 + vstartup_start, 1096 + min_vblank); 1097 + min_vblank = vstartup_start + 1; 1098 + dml_print( 1099 + "WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n", 1100 + __func__, 1101 + vstartup_start, 1102 + min_vblank); 1103 + } 1104 + 1105 + dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); 1106 + dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); 1107 + 1108 + dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal); 1109 + dml_print( 1110 + "DML_DLG: %s: pixel_rate_delay_subtotal = %d\n", 1111 + __func__, 1112 + pixel_rate_delay_subtotal); 1113 + dml_print( 1114 + "DML_DLG: %s: dst_x_after_scaler = %d\n", 1115 + __func__, 1116 + dst_x_after_scaler); 1117 + dml_print( 1118 + "DML_DLG: %s: dst_y_after_scaler = %d\n", 1119 + __func__, 1120 + dst_y_after_scaler); 1121 + 1122 + // Lwait 1123 + // TODO: Should this be urgent_latency_pixel_mixed_with_vm_data_us? 1124 + line_wait = mode_lib->soc.urgent_latency_pixel_data_only_us; 1125 + if (cstate_en) 1126 + line_wait = dml_max(mode_lib->soc.sr_enter_plus_exit_time_us, line_wait); 1127 + if (pstate_en) 1128 + line_wait = dml_max( 1129 + mode_lib->soc.dram_clock_change_latency_us 1130 + + mode_lib->soc.urgent_latency_pixel_data_only_us, // TODO: Should this be urgent_latency_pixel_mixed_with_vm_data_us? 
1131 + line_wait); 1132 + line_wait = line_wait / line_time_in_us; 1133 + 1134 + dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); 1135 + dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch); 1136 + 1137 + dst_y_per_vm_vblank = get_dst_y_per_vm_vblank( 1138 + mode_lib, 1139 + e2e_pipe_param, 1140 + num_pipes, 1141 + pipe_idx); 1142 + dst_y_per_row_vblank = get_dst_y_per_row_vblank( 1143 + mode_lib, 1144 + e2e_pipe_param, 1145 + num_pipes, 1146 + pipe_idx); 1147 + dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); 1148 + dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); 1149 + 1150 + max_dst_y_per_vm_vblank = 32.0; 1151 + max_dst_y_per_row_vblank = 16.0; 1152 + 1153 + // magic! 1154 + if (htotal <= 75) { 1155 + min_vblank = 300; 1156 + max_dst_y_per_vm_vblank = 100.0; 1157 + max_dst_y_per_row_vblank = 100.0; 1158 + } 1159 + 1160 + dml_print("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, dst_y_per_vm_flip); 1161 + dml_print("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, dst_y_per_row_flip); 1162 + dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank); 1163 + dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank); 1164 + 1165 + ASSERT(dst_y_per_vm_vblank < max_dst_y_per_vm_vblank); 1166 + ASSERT(dst_y_per_row_vblank < max_dst_y_per_row_vblank); 1167 + 1168 + ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank)); 1169 + lsw = dst_y_prefetch - (dst_y_per_vm_vblank + dst_y_per_row_vblank); 1170 + 1171 + dml_print("DML_DLG: %s: lsw = %3.2f\n", __func__, lsw); 1172 + 1173 + vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); 1174 + vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); 1175 + 1176 + dml_print("DML_DLG: %s: vratio_pre_l=%3.2f\n", __func__, 
vratio_pre_l); 1177 + dml_print("DML_DLG: %s: vratio_pre_c=%3.2f\n", __func__, vratio_pre_c); 1178 + 1179 + // Active 1180 + req_per_swath_ub_l = rq_dlg_param.rq_l.req_per_swath_ub; 1181 + req_per_swath_ub_c = rq_dlg_param.rq_c.req_per_swath_ub; 1182 + meta_row_height_l = rq_dlg_param.rq_l.meta_row_height; 1183 + meta_row_height_c = rq_dlg_param.rq_c.meta_row_height; 1184 + swath_width_pixels_ub_l = 0; 1185 + swath_width_pixels_ub_c = 0; 1186 + scaler_rec_in_width_l = 0; 1187 + scaler_rec_in_width_c = 0; 1188 + dpte_row_height_l = rq_dlg_param.rq_l.dpte_row_height; 1189 + dpte_row_height_c = rq_dlg_param.rq_c.dpte_row_height; 1190 + 1191 + if (mode_422) { 1192 + swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element 1193 + swath_width_pixels_ub_c = swath_width_ub_c * 2; 1194 + } else { 1195 + swath_width_pixels_ub_l = swath_width_ub_l * 1; 1196 + swath_width_pixels_ub_c = swath_width_ub_c * 1; 1197 + } 1198 + 1199 + hscale_pixel_rate_l = 0.; 1200 + hscale_pixel_rate_c = 0.; 1201 + min_hratio_fact_l = 1.0; 1202 + min_hratio_fact_c = 1.0; 1203 + 1204 + if (htaps_l <= 1) 1205 + min_hratio_fact_l = 2.0; 1206 + else if (htaps_l <= 6) { 1207 + if ((hratio_l * 2.0) > 4.0) 1208 + min_hratio_fact_l = 4.0; 1209 + else 1210 + min_hratio_fact_l = hratio_l * 2.0; 1211 + } else { 1212 + if (hratio_l > 4.0) 1213 + min_hratio_fact_l = 4.0; 1214 + else 1215 + min_hratio_fact_l = hratio_l; 1216 + } 1217 + 1218 + hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz; 1219 + 1220 + if (htaps_c <= 1) 1221 + min_hratio_fact_c = 2.0; 1222 + else if (htaps_c <= 6) { 1223 + if ((hratio_c * 2.0) > 4.0) 1224 + min_hratio_fact_c = 4.0; 1225 + else 1226 + min_hratio_fact_c = hratio_c * 2.0; 1227 + } else { 1228 + if (hratio_c > 4.0) 1229 + min_hratio_fact_c = 4.0; 1230 + else 1231 + min_hratio_fact_c = hratio_c; 1232 + } 1233 + 1234 + hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz; 1235 + 1236 + refcyc_per_line_delivery_pre_l = 0.; 1237 + 
refcyc_per_line_delivery_pre_c = 0.; 1238 + refcyc_per_line_delivery_l = 0.; 1239 + refcyc_per_line_delivery_c = 0.; 1240 + 1241 + refcyc_per_req_delivery_pre_l = 0.; 1242 + refcyc_per_req_delivery_pre_c = 0.; 1243 + refcyc_per_req_delivery_l = 0.; 1244 + refcyc_per_req_delivery_c = 0.; 1245 + 1246 + full_recout_width = 0; 1247 + // In ODM 1248 + if (src->is_hsplit) { 1249 + // This "hack" is only allowed (and valid) for MPC combine. In ODM 1250 + // combine, you MUST specify the full_recout_width...according to Oswin 1251 + if (dst->full_recout_width == 0 && !dst->odm_combine) { 1252 + dml_print( 1253 + "DML_DLG: %s: Warning: full_recout_width not set in hsplit mode\n", 1254 + __func__); 1255 + full_recout_width = dst->recout_width * 2; // assume half split for dcn1 1256 + } else 1257 + full_recout_width = dst->full_recout_width; 1258 + } else 1259 + full_recout_width = dst->recout_width; 1260 + 1261 + // As of DCN2, mpc_combine and odm_combine are mutually exclusive 1262 + refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery( 1263 + mode_lib, 1264 + refclk_freq_in_mhz, 1265 + pclk_freq_in_mhz, 1266 + dst->odm_combine, 1267 + full_recout_width, 1268 + dst->hactive, 1269 + vratio_pre_l, 1270 + hscale_pixel_rate_l, 1271 + swath_width_pixels_ub_l, 1272 + 1); // per line 1273 + 1274 + refcyc_per_line_delivery_l = get_refcyc_per_delivery( 1275 + mode_lib, 1276 + refclk_freq_in_mhz, 1277 + pclk_freq_in_mhz, 1278 + dst->odm_combine, 1279 + full_recout_width, 1280 + dst->hactive, 1281 + vratio_l, 1282 + hscale_pixel_rate_l, 1283 + swath_width_pixels_ub_l, 1284 + 1); // per line 1285 + 1286 + dml_print("DML_DLG: %s: full_recout_width = %d\n", __func__, full_recout_width); 1287 + dml_print( 1288 + "DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n", 1289 + __func__, 1290 + hscale_pixel_rate_l); 1291 + dml_print( 1292 + "DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", 1293 + __func__, 1294 + refcyc_per_line_delivery_pre_l); 1295 + dml_print( 1296 + "DML_DLG: %s: 
refcyc_per_line_delivery_l = %3.2f\n", 1297 + __func__, 1298 + refcyc_per_line_delivery_l); 1299 + 1300 + if (dual_plane) { 1301 + refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery( 1302 + mode_lib, 1303 + refclk_freq_in_mhz, 1304 + pclk_freq_in_mhz, 1305 + dst->odm_combine, 1306 + full_recout_width, 1307 + dst->hactive, 1308 + vratio_pre_c, 1309 + hscale_pixel_rate_c, 1310 + swath_width_pixels_ub_c, 1311 + 1); // per line 1312 + 1313 + refcyc_per_line_delivery_c = get_refcyc_per_delivery( 1314 + mode_lib, 1315 + refclk_freq_in_mhz, 1316 + pclk_freq_in_mhz, 1317 + dst->odm_combine, 1318 + full_recout_width, 1319 + dst->hactive, 1320 + vratio_c, 1321 + hscale_pixel_rate_c, 1322 + swath_width_pixels_ub_c, 1323 + 1); // per line 1324 + 1325 + dml_print( 1326 + "DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", 1327 + __func__, 1328 + refcyc_per_line_delivery_pre_c); 1329 + dml_print( 1330 + "DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", 1331 + __func__, 1332 + refcyc_per_line_delivery_c); 1333 + } 1334 + 1335 + // TTU - Luma / Chroma 1336 + if (access_dir) { // vertical access 1337 + scaler_rec_in_width_l = vp_height_l; 1338 + scaler_rec_in_width_c = vp_height_c; 1339 + } else { 1340 + scaler_rec_in_width_l = vp_width_l; 1341 + scaler_rec_in_width_c = vp_width_c; 1342 + } 1343 + 1344 + refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery( 1345 + mode_lib, 1346 + refclk_freq_in_mhz, 1347 + pclk_freq_in_mhz, 1348 + dst->odm_combine, 1349 + full_recout_width, 1350 + dst->hactive, 1351 + vratio_pre_l, 1352 + hscale_pixel_rate_l, 1353 + scaler_rec_in_width_l, 1354 + req_per_swath_ub_l); // per req 1355 + refcyc_per_req_delivery_l = get_refcyc_per_delivery( 1356 + mode_lib, 1357 + refclk_freq_in_mhz, 1358 + pclk_freq_in_mhz, 1359 + dst->odm_combine, 1360 + full_recout_width, 1361 + dst->hactive, 1362 + vratio_l, 1363 + hscale_pixel_rate_l, 1364 + scaler_rec_in_width_l, 1365 + req_per_swath_ub_l); // per req 1366 + 1367 + dml_print( 1368 + "DML_DLG: %s: 
refcyc_per_req_delivery_pre_l = %3.2f\n", 1369 + __func__, 1370 + refcyc_per_req_delivery_pre_l); 1371 + dml_print( 1372 + "DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", 1373 + __func__, 1374 + refcyc_per_req_delivery_l); 1375 + 1376 + ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13)); 1377 + ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13)); 1378 + 1379 + if (dual_plane) { 1380 + refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery( 1381 + mode_lib, 1382 + refclk_freq_in_mhz, 1383 + pclk_freq_in_mhz, 1384 + dst->odm_combine, 1385 + full_recout_width, 1386 + dst->hactive, 1387 + vratio_pre_c, 1388 + hscale_pixel_rate_c, 1389 + scaler_rec_in_width_c, 1390 + req_per_swath_ub_c); // per req 1391 + refcyc_per_req_delivery_c = get_refcyc_per_delivery( 1392 + mode_lib, 1393 + refclk_freq_in_mhz, 1394 + pclk_freq_in_mhz, 1395 + dst->odm_combine, 1396 + full_recout_width, 1397 + dst->hactive, 1398 + vratio_c, 1399 + hscale_pixel_rate_c, 1400 + scaler_rec_in_width_c, 1401 + req_per_swath_ub_c); // per req 1402 + 1403 + dml_print( 1404 + "DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", 1405 + __func__, 1406 + refcyc_per_req_delivery_pre_c); 1407 + dml_print( 1408 + "DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", 1409 + __func__, 1410 + refcyc_per_req_delivery_c); 1411 + 1412 + ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13)); 1413 + ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13)); 1414 + } 1415 + 1416 + // XFC 1417 + xfc_transfer_delay = get_xfc_transfer_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); 1418 + xfc_precharge_delay = get_xfc_precharge_delay( 1419 + mode_lib, 1420 + e2e_pipe_param, 1421 + num_pipes, 1422 + pipe_idx); 1423 + xfc_remote_surface_flip_latency = get_xfc_remote_surface_flip_latency( 1424 + mode_lib, 1425 + e2e_pipe_param, 1426 + num_pipes, 1427 + pipe_idx); 1428 + xfc_dst_y_delta_drq_limit = xfc_remote_surface_flip_latency; 1429 + xfc_prefetch_margin = get_xfc_prefetch_margin( 1430 + mode_lib, 1431 + 
e2e_pipe_param, 1432 + num_pipes, 1433 + pipe_idx); 1434 + 1435 + // TTU - Cursor 1436 + refcyc_per_req_delivery_pre_cur0 = 0.0; 1437 + refcyc_per_req_delivery_cur0 = 0.0; 1438 + if (src->num_cursors > 0) { 1439 + calculate_ttu_cursor( 1440 + mode_lib, 1441 + &refcyc_per_req_delivery_pre_cur0, 1442 + &refcyc_per_req_delivery_cur0, 1443 + refclk_freq_in_mhz, 1444 + ref_freq_to_pix_freq, 1445 + hscale_pixel_rate_l, 1446 + scl->hscl_ratio, 1447 + vratio_pre_l, 1448 + vratio_l, 1449 + src->cur0_src_width, 1450 + (enum cursor_bpp) (src->cur0_bpp)); 1451 + } 1452 + 1453 + refcyc_per_req_delivery_pre_cur1 = 0.0; 1454 + refcyc_per_req_delivery_cur1 = 0.0; 1455 + if (src->num_cursors > 1) { 1456 + calculate_ttu_cursor( 1457 + mode_lib, 1458 + &refcyc_per_req_delivery_pre_cur1, 1459 + &refcyc_per_req_delivery_cur1, 1460 + refclk_freq_in_mhz, 1461 + ref_freq_to_pix_freq, 1462 + hscale_pixel_rate_l, 1463 + scl->hscl_ratio, 1464 + vratio_pre_l, 1465 + vratio_l, 1466 + src->cur1_src_width, 1467 + (enum cursor_bpp) (src->cur1_bpp)); 1468 + } 1469 + 1470 + // TTU - Misc 1471 + // all hard-coded 1472 + 1473 + // Assignment to register structures 1474 + disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line 1475 + disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk 1476 + ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)dml_pow(2, 13)); 1477 + disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2)); 1478 + disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2)); 1479 + disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2)); 1480 + disp_dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2)); 1481 + disp_dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2)); 1482 + 1483 + disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19)); 
1484 + disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19)); 1485 + 1486 + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank); 1487 + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); 1488 + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); 1489 + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); 1490 + 1491 + disp_dlg_regs->refcyc_per_pte_group_vblank_l = 1492 + (unsigned int) (dst_y_per_row_vblank * (double) htotal 1493 + * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l); 1494 + ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)dml_pow(2, 13)); 1495 + 1496 + if (dual_plane) { 1497 + disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int) (dst_y_per_row_vblank 1498 + * (double) htotal * ref_freq_to_pix_freq 1499 + / (double) dpte_groups_per_row_ub_c); 1500 + ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c 1501 + < (unsigned int)dml_pow(2, 13)); 1502 + } 1503 + 1504 + disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = 1505 + (unsigned int) (dst_y_per_row_vblank * (double) htotal 1506 + * ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l); 1507 + ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int)dml_pow(2, 13)); 1508 + 1509 + disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = 1510 + disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; // dcc for 4:2:0 is not supported in dcn1.0. 
assigned to be the same as _l for now 1511 + 1512 + disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int) (dst_y_per_row_flip * htotal 1513 + * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_l; 1514 + disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int) (dst_y_per_row_flip * htotal 1515 + * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_l; 1516 + 1517 + if (dual_plane) { 1518 + disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int) (dst_y_per_row_flip 1519 + * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_c; 1520 + disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int) (dst_y_per_row_flip 1521 + * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_c; 1522 + } 1523 + 1524 + disp_dlg_regs->refcyc_per_vm_group_vblank = get_refcyc_per_vm_group_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; 1525 + disp_dlg_regs->refcyc_per_vm_group_flip = get_refcyc_per_vm_group_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; 1526 + disp_dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;; 1527 + disp_dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;; 1528 + 1529 + // Clamp to max for now 1530 + if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)dml_pow(2, 23)) 1531 + disp_dlg_regs->refcyc_per_vm_group_vblank = dml_pow(2, 23) - 1; 1532 + 1533 + if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int)dml_pow(2, 23)) 1534 + disp_dlg_regs->refcyc_per_vm_group_flip = dml_pow(2, 23) - 1; 1535 + 1536 + if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int)dml_pow(2, 23)) 1537 + disp_dlg_regs->refcyc_per_vm_req_vblank = dml_pow(2, 23) - 1; 1538 + 1539 + if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int)dml_pow(2, 23)) 1540 + disp_dlg_regs->refcyc_per_vm_req_flip = dml_pow(2, 23) - 1; 1541 + 
disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l 1542 + / (double) vratio_l * dml_pow(2, 2)); 1543 + ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int)dml_pow(2, 17)); 1544 + 1545 + if (dual_plane) { 1546 + disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c 1547 + / (double) vratio_c * dml_pow(2, 2)); 1548 + if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) { 1549 + dml_print( 1550 + "DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u larger than supported by register format U15.2 %u\n", 1551 + __func__, 1552 + disp_dlg_regs->dst_y_per_pte_row_nom_c, 1553 + (unsigned int)dml_pow(2, 17) - 1); 1554 + } 1555 + } 1556 + 1557 + disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l 1558 + / (double) vratio_l * dml_pow(2, 2)); 1559 + ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int)dml_pow(2, 17)); 1560 + 1561 + disp_dlg_regs->dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_l; // TODO: dcc for 4:2:0 is not supported in dcn1.0. 
assigned to be the same as _l for now 1562 + 1563 + dml_print( 1564 + "DML: Trow: %fus\n", 1565 + line_time_in_us * (double)dpte_row_height_l / (double)vratio_l); 1566 + 1567 + disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l 1568 + / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq 1569 + / (double) dpte_groups_per_row_ub_l); 1570 + if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23)) 1571 + disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1; 1572 + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l 1573 + / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq 1574 + / (double) meta_chunks_per_row_ub_l); 1575 + if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23)) 1576 + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1; 1577 + 1578 + if (dual_plane) { 1579 + disp_dlg_regs->refcyc_per_pte_group_nom_c = 1580 + (unsigned int) ((double) dpte_row_height_c / (double) vratio_c 1581 + * (double) htotal * ref_freq_to_pix_freq 1582 + / (double) dpte_groups_per_row_ub_c); 1583 + if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23)) 1584 + disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1; 1585 + 1586 + // TODO: Is this the right calculation? Does htotal need to be halved? 
1587 + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = 1588 + (unsigned int) ((double) meta_row_height_c / (double) vratio_c 1589 + * (double) htotal * ref_freq_to_pix_freq 1590 + / (double) meta_chunks_per_row_ub_c); 1591 + if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23)) 1592 + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1; 1593 + } 1594 + 1595 + disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor( 1596 + refcyc_per_line_delivery_pre_l, 1); 1597 + disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor( 1598 + refcyc_per_line_delivery_l, 1); 1599 + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)dml_pow(2, 13)); 1600 + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)dml_pow(2, 13)); 1601 + 1602 + disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor( 1603 + refcyc_per_line_delivery_pre_c, 1); 1604 + disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor( 1605 + refcyc_per_line_delivery_c, 1); 1606 + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)dml_pow(2, 13)); 1607 + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)dml_pow(2, 13)); 1608 + 1609 + disp_dlg_regs->chunk_hdl_adjust_cur0 = 3; 1610 + disp_dlg_regs->dst_y_offset_cur0 = 0; 1611 + disp_dlg_regs->chunk_hdl_adjust_cur1 = 3; 1612 + disp_dlg_regs->dst_y_offset_cur1 = 0; 1613 + 1614 + disp_dlg_regs->xfc_reg_transfer_delay = xfc_transfer_delay; 1615 + disp_dlg_regs->xfc_reg_precharge_delay = xfc_precharge_delay; 1616 + disp_dlg_regs->xfc_reg_remote_surface_flip_latency = xfc_remote_surface_flip_latency; 1617 + disp_dlg_regs->xfc_reg_prefetch_margin = dml_ceil( 1618 + xfc_prefetch_margin * refclk_freq_in_mhz, 1); 1619 + 1620 + // slave has to have this value also set to off 1621 + if (src->xfc_enable && !src->xfc_slave) 1622 + disp_dlg_regs->dst_y_delta_drq_limit = dml_ceil(xfc_dst_y_delta_drq_limit, 1); 1623 + else 1624 + 
/*
 * NOTE(review): everything down to the first closing brace is the tail of a
 * function whose beginning lies above this excerpt (presumably
 * dml_rq_dlg_get_dlg_params(), which is called with matching arguments
 * further down). Its code is reproduced unchanged.
 */
		disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off

	// TTU request-delivery values: each one is multiplied by dml_pow(2, 10)
	// and truncated to unsigned int when stored in the register struct.
	disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l
			* dml_pow(2, 10));
	disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l
			* dml_pow(2, 10));
	disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c
			* dml_pow(2, 10));
	disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c
			* dml_pow(2, 10));
	disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 =
			(unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10));
	disp_ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0
			* dml_pow(2, 10));
	disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 =
			(unsigned int) (refcyc_per_req_delivery_pre_cur1 * dml_pow(2, 10));
	disp_ttu_regs->refcyc_per_req_delivery_cur1 = (unsigned int) (refcyc_per_req_delivery_cur1
			* dml_pow(2, 10));
	// Low watermark is the constant 0; high watermark is 4 * htotal *
	// ref_freq_to_pix_freq. Both are asserted to stay below dml_pow(2, 14).
	disp_ttu_regs->qos_level_low_wm = 0;
	ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14));
	disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal
			* ref_freq_to_pix_freq);
	ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14));

	// The remaining QoS fields are literal constants, not derived from the mode.
	disp_ttu_regs->qos_level_flip = 14;
	disp_ttu_regs->qos_level_fixed_l = 8;
	disp_ttu_regs->qos_level_fixed_c = 8;
	disp_ttu_regs->qos_level_fixed_cur0 = 8;
	disp_ttu_regs->qos_ramp_disable_l = 0;
	disp_ttu_regs->qos_ramp_disable_c = 0;
	disp_ttu_regs->qos_ramp_disable_cur0 = 0;

	// min_ttu_vblank is multiplied by the refclk frequency before being stored
	// (presumably converting microseconds to refclk cycles - TODO confirm units).
	disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz;
	ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24));

	print__ttu_regs_st(mode_lib, *disp_ttu_regs);
	print__dlg_regs_st(mode_lib, *disp_dlg_regs);
}

/*
 * dml21_rq_dlg_get_dlg_reg - fill the DLG and TTU register structs for one pipe.
 *
 * Steps performed here:
 *   1. Populate a zero-initialised display_dlg_sys_params_st from the get_*()
 *      helpers, each called with (mode_lib, e2e_pipe_param, num_pipes).
 *   2. Obtain the RQ parameters for e2e_pipe_param[pipe_idx].pipe through
 *      dml_rq_dlg_get_rq_params().
 *   3. Hand everything to dml_rq_dlg_get_dlg_params() together with the
 *      dlg_regs / ttu_regs output pointers.
 *
 * mode_lib        - DML instance; its ip.dcfclk_cstate_latency is read here
 * dlg_regs        - output, passed straight on to dml_rq_dlg_get_dlg_params()
 * ttu_regs        - output, passed straight on to dml_rq_dlg_get_dlg_params()
 * e2e_pipe_param  - array of pipe parameters, indexed here with pipe_idx
 * num_pipes       - forwarded to every helper together with e2e_pipe_param
 * pipe_idx        - selects the pipe the registers are calculated for
 * cstate_en       - forwarded unchanged to dml_rq_dlg_get_dlg_params()
 * pstate_en       - forwarded unchanged to dml_rq_dlg_get_dlg_params()
 * vm_en, ignore_viewport_pos, immediate_flip_support
 *                 - accepted but not referenced anywhere in this function body
 */
void dml21_rq_dlg_get_dlg_reg(
		struct display_mode_lib *mode_lib,
		display_dlg_regs_st *dlg_regs,
		display_ttu_regs_st *ttu_regs,
		display_e2e_pipe_params_st *e2e_pipe_param,
		const unsigned int num_pipes,
		const unsigned int pipe_idx,
		const bool cstate_en,
		const bool pstate_en,
		const bool vm_en,
		const bool ignore_viewport_pos,
		const bool immediate_flip_support)
{
	display_rq_params_st rq_param = {0};
	display_dlg_sys_params_st dlg_sys_param = {0};

	// Get watermark and Tex.
	dlg_sys_param.t_urg_wm_us = get_wm_urgent(mode_lib, e2e_pipe_param, num_pipes);
	dlg_sys_param.deepsleep_dcfclk_mhz = get_clk_dcf_deepsleep(
			mode_lib,
			e2e_pipe_param,
			num_pipes);
	dlg_sys_param.t_extra_us = get_urgent_extra_latency(mode_lib, e2e_pipe_param, num_pipes);
	dlg_sys_param.mem_trip_us = get_wm_memory_trip(mode_lib, e2e_pipe_param, num_pipes);
	dlg_sys_param.t_mclk_wm_us = get_wm_dram_clock_change(mode_lib, e2e_pipe_param, num_pipes);
	dlg_sys_param.t_sr_wm_us = get_wm_stutter_enter_exit(mode_lib, e2e_pipe_param, num_pipes);
	dlg_sys_param.total_flip_bw = get_total_immediate_flip_bw(
			mode_lib,
			e2e_pipe_param,
			num_pipes);
	dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(
			mode_lib,
			e2e_pipe_param,
			num_pipes);
	// NOTE(review): the divisor is the deep-sleep DCFCLK fetched just above;
	// nothing in this function checks it for zero before dividing.
	dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency
			/ dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated

	print__dlg_sys_params_st(mode_lib, dlg_sys_param);

	// system parameter calculation done

	// NOTE(review): pipe_idx is unsigned int but is formatted with %d in both
	// messages below.
	dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx);
	dml_rq_dlg_get_rq_params(mode_lib, &rq_param, e2e_pipe_param[pipe_idx].pipe);
	// Only the .dlg member of the RQ parameters is handed to the DLG calculation.
	dml_rq_dlg_get_dlg_params(
			mode_lib,
			e2e_pipe_param,
			num_pipes,
			pipe_idx,
			dlg_regs,
			ttu_regs,
			rq_param.dlg,
			dlg_sys_param,
			cstate_en,
			pstate_en);
	dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx);
}

/*
 * dml_rq_dlg_get_arb_params - load the arbiter parameters with fixed values.
 *
 * Clears *arb_param and then sets max_req_outstanding = 256,
 * min_req_outstanding = 68 and sat_level_us = 60. mode_lib is not referenced.
 */
void dml_rq_dlg_get_arb_params(struct display_mode_lib *mode_lib, display_arb_params_st *arb_param)
{
	memset(arb_param, 0, sizeof(*arb_param));
	arb_param->max_req_outstanding = 256;
	arb_param->min_req_outstanding = 68;
	arb_param->sat_level_us = 60;
}

/*
 * calculate_ttu_cursor - compute the per-request delivery values for a cursor.
 *
 * Both outputs are first set to 0.0 and stay that way when cur_width is 0.
 * Otherwise:
 *   - a request size is chosen from cur_bpp and the cursor width,
 *   - the cursor width is rounded up to a whole number of requests, giving the
 *     number of requests per cursor row,
 *   - each output is then either
 *       hactive_cur * ref_freq_to_pix_freq / requests_per_row   (ratio <= 1.0)
 *     or
 *       refclk_freq_in_mhz * cur_width / hscale_pixel_rate_l / requests_per_row
 *     with vratio_pre_l selecting the formula for the "pre" output and
 *     vratio_l selecting it for the other one.
 *
 * mode_lib                          - not referenced directly in this body
 *                                     (NOTE(review): the dml_print()/ASSERT()
 *                                     macros may use it - verify)
 * refcyc_per_req_delivery_pre_cur   - output, selected by vratio_pre_l
 * refcyc_per_req_delivery_cur       - output, selected by vratio_l
 * cur_width                         - cursor source width; asserted <= 256
 * cur_bpp                           - dm_cur_2bit, or anything else, which is
 *                                     handled as a 32-bit cursor
 *
 * Both results are asserted to be below dml_pow(2, 13).
 */
static void calculate_ttu_cursor(
		struct display_mode_lib *mode_lib,
		double *refcyc_per_req_delivery_pre_cur,
		double *refcyc_per_req_delivery_cur,
		double refclk_freq_in_mhz,
		double ref_freq_to_pix_freq,
		double hscale_pixel_rate_l,
		double hscl_ratio,
		double vratio_pre_l,
		double vratio_l,
		unsigned int cur_width,
		enum cursor_bpp cur_bpp)
{
	unsigned int cur_src_width = cur_width;
	unsigned int cur_req_size = 0;
	unsigned int cur_req_width = 0;
	double cur_width_ub = 0.0;
	double cur_req_per_width = 0.0;
	double hactive_cur = 0.0;

	ASSERT(cur_src_width <= 256);

	*refcyc_per_req_delivery_pre_cur = 0.0;
	*refcyc_per_req_delivery_cur = 0.0;
	if (cur_src_width > 0) {
		unsigned int cur_bit_per_pixel = 0;

		// Request size: 64 for 2-bit cursors; for everything else it
		// grows with the width (1..16 -> 64, 17..31 -> 128, wider -> 256).
		if (cur_bpp == dm_cur_2bit) {
			cur_req_size = 64; // byte
			cur_bit_per_pixel = 2;
		} else { // 32bit
			cur_bit_per_pixel = 32;
			if (cur_src_width >= 1 && cur_src_width <= 16)
				cur_req_size = 64;
			else if (cur_src_width >= 17 && cur_src_width <= 31)
				cur_req_size = 128;
			else
				cur_req_size = 256;
		}

		// Request size divided by bytes per pixel; the double result is
		// truncated on assignment to the unsigned int.
		cur_req_width = (double) cur_req_size / ((double) cur_bit_per_pixel / 8.0);
		// Cursor width rounded up to a multiple of cur_req_width (assumes
		// dml_ceil(x, 1) rounds up to a whole number - TODO confirm).
		cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1)
				* (double) cur_req_width;
		cur_req_per_width = cur_width_ub / (double) cur_req_width;
		hactive_cur = (double) cur_src_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor

		if (vratio_pre_l <= 1.0) {
			*refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq
					/ (double) cur_req_per_width;
		} else {
			*refcyc_per_req_delivery_pre_cur = (double) refclk_freq_in_mhz
					* (double) cur_src_width / hscale_pixel_rate_l
					/ (double) cur_req_per_width;
		}

		ASSERT(*refcyc_per_req_delivery_pre_cur < dml_pow(2, 13));

		// Same two formulas as above, this time selected by vratio_l.
		if (vratio_l <= 1.0) {
			*refcyc_per_req_delivery_cur = hactive_cur * ref_freq_to_pix_freq
					/ (double) cur_req_per_width;
		} else {
			*refcyc_per_req_delivery_cur = (double) refclk_freq_in_mhz
					* (double) cur_src_width / hscale_pixel_rate_l
					/ (double) cur_req_per_width;
		}

		// NOTE(review): cur_req_width is unsigned int but is formatted
		// with %d here.
		dml_print(
				"DML_DLG: %s: cur_req_width = %d\n",
				__func__,
				cur_req_width);
		dml_print(
				"DML_DLG: %s: cur_width_ub = %3.2f\n",
				__func__,
				cur_width_ub);
		dml_print(
				"DML_DLG: %s: cur_req_per_width = %3.2f\n",
				__func__,
				cur_req_per_width);
		dml_print(
				"DML_DLG: %s: hactive_cur = %3.2f\n",
				__func__,
				hactive_cur);
		dml_print(
				"DML_DLG: %s: refcyc_per_req_delivery_pre_cur = %3.2f\n",
				__func__,
				*refcyc_per_req_delivery_pre_cur);
		dml_print(
				"DML_DLG: %s: refcyc_per_req_delivery_cur = %3.2f\n",
				__func__,
				*refcyc_per_req_delivery_cur);

		ASSERT(*refcyc_per_req_delivery_cur < dml_pow(2, 13));
	}
}

#endif
+73
drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h
··· 1 + /* 2 + * Copyright 2017 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 21 + * 22 + * Authors: AMD 23 + * 24 + */ 25 + 26 + #ifndef __DML21_DISPLAY_RQ_DLG_CALC_H__ 27 + #define __DML21_DISPLAY_RQ_DLG_CALC_H__ 28 + 29 + #include "../dml_common_defs.h" 30 + #include "../display_rq_dlg_helpers.h" 31 + 32 + struct display_mode_lib; 33 + 34 + 35 + // Function: dml_rq_dlg_get_rq_reg 36 + // Main entry point for test to get the register values out of this DML class. 37 + // This function calls <get_rq_param> and <extract_rq_regs> functions to calculate 38 + // and then populate the rq_regs struct 39 + // Input: 40 + // pipe_src_param - pipe source configuration (e.g. vp, pitch, etc.) 41 + // Output: 42 + // rq_regs - struct that holds all the RQ registers field value. 
43 + // See also: <display_rq_regs_st> 44 + void dml21_rq_dlg_get_rq_reg( 45 + struct display_mode_lib *mode_lib, 46 + display_rq_regs_st *rq_regs, 47 + const display_pipe_params_st pipe_param); 48 + 49 + // Function: dml_rq_dlg_get_dlg_reg 50 + // Calculate and return DLG and TTU register struct given the system setting 51 + // Output: 52 + // dlg_regs - output DLG register struct 53 + // ttu_regs - output DLG TTU register struct 54 + // Input: 55 + // e2e_pipe_param - "compacted" array of e2e pipe param struct 56 + // num_pipes - num of active "pipe" or "route" 57 + // pipe_idx - index that identifies the e2e_pipe_param that corresponding to this dlg 58 + // cstate - 0: when calculate min_ttu_vblank it is assumed cstate is not required. 1: Normal mode, cstate is considered. 59 + // Added for legacy or unrealistic timing tests. 60 + void dml21_rq_dlg_get_dlg_reg( 61 + struct display_mode_lib *mode_lib, 62 + display_dlg_regs_st *dlg_regs, 63 + display_ttu_regs_st *ttu_regs, 64 + display_e2e_pipe_params_st *e2e_pipe_param, 65 + const unsigned int num_pipes, 66 + const unsigned int pipe_idx, 67 + const bool cstate_en, 68 + const bool pstate_en, 69 + const bool vm_en, 70 + const bool ignore_viewport_pos, 71 + const bool immediate_flip_support); 72 + 73 + #endif
+3
drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
··· 38 38 DML_PROJECT_NAVI10, 39 39 DML_PROJECT_NAVI10v2, 40 40 #endif 41 + #ifdef CONFIG_DRM_AMD_DC_DCN2_1 42 + DML_PROJECT_DCN21, 43 + #endif 41 44 }; 42 45 43 46 struct display_mode_lib;