Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amd/display: reduce stack for dml32_CalculatePrefetchSchedule

Move stack variables to dummy structure.

Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>

+227 -211
+4 -1
drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
··· 757 757 v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY = v->BytePerPixelY[k]; 758 758 v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k]; 759 759 v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; 760 - v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, 760 + v->ErrorResult[k] = dml32_CalculatePrefetchSchedule( 761 + &v->dummy_vars.dml32_CalculatePrefetchSchedule, 762 + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, 761 763 &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, v->DSCDelay[k], 762 764 mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater, 763 765 mode_lib->vba.DPPCLKDelaySCL, ··· 3197 3195 3198 3196 mode_lib->vba.NoTimeForPrefetch[i][j][k] = 3199 3197 dml32_CalculatePrefetchSchedule( 3198 + &v->dummy_vars.dml32_CalculatePrefetchSchedule, 3200 3199 v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, 3201 3200 &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe, 3202 3201 mode_lib->vba.DSCDelayPerState[i][k],
+184 -210
drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
··· 3342 3342 } // CalculateExtraLatency 3343 3343 3344 3344 bool dml32_CalculatePrefetchSchedule( 3345 + struct dml32_CalculatePrefetchSchedule *st_vars, 3345 3346 double HostVMInefficiencyFactor, 3346 3347 DmlPipe *myPipe, 3347 3348 unsigned int DSCDelay, ··· 3406 3405 double *VReadyOffsetPix) 3407 3406 { 3408 3407 bool MyError = false; 3409 - unsigned int DPPCycles, DISPCLKCycles; 3410 - double DSTTotalPixelsAfterScaler; 3411 - double LineTime; 3412 - double dst_y_prefetch_equ; 3413 - double prefetch_bw_oto; 3414 - double Tvm_oto; 3415 - double Tr0_oto; 3416 - double Tvm_oto_lines; 3417 - double Tr0_oto_lines; 3418 - double dst_y_prefetch_oto; 3419 - double TimeForFetchingMetaPTE = 0; 3420 - double TimeForFetchingRowInVBlank = 0; 3421 - double LinesToRequestPrefetchPixelData = 0; 3422 - unsigned int HostVMDynamicLevelsTrips; 3423 - double trip_to_mem; 3424 - double Tvm_trips; 3425 - double Tr0_trips; 3426 - double Tvm_trips_rounded; 3427 - double Tr0_trips_rounded; 3428 - double Lsw_oto; 3429 - double Tpre_rounded; 3430 - double prefetch_bw_equ; 3431 - double Tvm_equ; 3432 - double Tr0_equ; 3433 - double Tdmbf; 3434 - double Tdmec; 3435 - double Tdmsks; 3436 - double prefetch_sw_bytes; 3437 - double bytes_pp; 3438 - double dep_bytes; 3439 - unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__; 3440 - double min_Lsw; 3441 - double Tsw_est1 = 0; 3442 - double Tsw_est3 = 0; 3408 + 3409 + st_vars->TimeForFetchingMetaPTE = 0; 3410 + st_vars->TimeForFetchingRowInVBlank = 0; 3411 + st_vars->LinesToRequestPrefetchPixelData = 0; 3412 + st_vars->max_vratio_pre = __DML_MAX_VRATIO_PRE__; 3413 + st_vars->Tsw_est1 = 0; 3414 + st_vars->Tsw_est3 = 0; 3443 3415 3444 3416 if (GPUVMEnable == true && HostVMEnable == true) 3445 - HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 3417 + st_vars->HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 3446 3418 else 3447 - HostVMDynamicLevelsTrips = 0; 3419 + st_vars->HostVMDynamicLevelsTrips = 0; 3448 3420 #ifdef 
__DML_VBA_DEBUG__ 3449 3421 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 3450 3422 dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels); ··· 3440 3466 TSetup, 3441 3467 3442 3468 /* output */ 3443 - &Tdmbf, 3444 - &Tdmec, 3445 - &Tdmsks, 3469 + &st_vars->Tdmbf, 3470 + &st_vars->Tdmec, 3471 + &st_vars->Tdmsks, 3446 3472 VUpdateOffsetPix, 3447 3473 VUpdateWidthPix, 3448 3474 VReadyOffsetPix); 3449 3475 3450 - LineTime = myPipe->HTotal / myPipe->PixelClock; 3451 - trip_to_mem = UrgentLatency; 3452 - Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 3476 + st_vars->LineTime = myPipe->HTotal / myPipe->PixelClock; 3477 + st_vars->trip_to_mem = UrgentLatency; 3478 + st_vars->Tvm_trips = UrgentExtraLatency + st_vars->trip_to_mem * (GPUVMPageTableLevels * (st_vars->HostVMDynamicLevelsTrips + 1) - 1); 3453 3479 3454 3480 if (DynamicMetadataVMEnabled == true) 3455 - *Tdmdl = TWait + Tvm_trips + trip_to_mem; 3481 + *Tdmdl = TWait + st_vars->Tvm_trips + st_vars->trip_to_mem; 3456 3482 else 3457 3483 *Tdmdl = TWait + UrgentExtraLatency; 3458 3484 ··· 3462 3488 #endif 3463 3489 3464 3490 if (DynamicMetadataEnable == true) { 3465 - if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 3491 + if (VStartup * st_vars->LineTime < *TSetup + *Tdmdl + st_vars->Tdmbf + st_vars->Tdmec + st_vars->Tdmsks) { 3466 3492 *NotEnoughTimeForDynamicMetadata = true; 3467 3493 #ifdef __DML_VBA_DEBUG__ 3468 3494 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 3469 3495 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", 3470 - __func__, Tdmbf); 3471 - dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 3496 + __func__, st_vars->Tdmbf); 3497 + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec); 3472 3498 dml_print("DML::%s: Tdmsks: %fus - time 
before active dmd must complete transmission at dio\n", 3473 - __func__, Tdmsks); 3499 + __func__, st_vars->Tdmsks); 3474 3500 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", 3475 3501 __func__, *Tdmdl); 3476 3502 #endif ··· 3482 3508 } 3483 3509 3484 3510 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && 3485 - GPUVMEnable == true ? TWait + Tvm_trips : 0); 3511 + GPUVMEnable == true ? TWait + st_vars->Tvm_trips : 0); 3486 3512 3487 3513 if (myPipe->ScalerEnabled) 3488 - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; 3514 + st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; 3489 3515 else 3490 - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; 3516 + st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; 3491 3517 3492 - DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; 3518 + st_vars->DPPCycles = st_vars->DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; 3493 3519 3494 - DISPCLKCycles = DISPCLKDelaySubtotal; 3520 + st_vars->DISPCLKCycles = DISPCLKDelaySubtotal; 3495 3521 3496 3522 if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0) 3497 3523 return true; 3498 3524 3499 - *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles * 3525 + *DSTXAfterScaler = st_vars->DPPCycles * myPipe->PixelClock / myPipe->Dppclk + st_vars->DISPCLKCycles * 3500 3526 myPipe->PixelClock / myPipe->Dispclk + DSCDelay; 3501 3527 3502 3528 *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0) ··· 3506 3532 + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? 
myPipe->HActive * 3 / 4 : 0); 3507 3533 3508 3534 #ifdef __DML_VBA_DEBUG__ 3509 - dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); 3535 + dml_print("DML::%s: DPPCycles: %d\n", __func__, st_vars->DPPCycles); 3510 3536 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 3511 3537 dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk); 3512 - dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); 3538 + dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, st_vars->DISPCLKCycles); 3513 3539 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk); 3514 3540 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 3515 3541 dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode); ··· 3522 3548 else 3523 3549 *DSTYAfterScaler = 0; 3524 3550 3525 - DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 3526 - *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 3527 - *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 3551 + st_vars->DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 3552 + *DSTYAfterScaler = dml_floor(st_vars->DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 3553 + *DSTXAfterScaler = st_vars->DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 3528 3554 #ifdef __DML_VBA_DEBUG__ 3529 3555 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 3530 3556 dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler); ··· 3532 3558 3533 3559 MyError = false; 3534 3560 3535 - Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); 3561 + st_vars->Tr0_trips = st_vars->trip_to_mem * (st_vars->HostVMDynamicLevelsTrips + 1); 3536 3562 3537 3563 if (GPUVMEnable == true) { 3538 - Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime; 3539 - Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / 
LineTime, 1.0) / 4.0 * LineTime; 3564 + st_vars->Tvm_trips_rounded = dml_ceil(4.0 * st_vars->Tvm_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; 3565 + st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; 3540 3566 if (GPUVMPageTableLevels >= 3) { 3541 - *Tno_bw = UrgentExtraLatency + trip_to_mem * 3542 - (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); 3567 + *Tno_bw = UrgentExtraLatency + st_vars->trip_to_mem * 3568 + (double) ((GPUVMPageTableLevels - 2) * (st_vars->HostVMDynamicLevelsTrips + 1) - 1); 3543 3569 } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) { 3544 - Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) / 3545 - 4.0 * LineTime; // VBA_ERROR 3570 + st_vars->Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / st_vars->LineTime, 1.0) / 3571 + 4.0 * st_vars->LineTime; // VBA_ERROR 3546 3572 *Tno_bw = UrgentExtraLatency; 3547 3573 } else { 3548 3574 *Tno_bw = 0; 3549 3575 } 3550 3576 } else if (myPipe->DCCEnable == true) { 3551 - Tvm_trips_rounded = LineTime / 4.0; 3552 - Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; 3577 + st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0; 3578 + st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; 3553 3579 *Tno_bw = 0; 3554 3580 } else { 3555 - Tvm_trips_rounded = LineTime / 4.0; 3556 - Tr0_trips_rounded = LineTime / 2.0; 3581 + st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0; 3582 + st_vars->Tr0_trips_rounded = st_vars->LineTime / 2.0; 3557 3583 *Tno_bw = 0; 3558 3584 } 3559 - Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0); 3560 - Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0); 3585 + st_vars->Tvm_trips_rounded = dml_max(st_vars->Tvm_trips_rounded, st_vars->LineTime / 4.0); 3586 + st_vars->Tr0_trips_rounded = 
dml_max(st_vars->Tr0_trips_rounded, st_vars->LineTime / 4.0); 3561 3587 3562 3588 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 3563 3589 || myPipe->SourcePixelFormat == dm_420_12) { 3564 - bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 3590 + st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 3565 3591 } else { 3566 - bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 3592 + st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 3567 3593 } 3568 3594 3569 - prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY 3595 + st_vars->prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY 3570 3596 + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 3571 - prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, 3572 - prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); 3597 + st_vars->prefetch_bw_oto = dml_max(st_vars->bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, 3598 + st_vars->prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * st_vars->LineTime)); 3573 3599 3574 - min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre; 3575 - min_Lsw = dml_max(min_Lsw, 1.0); 3576 - Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0; 3600 + st_vars->min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / st_vars->max_vratio_pre; 3601 + st_vars->min_Lsw = dml_max(st_vars->min_Lsw, 1.0); 3602 + st_vars->Lsw_oto = dml_ceil(4.0 * dml_max(st_vars->prefetch_sw_bytes / st_vars->prefetch_bw_oto / st_vars->LineTime, st_vars->min_Lsw), 1.0) / 4.0; 3577 3603 3578 3604 if (GPUVMEnable == true) { 3579 - Tvm_oto = dml_max3( 3580 - Tvm_trips, 3581 - *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, 3582 - LineTime / 4.0); 3605 
+ st_vars->Tvm_oto = dml_max3( 3606 + st_vars->Tvm_trips, 3607 + *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / st_vars->prefetch_bw_oto, 3608 + st_vars->LineTime / 4.0); 3583 3609 } else 3584 - Tvm_oto = LineTime / 4.0; 3610 + st_vars->Tvm_oto = st_vars->LineTime / 4.0; 3585 3611 3586 3612 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 3587 - Tr0_oto = dml_max4( 3588 - Tr0_trips, 3589 - (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, 3590 - (LineTime - Tvm_oto)/2.0, 3591 - LineTime / 4.0); 3613 + st_vars->Tr0_oto = dml_max4( 3614 + st_vars->Tr0_trips, 3615 + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto, 3616 + (st_vars->LineTime - st_vars->Tvm_oto)/2.0, 3617 + st_vars->LineTime / 4.0); 3592 3618 #ifdef __DML_VBA_DEBUG__ 3593 3619 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, 3594 - (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto); 3595 - dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips); 3596 - dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto); 3597 - dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4); 3620 + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto); 3621 + dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, st_vars->Tr0_trips); 3622 + dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, st_vars->LineTime - st_vars->Tvm_oto); 3623 + dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, st_vars->LineTime / 4); 3598 3624 #endif 3599 3625 } else 3600 - Tr0_oto = (LineTime - Tvm_oto) / 2.0; 3626 + st_vars->Tr0_oto = (st_vars->LineTime - st_vars->Tvm_oto) / 2.0; 3601 3627 3602 - Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; 3603 - Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; 3604 - dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; 3628 + st_vars->Tvm_oto_lines = dml_ceil(4.0 * 
st_vars->Tvm_oto / st_vars->LineTime, 1) / 4.0; 3629 + st_vars->Tr0_oto_lines = dml_ceil(4.0 * st_vars->Tr0_oto / st_vars->LineTime, 1) / 4.0; 3630 + st_vars->dst_y_prefetch_oto = st_vars->Tvm_oto_lines + 2 * st_vars->Tr0_oto_lines + st_vars->Lsw_oto; 3605 3631 3606 - dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - 3632 + st_vars->dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / st_vars->LineTime - 3607 3633 (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal); 3608 3634 3609 3635 #ifdef __DML_VBA_DEBUG__ 3610 3636 dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal); 3611 - dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw); 3637 + dml_print("DML::%s: min_Lsw = %f\n", __func__, st_vars->min_Lsw); 3612 3638 dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw); 3613 3639 dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency); 3614 - dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem); 3640 + dml_print("DML::%s: trip_to_mem = %f\n", __func__, st_vars->trip_to_mem); 3615 3641 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 3616 3642 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3617 3643 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3618 3644 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC); 3619 3645 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 3620 3646 dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub); 3621 - dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes); 3622 - dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp); 3647 + dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, st_vars->prefetch_sw_bytes); 3648 + dml_print("DML::%s: bytes_pp = %f\n", __func__, st_vars->bytes_pp); 3623 3649 
dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 3624 3650 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 3625 3651 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 3626 3652 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 3627 - dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); 3628 - dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); 3629 - dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); 3630 - dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); 3631 - dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); 3632 - dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines); 3633 - dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines); 3634 - dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto); 3635 - dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto); 3636 - dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ); 3653 + dml_print("DML::%s: Tvm_trips = %f\n", __func__, st_vars->Tvm_trips); 3654 + dml_print("DML::%s: Tr0_trips = %f\n", __func__, st_vars->Tr0_trips); 3655 + dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, st_vars->prefetch_bw_oto); 3656 + dml_print("DML::%s: Tr0_oto = %f\n", __func__, st_vars->Tr0_oto); 3657 + dml_print("DML::%s: Tvm_oto = %f\n", __func__, st_vars->Tvm_oto); 3658 + dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, st_vars->Tvm_oto_lines); 3659 + dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, st_vars->Tr0_oto_lines); 3660 + dml_print("DML::%s: Lsw_oto = %f\n", __func__, st_vars->Lsw_oto); 3661 + dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, st_vars->dst_y_prefetch_oto); 3662 + dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, st_vars->dst_y_prefetch_equ); 3637 3663 #endif 3638 3664 3639 - dst_y_prefetch_equ = dml_floor(4.0 * 
(dst_y_prefetch_equ + 0.125), 1) / 4.0; 3640 - Tpre_rounded = dst_y_prefetch_equ * LineTime; 3665 + st_vars->dst_y_prefetch_equ = dml_floor(4.0 * (st_vars->dst_y_prefetch_equ + 0.125), 1) / 4.0; 3666 + st_vars->Tpre_rounded = st_vars->dst_y_prefetch_equ * st_vars->LineTime; 3641 3667 #ifdef __DML_VBA_DEBUG__ 3642 - dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ); 3643 - dml_print("DML::%s: LineTime: %f\n", __func__, LineTime); 3668 + dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, st_vars->dst_y_prefetch_equ); 3669 + dml_print("DML::%s: LineTime: %f\n", __func__, st_vars->LineTime); 3644 3670 dml_print("DML::%s: VStartup: %d\n", __func__, VStartup); 3645 3671 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", 3646 - __func__, VStartup * LineTime); 3672 + __func__, VStartup * st_vars->LineTime); 3647 3673 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup); 3648 3674 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc); 3649 - dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); 3650 - dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 3675 + dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, st_vars->Tdmbf); 3676 + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec); 3651 3677 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm); 3652 3678 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); 3653 3679 dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n", 3654 3680 __func__, *DSTYAfterScaler); 3655 3681 #endif 3656 - dep_bytes = 
dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, 3682 + st_vars->dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, 3657 3683 MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 3658 3684 3659 - if (prefetch_sw_bytes < dep_bytes) 3660 - prefetch_sw_bytes = 2 * dep_bytes; 3685 + if (st_vars->prefetch_sw_bytes < st_vars->dep_bytes) 3686 + st_vars->prefetch_sw_bytes = 2 * st_vars->dep_bytes; 3661 3687 3662 3688 *PrefetchBandwidth = 0; 3663 3689 *DestinationLinesToRequestVMInVBlank = 0; ··· 3665 3691 *VRatioPrefetchY = 0; 3666 3692 *VRatioPrefetchC = 0; 3667 3693 *RequiredPrefetchPixDataBWLuma = 0; 3668 - if (dst_y_prefetch_equ > 1) { 3694 + if (st_vars->dst_y_prefetch_equ > 1) { 3669 3695 double PrefetchBandwidth1; 3670 3696 double PrefetchBandwidth2; 3671 3697 double PrefetchBandwidth3; 3672 3698 double PrefetchBandwidth4; 3673 3699 3674 - if (Tpre_rounded - *Tno_bw > 0) { 3700 + if (st_vars->Tpre_rounded - *Tno_bw > 0) { 3675 3701 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3676 3702 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3677 - + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); 3678 - Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; 3703 + + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - *Tno_bw); 3704 + st_vars->Tsw_est1 = st_vars->prefetch_sw_bytes / PrefetchBandwidth1; 3679 3705 } else 3680 3706 PrefetchBandwidth1 = 0; 3681 3707 3682 - if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw) 3683 - && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { 3708 + if (VStartup == MaxVStartup && (st_vars->Tsw_est1 / st_vars->LineTime < st_vars->min_Lsw) 3709 + && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw > 0) { 3684 3710 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3685 3711 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3686 - 
/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); 3712 + / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw); 3687 3713 } 3688 3714 3689 - if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 3690 - PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / 3691 - (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 3715 + if (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded > 0) 3716 + PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + st_vars->prefetch_sw_bytes) / 3717 + (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded); 3692 3718 else 3693 3719 PrefetchBandwidth2 = 0; 3694 3720 3695 - if (Tpre_rounded - Tvm_trips_rounded > 0) { 3721 + if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded > 0) { 3696 3722 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3697 - + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); 3698 - Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; 3723 + + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded); 3724 + st_vars->Tsw_est3 = st_vars->prefetch_sw_bytes / PrefetchBandwidth3; 3699 3725 } else 3700 3726 PrefetchBandwidth3 = 0; 3701 3727 3702 3728 3703 3729 if (VStartup == MaxVStartup && 3704 - (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 * 3705 - LineTime - Tvm_trips_rounded > 0) { 3730 + (st_vars->Tsw_est3 / st_vars->LineTime < st_vars->min_Lsw) && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * 3731 + st_vars->LineTime - st_vars->Tvm_trips_rounded > 0) { 3706 3732 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3707 - / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); 3733 + / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - 
st_vars->Tvm_trips_rounded); 3708 3734 } 3709 3735 3710 - if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) { 3711 - PrefetchBandwidth4 = prefetch_sw_bytes / 3712 - (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 3736 + if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded > 0) { 3737 + PrefetchBandwidth4 = st_vars->prefetch_sw_bytes / 3738 + (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded); 3713 3739 } else { 3714 3740 PrefetchBandwidth4 = 0; 3715 3741 } 3716 3742 3717 3743 #ifdef __DML_VBA_DEBUG__ 3718 - dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); 3744 + dml_print("DML::%s: Tpre_rounded: %f\n", __func__, st_vars->Tpre_rounded); 3719 3745 dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw); 3720 - dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); 3721 - dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1); 3722 - dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3); 3746 + dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, st_vars->Tvm_trips_rounded); 3747 + dml_print("DML::%s: Tsw_est1: %f\n", __func__, st_vars->Tsw_est1); 3748 + dml_print("DML::%s: Tsw_est3: %f\n", __func__, st_vars->Tsw_est3); 3723 3749 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1); 3724 3750 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2); 3725 3751 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); ··· 3732 3758 3733 3759 if (PrefetchBandwidth1 > 0) { 3734 3760 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 3735 - >= Tvm_trips_rounded 3761 + >= st_vars->Tvm_trips_rounded 3736 3762 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3737 - / PrefetchBandwidth1 >= Tr0_trips_rounded) { 3763 + / PrefetchBandwidth1 >= st_vars->Tr0_trips_rounded) { 3738 3764 Case1OK = true; 3739 3765 } else { 3740 3766 
Case1OK = false; ··· 3745 3771 3746 3772 if (PrefetchBandwidth2 > 0) { 3747 3773 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 3748 - >= Tvm_trips_rounded 3774 + >= st_vars->Tvm_trips_rounded 3749 3775 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3750 - / PrefetchBandwidth2 < Tr0_trips_rounded) { 3776 + / PrefetchBandwidth2 < st_vars->Tr0_trips_rounded) { 3751 3777 Case2OK = true; 3752 3778 } else { 3753 3779 Case2OK = false; ··· 3758 3784 3759 3785 if (PrefetchBandwidth3 > 0) { 3760 3786 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < 3761 - Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * 3787 + st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * 3762 3788 HostVMInefficiencyFactor) / PrefetchBandwidth3 >= 3763 - Tr0_trips_rounded) { 3789 + st_vars->Tr0_trips_rounded) { 3764 3790 Case3OK = true; 3765 3791 } else { 3766 3792 Case3OK = false; ··· 3770 3796 } 3771 3797 3772 3798 if (Case1OK) 3773 - prefetch_bw_equ = PrefetchBandwidth1; 3799 + st_vars->prefetch_bw_equ = PrefetchBandwidth1; 3774 3800 else if (Case2OK) 3775 - prefetch_bw_equ = PrefetchBandwidth2; 3801 + st_vars->prefetch_bw_equ = PrefetchBandwidth2; 3776 3802 else if (Case3OK) 3777 - prefetch_bw_equ = PrefetchBandwidth3; 3803 + st_vars->prefetch_bw_equ = PrefetchBandwidth3; 3778 3804 else 3779 - prefetch_bw_equ = PrefetchBandwidth4; 3805 + st_vars->prefetch_bw_equ = PrefetchBandwidth4; 3780 3806 3781 3807 #ifdef __DML_VBA_DEBUG__ 3782 3808 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 3783 3809 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); 3784 3810 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 3785 - dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); 3811 + dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, st_vars->prefetch_bw_equ); 3786 3812 #endif 3787 3813 3788 - if (prefetch_bw_equ > 0) { 3814 + if 
(st_vars->prefetch_bw_equ > 0) { 3789 3815 if (GPUVMEnable == true) { 3790 - Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * 3791 - HostVMInefficiencyFactor / prefetch_bw_equ, 3792 - Tvm_trips, LineTime / 4); 3816 + st_vars->Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * 3817 + HostVMInefficiencyFactor / st_vars->prefetch_bw_equ, 3818 + st_vars->Tvm_trips, st_vars->LineTime / 4); 3793 3819 } else { 3794 - Tvm_equ = LineTime / 4; 3820 + st_vars->Tvm_equ = st_vars->LineTime / 4; 3795 3821 } 3796 3822 3797 3823 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 3798 - Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * 3799 - HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, 3800 - (LineTime - Tvm_equ) / 2, LineTime / 4); 3824 + st_vars->Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * 3825 + HostVMInefficiencyFactor) / st_vars->prefetch_bw_equ, st_vars->Tr0_trips, 3826 + (st_vars->LineTime - st_vars->Tvm_equ) / 2, st_vars->LineTime / 4); 3801 3827 } else { 3802 - Tr0_equ = (LineTime - Tvm_equ) / 2; 3828 + st_vars->Tr0_equ = (st_vars->LineTime - st_vars->Tvm_equ) / 2; 3803 3829 } 3804 3830 } else { 3805 - Tvm_equ = 0; 3806 - Tr0_equ = 0; 3831 + st_vars->Tvm_equ = 0; 3832 + st_vars->Tr0_equ = 0; 3807 3833 #ifdef __DML_VBA_DEBUG__ 3808 3834 dml_print("DML: prefetch_bw_equ equals 0! 
%s:%d\n", __FILE__, __LINE__); 3809 3835 #endif 3810 3836 } 3811 3837 } 3812 3838 3813 - if (dst_y_prefetch_oto < dst_y_prefetch_equ) { 3814 - *DestinationLinesForPrefetch = dst_y_prefetch_oto; 3815 - TimeForFetchingMetaPTE = Tvm_oto; 3816 - TimeForFetchingRowInVBlank = Tr0_oto; 3817 - *PrefetchBandwidth = prefetch_bw_oto; 3839 + if (st_vars->dst_y_prefetch_oto < st_vars->dst_y_prefetch_equ) { 3840 + *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_oto; 3841 + st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_oto; 3842 + st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_oto; 3843 + *PrefetchBandwidth = st_vars->prefetch_bw_oto; 3818 3844 } else { 3819 - *DestinationLinesForPrefetch = dst_y_prefetch_equ; 3820 - TimeForFetchingMetaPTE = Tvm_equ; 3821 - TimeForFetchingRowInVBlank = Tr0_equ; 3822 - *PrefetchBandwidth = prefetch_bw_equ; 3845 + *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_equ; 3846 + st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_equ; 3847 + st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_equ; 3848 + *PrefetchBandwidth = st_vars->prefetch_bw_equ; 3823 3849 } 3824 3850 3825 - *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 3851 + *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * st_vars->TimeForFetchingMetaPTE / st_vars->LineTime, 1.0) / 4.0; 3826 3852 3827 3853 *DestinationLinesToRequestRowInVBlank = 3828 - dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 3854 + dml_ceil(4.0 * st_vars->TimeForFetchingRowInVBlank / st_vars->LineTime, 1.0) / 4.0; 3829 3855 3830 - LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - 3856 + st_vars->LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - 3831 3857 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 3832 3858 3833 3859 #ifdef __DML_VBA_DEBUG__ 3834 3860 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 3835 3861 
dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 3836 3862 __func__, *DestinationLinesToRequestVMInVBlank); 3837 - dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); 3838 - dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 3863 + dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, st_vars->TimeForFetchingRowInVBlank); 3864 + dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); 3839 3865 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", 3840 3866 __func__, *DestinationLinesToRequestRowInVBlank); 3841 3867 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3842 - dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); 3868 + dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, st_vars->LinesToRequestPrefetchPixelData); 3843 3869 #endif 3844 3870 3845 - if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) { 3846 - *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; 3871 + if (st_vars->LinesToRequestPrefetchPixelData >= 1 && st_vars->prefetch_bw_equ > 0) { 3872 + *VRatioPrefetchY = (double) PrefetchSourceLinesY / st_vars->LinesToRequestPrefetchPixelData; 3847 3873 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 3848 3874 #ifdef __DML_VBA_DEBUG__ 3849 3875 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); ··· 3851 3877 dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY); 3852 3878 #endif 3853 3879 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 3854 - if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 3880 + if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 3855 3881 *VRatioPrefetchY = 3856 3882 dml_max((double) PrefetchSourceLinesY / 3857 - LinesToRequestPrefetchPixelData, 3883 + st_vars->LinesToRequestPrefetchPixelData, 3858 
3884 (double) MaxNumSwathY * SwathHeightY / 3859 - (LinesToRequestPrefetchPixelData - 3885 + (st_vars->LinesToRequestPrefetchPixelData - 3860 3886 (VInitPreFillY - 3.0) / 2.0)); 3861 3887 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 3862 3888 } else { ··· 3870 3896 #endif 3871 3897 } 3872 3898 3873 - *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; 3899 + *VRatioPrefetchC = (double) PrefetchSourceLinesC / st_vars->LinesToRequestPrefetchPixelData; 3874 3900 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 3875 3901 3876 3902 #ifdef __DML_VBA_DEBUG__ ··· 3879 3905 dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC); 3880 3906 #endif 3881 3907 if ((SwathHeightC > 4)) { 3882 - if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 3908 + if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 3883 3909 *VRatioPrefetchC = 3884 3910 dml_max(*VRatioPrefetchC, 3885 3911 (double) MaxNumSwathC * SwathHeightC / 3886 - (LinesToRequestPrefetchPixelData - 3912 + (st_vars->LinesToRequestPrefetchPixelData - 3887 3913 (VInitPreFillC - 3.0) / 2.0)); 3888 3914 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 3889 3915 } else { ··· 3898 3924 } 3899 3925 3900 3926 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY 3901 - / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub 3902 - / LineTime; 3927 + / st_vars->LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub 3928 + / st_vars->LineTime; 3903 3929 3904 3930 #ifdef __DML_VBA_DEBUG__ 3905 3931 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 3906 3932 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3907 - dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 3933 + dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); 3908 3934 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", 3909 3935 
__func__, *RequiredPrefetchPixDataBWLuma); 3910 3936 #endif 3911 3937 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / 3912 - LinesToRequestPrefetchPixelData 3938 + st_vars->LinesToRequestPrefetchPixelData 3913 3939 * myPipe->BytePerPixelC 3914 - * swath_width_chroma_ub / LineTime; 3940 + * swath_width_chroma_ub / st_vars->LineTime; 3915 3941 } else { 3916 3942 MyError = true; 3917 3943 #ifdef __DML_VBA_DEBUG__ 3918 3944 dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n", 3919 - __func__, LinesToRequestPrefetchPixelData); 3945 + __func__, st_vars->LinesToRequestPrefetchPixelData); 3920 3946 #endif 3921 3947 *VRatioPrefetchY = 0; 3922 3948 *VRatioPrefetchC = 0; ··· 3925 3951 } 3926 3952 #ifdef __DML_VBA_DEBUG__ 3927 3953 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 3928 - (double)LinesToRequestPrefetchPixelData * LineTime + 3929 - 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); 3930 - dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); 3954 + (double)st_vars->LinesToRequestPrefetchPixelData * st_vars->LineTime + 3955 + 2.0*st_vars->TimeForFetchingRowInVBlank + st_vars->TimeForFetchingMetaPTE); 3956 + dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", st_vars->TimeForFetchingMetaPTE); 3931 3957 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", 3932 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); 3958 + (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime); 3933 3959 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 3934 - dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - 3935 - TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler + 3936 - ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) 
* LineTime - TWait - TCalc - *TSetup); 3960 + dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * st_vars->LineTime - 3961 + st_vars->TimeForFetchingMetaPTE - 2*st_vars->TimeForFetchingRowInVBlank - (*DSTYAfterScaler + 3962 + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime - TWait - TCalc - *TSetup); 3937 3963 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", 3938 3964 PixelPTEBytesPerRow); 3939 3965 #endif ··· 3941 3967 MyError = true; 3942 3968 #ifdef __DML_VBA_DEBUG__ 3943 3969 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", 3944 - __func__, dst_y_prefetch_equ); 3970 + __func__, st_vars->dst_y_prefetch_equ); 3945 3971 #endif 3946 3972 } 3947 3973 ··· 3957 3983 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 3958 3984 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 3959 3985 __func__, *DestinationLinesToRequestVMInVBlank); 3960 - dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 3986 + dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); 3961 3987 #endif 3962 3988 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / 3963 - (*DestinationLinesToRequestVMInVBlank * LineTime); 3989 + (*DestinationLinesToRequestVMInVBlank * st_vars->LineTime); 3964 3990 #ifdef __DML_VBA_DEBUG__ 3965 3991 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 3966 3992 #endif ··· 3977 4003 prefetch_row_bw = 0; 3978 4004 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 3979 4005 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / 3980 - (*DestinationLinesToRequestRowInVBlank * LineTime); 4006 + (*DestinationLinesToRequestRowInVBlank * st_vars->LineTime); 3981 4007 3982 4008 #ifdef __DML_VBA_DEBUG__ 3983 4009 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); ··· 4000 4026 4001 4027 if (MyError) { 4002 
4028 *PrefetchBandwidth = 0; 4003 - TimeForFetchingMetaPTE = 0; 4004 - TimeForFetchingRowInVBlank = 0; 4029 + st_vars->TimeForFetchingMetaPTE = 0; 4030 + st_vars->TimeForFetchingRowInVBlank = 0; 4005 4031 *DestinationLinesToRequestVMInVBlank = 0; 4006 4032 *DestinationLinesToRequestRowInVBlank = 0; 4007 4033 *DestinationLinesForPrefetch = 0; 4008 - LinesToRequestPrefetchPixelData = 0; 4034 + st_vars->LinesToRequestPrefetchPixelData = 0; 4009 4035 *VRatioPrefetchY = 0; 4010 4036 *VRatioPrefetchC = 0; 4011 4037 *RequiredPrefetchPixDataBWLuma = 0;
+1
drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
··· 715 715 unsigned int HostVMMaxNonCachedPageTableLevels); 716 716 717 717 bool dml32_CalculatePrefetchSchedule( 718 + struct dml32_CalculatePrefetchSchedule *st_vars, 718 719 double HostVMInefficiencyFactor, 719 720 DmlPipe *myPipe, 720 721 unsigned int DSCDelay,
+38
drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
··· 247 247 unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; 248 248 }; 249 249 250 + /* Scratch variables for dml32_CalculatePrefetchSchedule(): the function receives a pointer to this struct (st_vars) and keeps its intermediate prefetch timing and bandwidth values here instead of in local stack variables. An instance is added as a member next to the other dml32_* scratch structs and is passed by callers as &v->dummy_vars.dml32_CalculatePrefetchSchedule. NOTE(review): max_vratio_pre is unsigned int while the neighbouring fields are double - confirm the integer type is intended. */ struct dml32_CalculatePrefetchSchedule { 251 + unsigned int DPPCycles, DISPCLKCycles; 252 + double DSTTotalPixelsAfterScaler; 253 + double LineTime; 254 + double dst_y_prefetch_equ; 255 + double prefetch_bw_oto; 256 + double Tvm_oto; 257 + double Tr0_oto; 258 + double Tvm_oto_lines; 259 + double Tr0_oto_lines; 260 + double dst_y_prefetch_oto; 261 + double TimeForFetchingMetaPTE; 262 + double TimeForFetchingRowInVBlank; 263 + double LinesToRequestPrefetchPixelData; 264 + unsigned int HostVMDynamicLevelsTrips; 265 + double trip_to_mem; 266 + double Tvm_trips; 267 + double Tr0_trips; 268 + double Tvm_trips_rounded; 269 + double Tr0_trips_rounded; 270 + double Lsw_oto; 271 + double Tpre_rounded; 272 + double prefetch_bw_equ; 273 + double Tvm_equ; 274 + double Tr0_equ; 275 + double Tdmbf; 276 + double Tdmec; 277 + double Tdmsks; 278 + double prefetch_sw_bytes; 279 + double bytes_pp; 280 + double dep_bytes; 281 + unsigned int max_vratio_pre; 282 + double min_Lsw; 283 + double Tsw_est1; 284 + double Tsw_est3; 285 + }; 286 + 250 287 struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation { 251 288 unsigned int dummy_integer_array[2][DC__NUM_DPP__MAX]; 252 289 double dummy_single_array[2][DC__NUM_DPP__MAX]; ··· 358 321 struct dml32_CalculateSwathAndDETConfiguration dml32_CalculateSwathAndDETConfiguration; 359 322 struct dml32_CalculateVMRowAndSwath dml32_CalculateVMRowAndSwath; 360 323 struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport; 324 + struct dml32_CalculatePrefetchSchedule dml32_CalculatePrefetchSchedule; 361 325 }; 362 326 363 327 struct vba_vars_st {