1 /*
2 * Copyright 2020 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26 #ifdef CONFIG_DRM_AMD_DC_DCN
27 #include "dc.h"
28 #include "dc_link.h"
29 #include "../display_mode_lib.h"
30 #include "display_mode_vba_30.h"
31 #include "../dml_inline_defs.h"
32
33
34 /*
35 * NOTE:
36 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
37 *
38 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
39 * ways. Unless there is something clearly wrong with it the code should
40 * remain as-is as it provides us with a guarantee from HW that it is correct.
41 */
42
43
/*
 * Pipe - bundle of per-pipe inputs that CalculatePrefetchSchedule() receives
 * through its single myPipe pointer.
 *
 * In the part of CalculatePrefetchSchedule() that is visible here, DPPCLK,
 * DISPCLK, DCFCLKDeepSleep, PixelClock, HTotal, VBlank and InterlaceEnable are
 * forwarded to CalculateDynamicMetadataParameters(), and the line time is
 * derived as HTotal / PixelClock.
 *
 * NOTE(review): units of the clock fields are not stated in this file; confirm
 * against the code that fills the structure in.
 * NOTE(review): ODMCombineEnabled is a bool here, whereas other helpers declared
 * in this file take arrays of enum odm_combine_mode under the same name.
 */
44 typedef struct {
45 double DPPCLK;
46 double DISPCLK;
47 double PixelClock;
48 double DCFCLKDeepSleep;
49 unsigned int DPPPerPlane;
50 bool ScalerEnabled;
51 enum scan_direction_class SourceScan;
52 unsigned int BlockWidth256BytesY;
53 unsigned int BlockHeight256BytesY;
54 unsigned int BlockWidth256BytesC;
55 unsigned int BlockHeight256BytesC;
56 unsigned int InterlaceEnable;
57 unsigned int NumberOfCursors;
58 unsigned int VBlank;
59 unsigned int HTotal;
60 unsigned int DCCEnable;
61 bool ODMCombineEnabled;
62 } Pipe;
63
/*
 * BPP_INVALID / BPP_BLENDED_PIPE: special bits-per-pixel values. Their users are
 * outside the visible part of this file - presumably sentinels; confirm the
 * exact meaning at the call sites.
 *
 * DCN30_MAX_DSC_IMAGE_WIDTH / DCN30_MAX_FMT_420_BUFFER_WIDTH: the same 5184 and
 * 4096 figures that the dscceComputeDelay() notes quote as the sliceWidth limits
 * (per numSlices) for the general case and for 420 mode respectively.
 */
64 #define BPP_INVALID 0
65 #define BPP_BLENDED_PIPE 0xffffffff
66 #define DCN30_MAX_DSC_IMAGE_WIDTH 5184
67 #define DCN30_MAX_FMT_420_BUFFER_WIDTH 4096
68
69 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
70 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
71 struct display_mode_lib *mode_lib);
72 static unsigned int dscceComputeDelay(
73 unsigned int bpc,
74 double BPP,
75 unsigned int sliceWidth,
76 unsigned int numSlices,
77 enum output_format_class pixelFormat,
78 enum output_encoder_class Output);
79 static unsigned int dscComputeDelay(
80 enum output_format_class pixelFormat,
81 enum output_encoder_class Output);
82 // Super monster function with 64 arguments
83 static bool CalculatePrefetchSchedule(
84 struct display_mode_lib *mode_lib,
85 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
86 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
87 Pipe *myPipe,
88 unsigned int DSCDelay,
89 double DPPCLKDelaySubtotalPlusCNVCFormater,
90 double DPPCLKDelaySCL,
91 double DPPCLKDelaySCLLBOnly,
92 double DPPCLKDelayCNVCCursor,
93 double DISPCLKDelaySubtotal,
94 unsigned int DPP_RECOUT_WIDTH,
95 enum output_format_class OutputFormat,
96 unsigned int MaxInterDCNTileRepeaters,
97 unsigned int VStartup,
98 unsigned int MaxVStartup,
99 unsigned int GPUVMPageTableLevels,
100 bool GPUVMEnable,
101 bool HostVMEnable,
102 unsigned int HostVMMaxNonCachedPageTableLevels,
103 double HostVMMinPageSize,
104 bool DynamicMetadataEnable,
105 bool DynamicMetadataVMEnabled,
106 int DynamicMetadataLinesBeforeActiveRequired,
107 unsigned int DynamicMetadataTransmittedBytes,
108 double UrgentLatency,
109 double UrgentExtraLatency,
110 double TCalc,
111 unsigned int PDEAndMetaPTEBytesFrame,
112 unsigned int MetaRowByte,
113 unsigned int PixelPTEBytesPerRow,
114 double PrefetchSourceLinesY,
115 unsigned int SwathWidthY,
116 int BytePerPixelY,
117 double VInitPreFillY,
118 unsigned int MaxNumSwathY,
119 double PrefetchSourceLinesC,
120 unsigned int SwathWidthC,
121 int BytePerPixelC,
122 double VInitPreFillC,
123 unsigned int MaxNumSwathC,
124 long swath_width_luma_ub,
125 long swath_width_chroma_ub,
126 unsigned int SwathHeightY,
127 unsigned int SwathHeightC,
128 double TWait,
129 bool ProgressiveToInterlaceUnitInOPP,
130 double *DSTXAfterScaler,
131 double *DSTYAfterScaler,
132 double *DestinationLinesForPrefetch,
133 double *PrefetchBandwidth,
134 double *DestinationLinesToRequestVMInVBlank,
135 double *DestinationLinesToRequestRowInVBlank,
136 double *VRatioPrefetchY,
137 double *VRatioPrefetchC,
138 double *RequiredPrefetchPixDataBWLuma,
139 double *RequiredPrefetchPixDataBWChroma,
140 bool *NotEnoughTimeForDynamicMetadata,
141 double *Tno_bw,
142 double *prefetch_vmrow_bw,
143 double *Tdmdl_vm,
144 double *Tdmdl,
145 unsigned int *VUpdateOffsetPix,
146 double *VUpdateWidthPix,
147 double *VReadyOffsetPix);
148 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
149 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
150 static void CalculateDCCConfiguration(
151 bool DCCEnabled,
152 bool DCCProgrammingAssumesScanDirectionUnknown,
153 enum source_format_class SourcePixelFormat,
154 unsigned int ViewportWidthLuma,
155 unsigned int ViewportWidthChroma,
156 unsigned int ViewportHeightLuma,
157 unsigned int ViewportHeightChroma,
158 double DETBufferSize,
159 unsigned int RequestHeight256ByteLuma,
160 unsigned int RequestHeight256ByteChroma,
161 enum dm_swizzle_mode TilingFormat,
162 unsigned int BytePerPixelY,
163 unsigned int BytePerPixelC,
164 double BytePerPixelDETY,
165 double BytePerPixelDETC,
166 enum scan_direction_class ScanOrientation,
167 unsigned int *MaxUncompressedBlockLuma,
168 unsigned int *MaxUncompressedBlockChroma,
169 unsigned int *MaxCompressedBlockLuma,
170 unsigned int *MaxCompressedBlockChroma,
171 unsigned int *IndependentBlockLuma,
172 unsigned int *IndependentBlockChroma);
173 static double CalculatePrefetchSourceLines(
174 struct display_mode_lib *mode_lib,
175 double VRatio,
176 double vtaps,
177 bool Interlace,
178 bool ProgressiveToInterlaceUnitInOPP,
179 unsigned int SwathHeight,
180 unsigned int ViewportYStart,
181 double *VInitPreFill,
182 unsigned int *MaxNumSwath);
183 static unsigned int CalculateVMAndRowBytes(
184 struct display_mode_lib *mode_lib,
185 bool DCCEnable,
186 unsigned int BlockHeight256Bytes,
187 unsigned int BlockWidth256Bytes,
188 enum source_format_class SourcePixelFormat,
189 unsigned int SurfaceTiling,
190 unsigned int BytePerPixel,
191 enum scan_direction_class ScanDirection,
192 unsigned int SwathWidth,
193 unsigned int ViewportHeight,
194 bool GPUVMEnable,
195 bool HostVMEnable,
196 unsigned int HostVMMaxNonCachedPageTableLevels,
197 unsigned int GPUVMMinPageSize,
198 unsigned int HostVMMinPageSize,
199 unsigned int PTEBufferSizeInRequests,
200 unsigned int Pitch,
201 unsigned int DCCMetaPitch,
202 unsigned int *MacroTileWidth,
203 unsigned int *MetaRowByte,
204 unsigned int *PixelPTEBytesPerRow,
205 bool *PTEBufferSizeNotExceeded,
206 unsigned int *dpte_row_width_ub,
207 unsigned int *dpte_row_height,
208 unsigned int *MetaRequestWidth,
209 unsigned int *MetaRequestHeight,
210 unsigned int *meta_row_width,
211 unsigned int *meta_row_height,
212 unsigned int *vm_group_bytes,
213 unsigned int *dpte_group_bytes,
214 unsigned int *PixelPTEReqWidth,
215 unsigned int *PixelPTEReqHeight,
216 unsigned int *PTERequestSize,
217 unsigned int *DPDE0BytesFrame,
218 unsigned int *MetaPTEBytesFrame);
219 static double CalculateTWait(
220 unsigned int PrefetchMode,
221 double DRAMClockChangeLatency,
222 double UrgentLatency,
223 double SREnterPlusExitTime);
224 static void CalculateRowBandwidth(
225 bool GPUVMEnable,
226 enum source_format_class SourcePixelFormat,
227 double VRatio,
228 double VRatioChroma,
229 bool DCCEnable,
230 double LineTime,
231 unsigned int MetaRowByteLuma,
232 unsigned int MetaRowByteChroma,
233 unsigned int meta_row_height_luma,
234 unsigned int meta_row_height_chroma,
235 unsigned int PixelPTEBytesPerRowLuma,
236 unsigned int PixelPTEBytesPerRowChroma,
237 unsigned int dpte_row_height_luma,
238 unsigned int dpte_row_height_chroma,
239 double *meta_row_bw,
240 double *dpte_row_bw);
241 static void CalculateFlipSchedule(
242 struct display_mode_lib *mode_lib,
243 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
244 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
245 double UrgentExtraLatency,
246 double UrgentLatency,
247 unsigned int GPUVMMaxPageTableLevels,
248 bool HostVMEnable,
249 unsigned int HostVMMaxNonCachedPageTableLevels,
250 bool GPUVMEnable,
251 double HostVMMinPageSize,
252 double PDEAndMetaPTEBytesPerFrame,
253 double MetaRowBytes,
254 double DPTEBytesPerRow,
255 double BandwidthAvailableForImmediateFlip,
256 unsigned int TotImmediateFlipBytes,
257 enum source_format_class SourcePixelFormat,
258 double LineTime,
259 double VRatio,
260 double VRatioChroma,
261 double Tno_bw,
262 bool DCCEnable,
263 unsigned int dpte_row_height,
264 unsigned int meta_row_height,
265 unsigned int dpte_row_height_chroma,
266 unsigned int meta_row_height_chroma,
267 double *DestinationLinesToRequestVMInImmediateFlip,
268 double *DestinationLinesToRequestRowInImmediateFlip,
269 double *final_flip_bw,
270 bool *ImmediateFlipSupportedForPipe);
271 static double CalculateWriteBackDelay(
272 enum source_format_class WritebackPixelFormat,
273 double WritebackHRatio,
274 double WritebackVRatio,
275 unsigned int WritebackVTaps,
276 long WritebackDestinationWidth,
277 long WritebackDestinationHeight,
278 long WritebackSourceHeight,
279 unsigned int HTotal);
280 static void CalculateDynamicMetadataParameters(
281 int MaxInterDCNTileRepeaters,
282 double DPPCLK,
283 double DISPCLK,
284 double DCFClkDeepSleep,
285 double PixelClock,
286 long HTotal,
287 long VBlank,
288 long DynamicMetadataTransmittedBytes,
289 long DynamicMetadataLinesBeforeActiveRequired,
290 int InterlaceEnable,
291 bool ProgressiveToInterlaceUnitInOPP,
292 double *Tsetup,
293 double *Tdmbf,
294 double *Tdmec,
295 double *Tdmsks);
296 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
297 struct display_mode_lib *mode_lib,
298 unsigned int PrefetchMode,
299 unsigned int NumberOfActivePlanes,
300 unsigned int MaxLineBufferLines,
301 unsigned int LineBufferSize,
302 unsigned int DPPOutputBufferPixels,
303 unsigned int DETBufferSizeInKByte,
304 unsigned int WritebackInterfaceBufferSize,
305 double DCFCLK,
306 double ReturnBW,
307 bool GPUVMEnable,
308 unsigned int dpte_group_bytes[],
309 unsigned int MetaChunkSize,
310 double UrgentLatency,
311 double ExtraLatency,
312 double WritebackLatency,
313 double WritebackChunkSize,
314 double SOCCLK,
315 double DRAMClockChangeLatency,
316 double SRExitTime,
317 double SREnterPlusExitTime,
318 double DCFCLKDeepSleep,
319 unsigned int DPPPerPlane[],
320 bool DCCEnable[],
321 double DPPCLK[],
322 unsigned int DETBufferSizeY[],
323 unsigned int DETBufferSizeC[],
324 unsigned int SwathHeightY[],
325 unsigned int SwathHeightC[],
326 unsigned int LBBitPerPixel[],
327 double SwathWidthY[],
328 double SwathWidthC[],
329 double HRatio[],
330 double HRatioChroma[],
331 unsigned int vtaps[],
332 unsigned int VTAPsChroma[],
333 double VRatio[],
334 double VRatioChroma[],
335 unsigned int HTotal[],
336 double PixelClock[],
337 unsigned int BlendingAndTiming[],
338 double BytePerPixelDETY[],
339 double BytePerPixelDETC[],
340 double DSTXAfterScaler[],
341 double DSTYAfterScaler[],
342 bool WritebackEnable[],
343 enum source_format_class WritebackPixelFormat[],
344 double WritebackDestinationWidth[],
345 double WritebackDestinationHeight[],
346 double WritebackSourceHeight[],
347 enum clock_change_support *DRAMClockChangeSupport,
348 double *UrgentWatermark,
349 double *WritebackUrgentWatermark,
350 double *DRAMClockChangeWatermark,
351 double *WritebackDRAMClockChangeWatermark,
352 double *StutterExitWatermark,
353 double *StutterEnterPlusExitWatermark,
354 double *MinActiveDRAMClockChangeLatencySupported);
355 static void CalculateDCFCLKDeepSleep(
356 struct display_mode_lib *mode_lib,
357 unsigned int NumberOfActivePlanes,
358 int BytePerPixelY[],
359 int BytePerPixelC[],
360 double VRatio[],
361 double VRatioChroma[],
362 double SwathWidthY[],
363 double SwathWidthC[],
364 unsigned int DPPPerPlane[],
365 double HRatio[],
366 double HRatioChroma[],
367 double PixelClock[],
368 double PSCL_THROUGHPUT[],
369 double PSCL_THROUGHPUT_CHROMA[],
370 double DPPCLK[],
371 double ReadBandwidthLuma[],
372 double ReadBandwidthChroma[],
373 int ReturnBusWidth,
374 double *DCFCLKDeepSleep);
375 static void CalculateUrgentBurstFactor(
376 long swath_width_luma_ub,
377 long swath_width_chroma_ub,
378 unsigned int DETBufferSizeInKByte,
379 unsigned int SwathHeightY,
380 unsigned int SwathHeightC,
381 double LineTime,
382 double UrgentLatency,
383 double CursorBufferSize,
384 unsigned int CursorWidth,
385 unsigned int CursorBPP,
386 double VRatio,
387 double VRatioC,
388 double BytePerPixelInDETY,
389 double BytePerPixelInDETC,
390 double DETBufferSizeY,
391 double DETBufferSizeC,
392 double *UrgentBurstFactorCursor,
393 double *UrgentBurstFactorLuma,
394 double *UrgentBurstFactorChroma,
395 bool *NotEnoughUrgentLatencyHiding);
396
397 static void UseMinimumDCFCLK(
398 struct display_mode_lib *mode_lib,
399 struct vba_vars_st *v,
400 int MaxPrefetchMode,
401 int ReorderingBytes);
402
403 static void CalculatePixelDeliveryTimes(
404 unsigned int NumberOfActivePlanes,
405 double VRatio[],
406 double VRatioChroma[],
407 double VRatioPrefetchY[],
408 double VRatioPrefetchC[],
409 unsigned int swath_width_luma_ub[],
410 unsigned int swath_width_chroma_ub[],
411 unsigned int DPPPerPlane[],
412 double HRatio[],
413 double HRatioChroma[],
414 double PixelClock[],
415 double PSCL_THROUGHPUT[],
416 double PSCL_THROUGHPUT_CHROMA[],
417 double DPPCLK[],
418 int BytePerPixelC[],
419 enum scan_direction_class SourceScan[],
420 unsigned int NumberOfCursors[],
421 unsigned int CursorWidth[][2],
422 unsigned int CursorBPP[][2],
423 unsigned int BlockWidth256BytesY[],
424 unsigned int BlockHeight256BytesY[],
425 unsigned int BlockWidth256BytesC[],
426 unsigned int BlockHeight256BytesC[],
427 double DisplayPipeLineDeliveryTimeLuma[],
428 double DisplayPipeLineDeliveryTimeChroma[],
429 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
430 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
431 double DisplayPipeRequestDeliveryTimeLuma[],
432 double DisplayPipeRequestDeliveryTimeChroma[],
433 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
434 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
435 double CursorRequestDeliveryTime[],
436 double CursorRequestDeliveryTimePrefetch[]);
437
438 static void CalculateMetaAndPTETimes(
439 int NumberOfActivePlanes,
440 bool GPUVMEnable,
441 int MetaChunkSize,
442 int MinMetaChunkSizeBytes,
443 int HTotal[],
444 double VRatio[],
445 double VRatioChroma[],
446 double DestinationLinesToRequestRowInVBlank[],
447 double DestinationLinesToRequestRowInImmediateFlip[],
448 bool DCCEnable[],
449 double PixelClock[],
450 int BytePerPixelY[],
451 int BytePerPixelC[],
452 enum scan_direction_class SourceScan[],
453 int dpte_row_height[],
454 int dpte_row_height_chroma[],
455 int meta_row_width[],
456 int meta_row_width_chroma[],
457 int meta_row_height[],
458 int meta_row_height_chroma[],
459 int meta_req_width[],
460 int meta_req_width_chroma[],
461 int meta_req_height[],
462 int meta_req_height_chroma[],
463 int dpte_group_bytes[],
464 int PTERequestSizeY[],
465 int PTERequestSizeC[],
466 int PixelPTEReqWidthY[],
467 int PixelPTEReqHeightY[],
468 int PixelPTEReqWidthC[],
469 int PixelPTEReqHeightC[],
470 int dpte_row_width_luma_ub[],
471 int dpte_row_width_chroma_ub[],
472 double DST_Y_PER_PTE_ROW_NOM_L[],
473 double DST_Y_PER_PTE_ROW_NOM_C[],
474 double DST_Y_PER_META_ROW_NOM_L[],
475 double DST_Y_PER_META_ROW_NOM_C[],
476 double TimePerMetaChunkNominal[],
477 double TimePerChromaMetaChunkNominal[],
478 double TimePerMetaChunkVBlank[],
479 double TimePerChromaMetaChunkVBlank[],
480 double TimePerMetaChunkFlip[],
481 double TimePerChromaMetaChunkFlip[],
482 double time_per_pte_group_nom_luma[],
483 double time_per_pte_group_vblank_luma[],
484 double time_per_pte_group_flip_luma[],
485 double time_per_pte_group_nom_chroma[],
486 double time_per_pte_group_vblank_chroma[],
487 double time_per_pte_group_flip_chroma[]);
488
489 static void CalculateVMGroupAndRequestTimes(
490 unsigned int NumberOfActivePlanes,
491 bool GPUVMEnable,
492 unsigned int GPUVMMaxPageTableLevels,
493 unsigned int HTotal[],
494 int BytePerPixelC[],
495 double DestinationLinesToRequestVMInVBlank[],
496 double DestinationLinesToRequestVMInImmediateFlip[],
497 bool DCCEnable[],
498 double PixelClock[],
499 int dpte_row_width_luma_ub[],
500 int dpte_row_width_chroma_ub[],
501 int vm_group_bytes[],
502 unsigned int dpde0_bytes_per_frame_ub_l[],
503 unsigned int dpde0_bytes_per_frame_ub_c[],
504 int meta_pte_bytes_per_frame_ub_l[],
505 int meta_pte_bytes_per_frame_ub_c[],
506 double TimePerVMGroupVBlank[],
507 double TimePerVMGroupFlip[],
508 double TimePerVMRequestVBlank[],
509 double TimePerVMRequestFlip[]);
510
511 static void CalculateStutterEfficiency(
512 int NumberOfActivePlanes,
513 long ROBBufferSizeInKByte,
514 double TotalDataReadBandwidth,
515 double DCFCLK,
516 double ReturnBW,
517 double SRExitTime,
518 bool SynchronizedVBlank,
519 int DPPPerPlane[],
520 unsigned int DETBufferSizeY[],
521 int BytePerPixelY[],
522 double BytePerPixelDETY[],
523 double SwathWidthY[],
524 int SwathHeightY[],
525 int SwathHeightC[],
526 double DCCRateLuma[],
527 double DCCRateChroma[],
528 int HTotal[],
529 int VTotal[],
530 double PixelClock[],
531 double VRatio[],
532 enum scan_direction_class SourceScan[],
533 int BlockHeight256BytesY[],
534 int BlockWidth256BytesY[],
535 int BlockHeight256BytesC[],
536 int BlockWidth256BytesC[],
537 int DCCYMaxUncompressedBlock[],
538 int DCCCMaxUncompressedBlock[],
539 int VActive[],
540 bool DCCEnable[],
541 bool WritebackEnable[],
542 double ReadBandwidthPlaneLuma[],
543 double ReadBandwidthPlaneChroma[],
544 double meta_row_bw[],
545 double dpte_row_bw[],
546 double *StutterEfficiencyNotIncludingVBlank,
547 double *StutterEfficiency,
548 double *StutterPeriodOut);
549
550 static void CalculateSwathAndDETConfiguration(
551 bool ForceSingleDPP,
552 int NumberOfActivePlanes,
553 unsigned int DETBufferSizeInKByte,
554 double MaximumSwathWidthLuma[],
555 double MaximumSwathWidthChroma[],
556 enum scan_direction_class SourceScan[],
557 enum source_format_class SourcePixelFormat[],
558 enum dm_swizzle_mode SurfaceTiling[],
559 int ViewportWidth[],
560 int ViewportHeight[],
561 int SurfaceWidthY[],
562 int SurfaceWidthC[],
563 int SurfaceHeightY[],
564 int SurfaceHeightC[],
565 int Read256BytesBlockHeightY[],
566 int Read256BytesBlockHeightC[],
567 int Read256BytesBlockWidthY[],
568 int Read256BytesBlockWidthC[],
569 enum odm_combine_mode ODMCombineEnabled[],
570 int BlendingAndTiming[],
571 int BytePerPixY[],
572 int BytePerPixC[],
573 double BytePerPixDETY[],
574 double BytePerPixDETC[],
575 int HActive[],
576 double HRatio[],
577 double HRatioChroma[],
578 int DPPPerPlane[],
579 int swath_width_luma_ub[],
580 int swath_width_chroma_ub[],
581 double SwathWidth[],
582 double SwathWidthChroma[],
583 int SwathHeightY[],
584 int SwathHeightC[],
585 unsigned int DETBufferSizeY[],
586 unsigned int DETBufferSizeC[],
587 bool ViewportSizeSupportPerPlane[],
588 bool *ViewportSizeSupport);
589 static void CalculateSwathWidth(
590 bool ForceSingleDPP,
591 int NumberOfActivePlanes,
592 enum source_format_class SourcePixelFormat[],
593 enum scan_direction_class SourceScan[],
594 unsigned int ViewportWidth[],
595 unsigned int ViewportHeight[],
596 unsigned int SurfaceWidthY[],
597 unsigned int SurfaceWidthC[],
598 unsigned int SurfaceHeightY[],
599 unsigned int SurfaceHeightC[],
600 enum odm_combine_mode ODMCombineEnabled[],
601 int BytePerPixY[],
602 int BytePerPixC[],
603 int Read256BytesBlockHeightY[],
604 int Read256BytesBlockHeightC[],
605 int Read256BytesBlockWidthY[],
606 int Read256BytesBlockWidthC[],
607 int BlendingAndTiming[],
608 unsigned int HActive[],
609 double HRatio[],
610 int DPPPerPlane[],
611 double SwathWidthSingleDPPY[],
612 double SwathWidthSingleDPPC[],
613 double SwathWidthY[],
614 double SwathWidthC[],
615 int MaximumSwathHeightY[],
616 int MaximumSwathHeightC[],
617 unsigned int swath_width_luma_ub[],
618 unsigned int swath_width_chroma_ub[]);
619 static double CalculateExtraLatency(
620 long RoundTripPingLatencyCycles,
621 long ReorderingBytes,
622 double DCFCLK,
623 int TotalNumberOfActiveDPP,
624 int PixelChunkSizeInKByte,
625 int TotalNumberOfDCCActiveDPP,
626 int MetaChunkSize,
627 double ReturnBW,
628 bool GPUVMEnable,
629 bool HostVMEnable,
630 int NumberOfActivePlanes,
631 int NumberOfDPP[],
632 int dpte_group_bytes[],
633 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
634 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
635 double HostVMMinPageSize,
636 int HostVMMaxNonCachedPageTableLevels);
637 static double CalculateExtraLatencyBytes(
638 long ReorderingBytes,
639 int TotalNumberOfActiveDPP,
640 int PixelChunkSizeInKByte,
641 int TotalNumberOfDCCActiveDPP,
642 int MetaChunkSize,
643 bool GPUVMEnable,
644 bool HostVMEnable,
645 int NumberOfActivePlanes,
646 int NumberOfDPP[],
647 int dpte_group_bytes[],
648 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
649 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
650 double HostVMMinPageSize,
651 int HostVMMaxNonCachedPageTableLevels);
652 static double CalculateUrgentLatency(
653 double UrgentLatencyPixelDataOnly,
654 double UrgentLatencyPixelMixedWithVMData,
655 double UrgentLatencyVMDataOnly,
656 bool DoUrgentLatencyAdjustment,
657 double UrgentLatencyAdjustmentFabricClockComponent,
658 double UrgentLatencyAdjustmentFabricClockReference,
659 double FabricClockSingle);
660
/*
 * dml30_recalculate() - non-static entry point that runs four calculation
 * passes over mode_lib, in a fixed order:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * Passes 3 and 4 are static helpers declared at the top of this file. Passes 1
 * and 2 are not declared in the visible part of the file - presumably they come
 * from the included display mode library headers.
 *
 * Nothing is returned; every pass receives the same mode_lib pointer.
 */
dml30_recalculate(struct display_mode_lib * mode_lib)661 void dml30_recalculate(struct display_mode_lib *mode_lib)
662 {
663 ModeSupportAndSystemConfiguration(mode_lib);
664 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
665 DisplayPipeConfiguration(mode_lib);
666 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
667 }
668
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)669 static unsigned int dscceComputeDelay(
670 unsigned int bpc,
671 double BPP,
672 unsigned int sliceWidth,
673 unsigned int numSlices,
674 enum output_format_class pixelFormat,
675 enum output_encoder_class Output)
676 {
677 // valid bpc = source bits per component in the set of {8, 10, 12}
678 // valid bpp = increments of 1/16 of a bit
679 // min = 6/7/8 in N420/N422/444, respectively
680 // max = such that compression is 1:1
681 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
682 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
683 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
684
685 // fixed value
686 unsigned int rcModelSize = 8192;
687
688 // N422/N420 operate at 2 pixels per clock
689 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L,
690 Delay, pixels;
691
692 if (pixelFormat == dm_420)
693 pixelsPerClock = 2;
694 // #all other modes operate at 1 pixel per clock
695 else if (pixelFormat == dm_444)
696 pixelsPerClock = 1;
697 else if (pixelFormat == dm_n422)
698 pixelsPerClock = 2;
699 else
700 pixelsPerClock = 1;
701
702 //initial transmit delay as per PPS
703 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
704
705 //compute ssm delay
706 if (bpc == 8)
707 D = 81;
708 else if (bpc == 10)
709 D = 89;
710 else
711 D = 113;
712
713 //divide by pixel per cycle to compute slice width as seen by DSC
714 w = sliceWidth / pixelsPerClock;
715
716 //422 mode has an additional cycle of delay
717 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
718 s = 0;
719 else
720 s = 1;
721
722 //main calculation for the dscce
723 ix = initalXmitDelay + 45;
724 wx = (w + 2) / 3;
725 P = 3 * wx - w;
726 l0 = ix / w;
727 a = ix + P * l0;
728 ax = (a + 2) / 3 + D + 6 + 1;
729 L = (ax + wx - 1) / wx;
730 if ((ix % w) == 0 && P != 0)
731 lstall = 1;
732 else
733 lstall = 0;
734 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
735
736 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
737 pixels = Delay * 3 * pixelsPerClock;
738 return pixels;
739 }
740
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)741 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
742 {
743 unsigned int Delay = 0;
744
745 if (pixelFormat == dm_420) {
746 // sfr
747 Delay = Delay + 2;
748 // dsccif
749 Delay = Delay + 0;
750 // dscc - input deserializer
751 Delay = Delay + 3;
752 // dscc gets pixels every other cycle
753 Delay = Delay + 2;
754 // dscc - input cdc fifo
755 Delay = Delay + 12;
756 // dscc gets pixels every other cycle
757 Delay = Delay + 13;
758 // dscc - cdc uncertainty
759 Delay = Delay + 2;
760 // dscc - output cdc fifo
761 Delay = Delay + 7;
762 // dscc gets pixels every other cycle
763 Delay = Delay + 3;
764 // dscc - cdc uncertainty
765 Delay = Delay + 2;
766 // dscc - output serializer
767 Delay = Delay + 1;
768 // sft
769 Delay = Delay + 1;
770 } else if (pixelFormat == dm_n422) {
771 // sfr
772 Delay = Delay + 2;
773 // dsccif
774 Delay = Delay + 1;
775 // dscc - input deserializer
776 Delay = Delay + 5;
777 // dscc - input cdc fifo
778 Delay = Delay + 25;
779 // dscc - cdc uncertainty
780 Delay = Delay + 2;
781 // dscc - output cdc fifo
782 Delay = Delay + 10;
783 // dscc - cdc uncertainty
784 Delay = Delay + 2;
785 // dscc - output serializer
786 Delay = Delay + 1;
787 // sft
788 Delay = Delay + 1;
789 }
790 else {
791 // sfr
792 Delay = Delay + 2;
793 // dsccif
794 Delay = Delay + 0;
795 // dscc - input deserializer
796 Delay = Delay + 3;
797 // dscc - input cdc fifo
798 Delay = Delay + 12;
799 // dscc - cdc uncertainty
800 Delay = Delay + 2;
801 // dscc - output cdc fifo
802 Delay = Delay + 7;
803 // dscc - output serializer
804 Delay = Delay + 1;
805 // dscc - cdc uncertainty
806 Delay = Delay + 2;
807 // sft
808 Delay = Delay + 1;
809 }
810
811 return Delay;
812 }
813
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,Pipe * myPipe,unsigned int DSCDelay,double DPPCLKDelaySubtotalPlusCNVCFormater,double DPPCLKDelaySCL,double DPPCLKDelaySCLLBOnly,double DPPCLKDelayCNVCCursor,double DISPCLKDelaySubtotal,unsigned int DPP_RECOUT_WIDTH,enum output_format_class OutputFormat,unsigned int MaxInterDCNTileRepeaters,unsigned int VStartup,unsigned int MaxVStartup,unsigned int GPUVMPageTableLevels,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,double HostVMMinPageSize,bool DynamicMetadataEnable,bool DynamicMetadataVMEnabled,int DynamicMetadataLinesBeforeActiveRequired,unsigned int DynamicMetadataTransmittedBytes,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,int BytePerPixelY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,int BytePerPixelC,double VInitPreFillC,unsigned int MaxNumSwathC,long swath_width_luma_ub,long swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,bool ProgressiveToInterlaceUnitInOPP,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,double * Tdmdl_vm,double * Tdmdl,unsigned int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)814 static bool CalculatePrefetchSchedule(
815 struct display_mode_lib *mode_lib,
816 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
817 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
818 Pipe *myPipe,
819 unsigned int DSCDelay,
820 double DPPCLKDelaySubtotalPlusCNVCFormater,
821 double DPPCLKDelaySCL,
822 double DPPCLKDelaySCLLBOnly,
823 double DPPCLKDelayCNVCCursor,
824 double DISPCLKDelaySubtotal,
825 unsigned int DPP_RECOUT_WIDTH,
826 enum output_format_class OutputFormat,
827 unsigned int MaxInterDCNTileRepeaters,
828 unsigned int VStartup,
829 unsigned int MaxVStartup,
830 unsigned int GPUVMPageTableLevels,
831 bool GPUVMEnable,
832 bool HostVMEnable,
833 unsigned int HostVMMaxNonCachedPageTableLevels,
834 double HostVMMinPageSize,
835 bool DynamicMetadataEnable,
836 bool DynamicMetadataVMEnabled,
837 int DynamicMetadataLinesBeforeActiveRequired,
838 unsigned int DynamicMetadataTransmittedBytes,
839 double UrgentLatency,
840 double UrgentExtraLatency,
841 double TCalc,
842 unsigned int PDEAndMetaPTEBytesFrame,
843 unsigned int MetaRowByte,
844 unsigned int PixelPTEBytesPerRow,
845 double PrefetchSourceLinesY,
846 unsigned int SwathWidthY,
847 int BytePerPixelY,
848 double VInitPreFillY,
849 unsigned int MaxNumSwathY,
850 double PrefetchSourceLinesC,
851 unsigned int SwathWidthC,
852 int BytePerPixelC,
853 double VInitPreFillC,
854 unsigned int MaxNumSwathC,
855 long swath_width_luma_ub,
856 long swath_width_chroma_ub,
857 unsigned int SwathHeightY,
858 unsigned int SwathHeightC,
859 double TWait,
860 bool ProgressiveToInterlaceUnitInOPP,
861 double *DSTXAfterScaler,
862 double *DSTYAfterScaler,
863 double *DestinationLinesForPrefetch,
864 double *PrefetchBandwidth,
865 double *DestinationLinesToRequestVMInVBlank,
866 double *DestinationLinesToRequestRowInVBlank,
867 double *VRatioPrefetchY,
868 double *VRatioPrefetchC,
869 double *RequiredPrefetchPixDataBWLuma,
870 double *RequiredPrefetchPixDataBWChroma,
871 bool *NotEnoughTimeForDynamicMetadata,
872 double *Tno_bw,
873 double *prefetch_vmrow_bw,
874 double *Tdmdl_vm,
875 double *Tdmdl,
876 unsigned int *VUpdateOffsetPix,
877 double *VUpdateWidthPix,
878 double *VReadyOffsetPix)
879 {
880 bool MyError = false;
881 unsigned int DPPCycles = 0, DISPCLKCycles = 0;
882 double DSTTotalPixelsAfterScaler = 0;
883 double LineTime = 0, Tsetup = 0;
884 double dst_y_prefetch_equ = 0;
885 double Tsw_oto = 0;
886 double prefetch_bw_oto = 0;
887 double Tvm_oto = 0;
888 double Tr0_oto = 0;
889 double Tvm_oto_lines = 0;
890 double Tr0_oto_lines = 0;
891 double dst_y_prefetch_oto = 0;
892 double TimeForFetchingMetaPTE = 0;
893 double TimeForFetchingRowInVBlank = 0;
894 double LinesToRequestPrefetchPixelData = 0;
895 double HostVMInefficiencyFactor = 0;
896 unsigned int HostVMDynamicLevelsTrips = 0;
897 double trip_to_mem = 0;
898 double Tvm_trips = 0;
899 double Tr0_trips = 0;
900 double Tvm_trips_rounded = 0;
901 double Tr0_trips_rounded = 0;
902 double Lsw_oto = 0;
903 double Tpre_rounded = 0;
904 double prefetch_bw_equ = 0;
905 double Tvm_equ = 0;
906 double Tr0_equ = 0;
907 double Tdmbf = 0;
908 double Tdmec = 0;
909 double Tdmsks = 0;
910
911 if (GPUVMEnable == true && HostVMEnable == true) {
912 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
913 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
914 } else {
915 HostVMInefficiencyFactor = 1;
916 HostVMDynamicLevelsTrips = 0;
917 }
918
919 CalculateDynamicMetadataParameters(
920 MaxInterDCNTileRepeaters,
921 myPipe->DPPCLK,
922 myPipe->DISPCLK,
923 myPipe->DCFCLKDeepSleep,
924 myPipe->PixelClock,
925 myPipe->HTotal,
926 myPipe->VBlank,
927 DynamicMetadataTransmittedBytes,
928 DynamicMetadataLinesBeforeActiveRequired,
929 myPipe->InterlaceEnable,
930 ProgressiveToInterlaceUnitInOPP,
931 &Tsetup,
932 &Tdmbf,
933 &Tdmec,
934 &Tdmsks);
935
936 LineTime = myPipe->HTotal / myPipe->PixelClock;
937 trip_to_mem = UrgentLatency;
938 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
939
940 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
941 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
942 } else {
943 *Tdmdl = TWait + UrgentExtraLatency;
944 }
945
946 if (DynamicMetadataEnable == true) {
947 if (VStartup * LineTime < Tsetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
948 *NotEnoughTimeForDynamicMetadata = true;
949 } else {
950 *NotEnoughTimeForDynamicMetadata = false;
951 dml_print("DML: Not Enough Time for Dynamic Meta!\n");
952 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
953 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
954 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
955 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
956 }
957 } else {
958 *NotEnoughTimeForDynamicMetadata = false;
959 }
960
961 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
962
963 if (myPipe->ScalerEnabled)
964 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
965 else
966 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
967
968 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
969
970 DISPCLKCycles = DISPCLKDelaySubtotal;
971
972 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
973 return true;
974
975 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK
976 + DSCDelay;
977
978 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineEnabled)?18:0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
979
980 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP))
981 *DSTYAfterScaler = 1;
982 else
983 *DSTYAfterScaler = 0;
984
985 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
986 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
987 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
988
989 MyError = false;
990
991
992 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
993 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
994 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
995
996 if (GPUVMEnable) {
997 if (GPUVMPageTableLevels >= 3) {
998 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
999 } else
1000 *Tno_bw = 0;
1001 } else if (!myPipe->DCCEnable)
1002 *Tno_bw = LineTime;
1003 else
1004 *Tno_bw = LineTime / 4;
1005
1006 dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime
1007 - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1008
1009 Lsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC);
1010 Tsw_oto = Lsw_oto * LineTime;
1011
1012 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC) / Tsw_oto;
1013
1014 if (GPUVMEnable == true) {
1015 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
1016 Tvm_trips,
1017 LineTime / 4.0);
1018 } else
1019 Tvm_oto = LineTime / 4.0;
1020
1021 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1022 Tr0_oto = dml_max3(
1023 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
1024 LineTime - Tvm_oto, LineTime / 4);
1025 } else
1026 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1027
1028 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1029 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1030 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1031
1032 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1033 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1034
1035 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1036 dml_print("DML: dst_y_prefetch_equ: %f\n", dst_y_prefetch_equ);
1037
1038 dml_print("DML: LineTime: %f\n", LineTime);
1039 dml_print("DML: VStartup: %d\n", VStartup);
1040 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1041 dml_print("DML: Tsetup: %fus - time from vstartup to vready\n", Tsetup);
1042 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1043 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1044 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1045 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1046 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1047 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1048 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1049 dml_print("DML: dst_x_after_scl: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1050 dml_print("DML: dst_y_after_scl: %d lines - number of lines of pipeline and buffer delay after scaler \n", (int)*DSTYAfterScaler);
1051
1052 *PrefetchBandwidth = 0;
1053 *DestinationLinesToRequestVMInVBlank = 0;
1054 *DestinationLinesToRequestRowInVBlank = 0;
1055 *VRatioPrefetchY = 0;
1056 *VRatioPrefetchC = 0;
1057 *RequiredPrefetchPixDataBWLuma = 0;
1058 if (dst_y_prefetch_equ > 1) {
1059 double PrefetchBandwidth1 = 0;
1060 double PrefetchBandwidth2 = 0;
1061 double PrefetchBandwidth3 = 0;
1062 double PrefetchBandwidth4 = 0;
1063
1064 if (Tpre_rounded - *Tno_bw > 0)
1065 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
1066 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1067 + PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY
1068 + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
1069 / (Tpre_rounded - *Tno_bw);
1070 else
1071 PrefetchBandwidth1 = 0;
1072
1073 if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw) > 0) {
1074 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw);
1075 }
1076
1077 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1078 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame *
1079 HostVMInefficiencyFactor + PrefetchSourceLinesY *
1080 swath_width_luma_ub * BytePerPixelY +
1081 PrefetchSourceLinesC * swath_width_chroma_ub *
1082 BytePerPixelC) /
1083 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1084 else
1085 PrefetchBandwidth2 = 0;
1086
1087 if (Tpre_rounded - Tvm_trips_rounded > 0)
1088 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow *
1089 HostVMInefficiencyFactor + PrefetchSourceLinesY *
1090 swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC *
1091 swath_width_chroma_ub * BytePerPixelC) / (Tpre_rounded -
1092 Tvm_trips_rounded);
1093 else
1094 PrefetchBandwidth3 = 0;
1095
1096 if (VStartup == MaxVStartup && (PrefetchBandwidth3 > 4 * prefetch_bw_oto) && Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1097 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded);
1098 }
1099
1100 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1101 PrefetchBandwidth4 = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
1102 / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1103 else
1104 PrefetchBandwidth4 = 0;
1105
1106 {
1107 bool Case1OK;
1108 bool Case2OK;
1109 bool Case3OK;
1110
1111 if (PrefetchBandwidth1 > 0) {
1112 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
1113 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1114 Case1OK = true;
1115 } else {
1116 Case1OK = false;
1117 }
1118 } else {
1119 Case1OK = false;
1120 }
1121
1122 if (PrefetchBandwidth2 > 0) {
1123 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
1124 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1125 Case2OK = true;
1126 } else {
1127 Case2OK = false;
1128 }
1129 } else {
1130 Case2OK = false;
1131 }
1132
1133 if (PrefetchBandwidth3 > 0) {
1134 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3
1135 < Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1136 Case3OK = true;
1137 } else {
1138 Case3OK = false;
1139 }
1140 } else {
1141 Case3OK = false;
1142 }
1143
1144 if (Case1OK) {
1145 prefetch_bw_equ = PrefetchBandwidth1;
1146 } else if (Case2OK) {
1147 prefetch_bw_equ = PrefetchBandwidth2;
1148 } else if (Case3OK) {
1149 prefetch_bw_equ = PrefetchBandwidth3;
1150 } else {
1151 prefetch_bw_equ = PrefetchBandwidth4;
1152 }
1153
1154 dml_print("DML: prefetch_bw_equ: %f\n", prefetch_bw_equ);
1155
1156 if (prefetch_bw_equ > 0) {
1157 if (GPUVMEnable) {
1158 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1159 } else {
1160 Tvm_equ = LineTime / 4;
1161 }
1162
1163 if ((GPUVMEnable || myPipe->DCCEnable)) {
1164 Tr0_equ = dml_max4(
1165 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1166 Tr0_trips,
1167 (LineTime - Tvm_equ) / 2,
1168 LineTime / 4);
1169 } else {
1170 Tr0_equ = (LineTime - Tvm_equ) / 2;
1171 }
1172 } else {
1173 Tvm_equ = 0;
1174 Tr0_equ = 0;
1175 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1176 }
1177 }
1178
1179 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1180 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1181 TimeForFetchingMetaPTE = Tvm_oto;
1182 TimeForFetchingRowInVBlank = Tr0_oto;
1183 *PrefetchBandwidth = prefetch_bw_oto;
1184 } else {
1185 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1186 TimeForFetchingMetaPTE = Tvm_equ;
1187 TimeForFetchingRowInVBlank = Tr0_equ;
1188 *PrefetchBandwidth = prefetch_bw_equ;
1189 }
1190
1191 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1192
1193 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1194
1195
1196 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank
1197 - 2 * *DestinationLinesToRequestRowInVBlank;
1198
1199 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1200
1201 *VRatioPrefetchY = (double) PrefetchSourceLinesY
1202 / LinesToRequestPrefetchPixelData;
1203 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1204 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1205 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1206 *VRatioPrefetchY = dml_max((double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1207 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1208 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1209 } else {
1210 MyError = true;
1211 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1212 *VRatioPrefetchY = 0;
1213 }
1214 }
1215
1216 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1217 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1218
1219 if ((SwathHeightC > 4)) {
1220 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1221 *VRatioPrefetchC = dml_max(*VRatioPrefetchC,
1222 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1223 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1224 } else {
1225 MyError = true;
1226 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1227 *VRatioPrefetchC = 0;
1228 }
1229 }
1230
1231 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * BytePerPixelY * swath_width_luma_ub / LineTime;
1232 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * BytePerPixelC * swath_width_chroma_ub / LineTime;
1233 } else {
1234 MyError = true;
1235 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1236 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1237 *VRatioPrefetchY = 0;
1238 *VRatioPrefetchC = 0;
1239 *RequiredPrefetchPixDataBWLuma = 0;
1240 *RequiredPrefetchPixDataBWChroma = 0;
1241 }
1242
1243 dml_print("DML: Tpre: %fus - sum of tim to request meta pte, 2 x data pte + meta data, swaths\n", (double)LinesToRequestPrefetchPixelData * LineTime + 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1244 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1245 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1246 dml_print("DML: Tr1: %fus - time to fetch second row of data pagetables and second row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1247 dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)LinesToRequestPrefetchPixelData * LineTime);
1248 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
1249 dml_print("DML: Tvstartup - Tsetup - Tcalc - Twait - Tpre - To > 0\n");
1250 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank - (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - Tsetup);
1251 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1252
1253 } else {
1254 MyError = true;
1255 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1256 }
1257
1258 {
1259 double prefetch_vm_bw = 0;
1260 double prefetch_row_bw = 0;
1261
1262 if (PDEAndMetaPTEBytesFrame == 0) {
1263 prefetch_vm_bw = 0;
1264 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1265 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1266 } else {
1267 prefetch_vm_bw = 0;
1268 MyError = true;
1269 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1270 }
1271 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1272 prefetch_row_bw = 0;
1273 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1274 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1275 } else {
1276 prefetch_row_bw = 0;
1277 MyError = true;
1278 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1279 }
1280
1281 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1282 }
1283
1284 if (MyError) {
1285 *PrefetchBandwidth = 0;
1286 TimeForFetchingMetaPTE = 0;
1287 TimeForFetchingRowInVBlank = 0;
1288 *DestinationLinesToRequestVMInVBlank = 0;
1289 *DestinationLinesToRequestRowInVBlank = 0;
1290 *DestinationLinesForPrefetch = 0;
1291 LinesToRequestPrefetchPixelData = 0;
1292 *VRatioPrefetchY = 0;
1293 *VRatioPrefetchC = 0;
1294 *RequiredPrefetchPixDataBWLuma = 0;
1295 *RequiredPrefetchPixDataBWChroma = 0;
1296 }
1297
1298 return MyError;
1299 }
1300
/*
 * RoundToDFSGranularityUp - snap a clock to the divider grid, rounding up.
 *
 * Computes (4 * VCOSpeed) / D, where D is the ratio (4 * VCOSpeed) / Clock
 * passed through dml_floor(..., 1). dml_floor is defined elsewhere;
 * presumably it rounds down to a whole number, which makes the returned
 * clock no lower than Clock for positive inputs.
 *
 * No guarding is done here: Clock == 0, or a Clock large enough that D
 * becomes 0, results in a floating point division by zero.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double vco_x4 = VCOSpeed * 4;
	const double divider = dml_floor(vco_x4 / Clock, 1);

	return vco_x4 / divider;
}
1305
/*
 * RoundToDFSGranularityDown - snap a clock to the divider grid, rounding down.
 *
 * Computes (4 * VCOSpeed) / D, where D is the ratio (4 * VCOSpeed) / Clock
 * passed through dml_ceil(..., 1). dml_ceil is defined elsewhere;
 * presumably it rounds up to a whole number, which makes the returned
 * clock no higher than Clock for positive inputs.
 *
 * No guarding is done here: Clock == 0 results in a floating point
 * division by zero.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);

	return VCOSpeed * 4 / divider;
}
1310
/*
 * CalculateDCCConfiguration - choose the DCC request mode for the luma and
 * chroma planes and report the matching block sizes.
 *
 * The function works in stages:
 *   1. derive per-plane divisors from BytePerPixelY/C and TilingFormat;
 *   2. derive horizontal/vertical viewport limits from DETBufferSize and
 *      clamp the luma surface size to them (chroma is derived from luma);
 *   3. compute the bytes of a full swath per plane for horizontal and
 *      vertical access;
 *   4. compare those against DETBufferSize to decide, per plane and per
 *      direction, whether 128-byte requests are needed (req128_* == 1);
 *   5. decide, from BytePerPixelY/C and TilingFormat, whether the segment
 *      order is contiguous for each plane and direction;
 *   6. combine 4 and 5 according to the scan direction into a request type
 *      per plane, and translate that into the output values.
 *
 * Outputs (per plane): MaxUncompressedBlock / MaxCompressedBlock /
 * IndependentBlock are
 *   256 / 256 / 0    for REQ_256Bytes,
 *   256 / 128 / 128  for REQ_128BytesContiguous,
 *   256 / 64  / 64   for REQ_128BytesNonContiguous.
 * All three chroma outputs are forced to 0 when DCCEnabled is not true or
 * BytePerPixelC == 0; all three luma outputs are forced to 0 when
 * DCCEnabled is not true.
 *
 * SurfaceWidthChroma, SurfaceHeightChroma, BytePerPixelDETY and
 * BytePerPixelDETC are accepted but not read by this function.
 *
 * NOTE(review): RequestHeight256ByteLuma (and RequestHeight256ByteChroma
 * when BytePerPixelC > 0) are used as divisors without a zero check;
 * callers are assumed to pass non-zero values - confirm.
 */
static void CalculateDCCConfiguration(
		bool DCCEnabled,
		bool DCCProgrammingAssumesScanDirectionUnknown,
		enum source_format_class SourcePixelFormat,
		unsigned int SurfaceWidthLuma,
		unsigned int SurfaceWidthChroma,
		unsigned int SurfaceHeightLuma,
		unsigned int SurfaceHeightChroma,
		double DETBufferSize,
		unsigned int RequestHeight256ByteLuma,
		unsigned int RequestHeight256ByteChroma,
		enum dm_swizzle_mode TilingFormat,
		unsigned int BytePerPixelY,
		unsigned int BytePerPixelC,
		double BytePerPixelDETY,
		double BytePerPixelDETC,
		enum scan_direction_class ScanOrientation,
		unsigned int *MaxUncompressedBlockLuma,
		unsigned int *MaxUncompressedBlockChroma,
		unsigned int *MaxCompressedBlockLuma,
		unsigned int *MaxCompressedBlockChroma,
		unsigned int *IndependentBlockLuma,
		unsigned int *IndependentBlockChroma)
{
	/* 1 when the source format is one of the dm_420_* formats, else 0. */
	int yuv420 = 0;
	/* 0/1 flags; each is used below only as a "(1 + flag)" divisor. */
	int horz_div_l = 0;
	int horz_div_c = 0;
	int vert_div_l = 0;
	int vert_div_c = 0;

	/* 1 when the plane needs 128-byte requests for that access direction. */
	int req128_horz_wc_l = 0;
	int req128_horz_wc_c = 0;
	int req128_vert_wc_l = 0;
	int req128_vert_wc_c = 0;
	/* 1 when the segment order is contiguous for that plane/direction. */
	int segment_order_horz_contiguous_luma = 0;
	int segment_order_horz_contiguous_chroma = 0;
	int segment_order_vert_contiguous_luma = 0;
	int segment_order_vert_contiguous_chroma = 0;

	/* Bytes of one full swath per plane, for horizontal/vertical access. */
	long full_swath_bytes_horz_wc_l = 0;
	long full_swath_bytes_horz_wc_c = 0;
	long full_swath_bytes_vert_wc_l = 0;
	long full_swath_bytes_vert_wc_c = 0;

	long swath_buf_size = 0;
	double detile_buf_vp_horz_limit = 0;
	double detile_buf_vp_vert_limit = 0;

	long MAS_vp_horz_limit = 0;
	long MAS_vp_vert_limit = 0;
	long max_vp_horz_width = 0;
	long max_vp_vert_height = 0;
	long eff_surf_width_l = 0;
	long eff_surf_width_c = 0;
	long eff_surf_height_l = 0;
	long eff_surf_height_c = 0;

	/* REQ_NA is declared but never assigned anywhere in this function. */
	typedef enum {
		REQ_256Bytes,
		REQ_128BytesNonContiguous,
		REQ_128BytesContiguous,
		REQ_NA
	} RequestType;

	RequestType RequestLuma;
	RequestType RequestChroma;

	yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
	/* All divisors default to 1 (divide by 2); exceptions follow. */
	horz_div_l = 1;
	horz_div_c = 1;
	vert_div_l = 1;
	vert_div_c = 1;

	/* 1 byte per pixel: no vertical halving for that plane. */
	if (BytePerPixelY == 1)
		vert_div_l = 0;
	if (BytePerPixelC == 1)
		vert_div_c = 0;
	/* 8 bytes per pixel with a 64kb_s* tiling: no horizontal halving. */
	if (BytePerPixelY == 8
			&& (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
					|| TilingFormat == dm_sw_64kb_s_x))
		horz_div_l = 0;
	if (BytePerPixelC == 8
			&& (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
					|| TilingFormat == dm_sw_64kb_s_x))
		horz_div_c = 0;

	/*
	 * Viewport limits implied by the buffer: half of DETBufferSize minus
	 * 2 * 256 bytes for a single plane, or minus 2 * 2 * 256 bytes when a
	 * chroma plane exists, divided by the per-pixel (horizontal) or
	 * per-line (vertical) cost of the plane(s). The double -> long
	 * assignment to swath_buf_size truncates.
	 */
	if (BytePerPixelC == 0) {
		swath_buf_size = DETBufferSize / 2 - 2 * 256;
		detile_buf_vp_horz_limit = (double) swath_buf_size
				/ ((double) RequestHeight256ByteLuma * BytePerPixelY
						/ (1 + horz_div_l));
		detile_buf_vp_vert_limit = (double) swath_buf_size
				/ (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
	} else {
		swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
		detile_buf_vp_horz_limit = (double) swath_buf_size
				/ ((double) RequestHeight256ByteLuma * BytePerPixelY
						/ (1 + horz_div_l)
						+ (double) RequestHeight256ByteChroma
								* BytePerPixelC / (1 + horz_div_c)
								/ (1 + yuv420));
		detile_buf_vp_vert_limit = (double) swath_buf_size
				/ (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)
						+ 256.0 / RequestHeight256ByteChroma
								/ (1 + vert_div_c) / (1 + yuv420));
	}

	/* dm_420_10 gets 1.5x the limits (mirrored by the 2/3 scaling below). */
	if (SourcePixelFormat == dm_420_10) {
		detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
		detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
	}

	/* dml_floor is defined elsewhere; presumably rounds down to a multiple of 16. */
	detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
	detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);

	/*
	 * Fixed caps: 5760 horizontally; vertically 2880 when a chroma plane
	 * is present, otherwise 5760. The effective limit is the smaller of
	 * the fixed cap and the buffer-derived limit.
	 */
	MAS_vp_horz_limit = 5760;
	MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
	max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
	max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
	/* Clamp the luma surface to the limits; chroma is luma / (1 + yuv420). */
	eff_surf_width_l =
			(SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
	eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
	eff_surf_height_l = (
			SurfaceHeightLuma > max_vp_vert_height ?
					max_vp_vert_height : SurfaceHeightLuma);
	eff_surf_height_c = eff_surf_height_l / (1 + yuv420);

	/* Integer arithmetic: the divisions below truncate. */
	full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
	full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
	if (BytePerPixelC > 0) {
		full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma
				* BytePerPixelC;
		full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
	} else {
		full_swath_bytes_horz_wc_c = 0;
		full_swath_bytes_vert_wc_c = 0;
	}

	/*
	 * dm_420_10: scale the swath bytes by 2/3. The "* 2 / 3" is evaluated
	 * in integer arithmetic (truncating) before dml_ceil is applied;
	 * dml_ceil is defined elsewhere and presumably rounds up to a multiple
	 * of 256.
	 */
	if (SourcePixelFormat == dm_420_10) {
		full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
		full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
		full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
		full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
	}

	/*
	 * Horizontal access: if twice both planes fit in DETBufferSize,
	 * neither needs 128-byte requests. Otherwise only one plane is
	 * switched to 128-byte requests when that is enough to fit: chroma
	 * when luma < 1.5 * chroma, luma when luma >= 1.5 * chroma. If
	 * neither single switch fits, both planes use 128-byte requests.
	 */
	if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
		req128_horz_wc_l = 0;
		req128_horz_wc_c = 0;
	} else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c
			&& 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c
					<= DETBufferSize) {
		req128_horz_wc_l = 0;
		req128_horz_wc_c = 1;
	} else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c
			&& full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c
					<= DETBufferSize) {
		req128_horz_wc_l = 1;
		req128_horz_wc_c = 0;
	} else {
		req128_horz_wc_l = 1;
		req128_horz_wc_c = 1;
	}

	/* Vertical access: same decision ladder using the vertical swath bytes. */
	if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
		req128_vert_wc_l = 0;
		req128_vert_wc_c = 0;
	} else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c
			&& 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c
					<= DETBufferSize) {
		req128_vert_wc_l = 0;
		req128_vert_wc_c = 1;
	} else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c
			&& full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c
					<= DETBufferSize) {
		req128_vert_wc_l = 1;
		req128_vert_wc_c = 0;
	} else {
		req128_vert_wc_l = 1;
		req128_vert_wc_c = 1;
	}

	/*
	 * Segment order per plane and direction, decided purely from bytes
	 * per pixel and TilingFormat. The luma and chroma rules are identical
	 * apart from which BytePerPixel value they test.
	 */
	if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
		segment_order_horz_contiguous_luma = 0;
	} else {
		segment_order_horz_contiguous_luma = 1;
	}
	if ((BytePerPixelY == 8
			&& (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
					|| TilingFormat == dm_sw_64kb_d_t
					|| TilingFormat == dm_sw_64kb_r_x))
			|| (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
		segment_order_vert_contiguous_luma = 0;
	} else {
		segment_order_vert_contiguous_luma = 1;
	}
	if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
		segment_order_horz_contiguous_chroma = 0;
	} else {
		segment_order_horz_contiguous_chroma = 1;
	}
	if ((BytePerPixelC == 8
			&& (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
					|| TilingFormat == dm_sw_64kb_d_t
					|| TilingFormat == dm_sw_64kb_r_x))
			|| (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
		segment_order_vert_contiguous_chroma = 0;
	} else {
		segment_order_vert_contiguous_chroma = 1;
	}

	/*
	 * Pick the request type per plane:
	 *  - scan direction unknown: 256-byte requests only if neither
	 *    direction needs 128; non-contiguous if any direction that needs
	 *    128 is also non-contiguous; otherwise contiguous 128;
	 *  - ScanOrientation != dm_vert: use the horizontal results only;
	 *  - otherwise: use the vertical results only.
	 */
	if (DCCProgrammingAssumesScanDirectionUnknown == true) {
		if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
			RequestLuma = REQ_256Bytes;
		} else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0)
				|| (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
			RequestLuma = REQ_128BytesNonContiguous;
		} else {
			RequestLuma = REQ_128BytesContiguous;
		}
		if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
			RequestChroma = REQ_256Bytes;
		} else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0)
				|| (req128_vert_wc_c == 1
						&& segment_order_vert_contiguous_chroma == 0)) {
			RequestChroma = REQ_128BytesNonContiguous;
		} else {
			RequestChroma = REQ_128BytesContiguous;
		}
	} else if (ScanOrientation != dm_vert) {
		if (req128_horz_wc_l == 0) {
			RequestLuma = REQ_256Bytes;
		} else if (segment_order_horz_contiguous_luma == 0) {
			RequestLuma = REQ_128BytesNonContiguous;
		} else {
			RequestLuma = REQ_128BytesContiguous;
		}
		if (req128_horz_wc_c == 0) {
			RequestChroma = REQ_256Bytes;
		} else if (segment_order_horz_contiguous_chroma == 0) {
			RequestChroma = REQ_128BytesNonContiguous;
		} else {
			RequestChroma = REQ_128BytesContiguous;
		}
	} else {
		if (req128_vert_wc_l == 0) {
			RequestLuma = REQ_256Bytes;
		} else if (segment_order_vert_contiguous_luma == 0) {
			RequestLuma = REQ_128BytesNonContiguous;
		} else {
			RequestLuma = REQ_128BytesContiguous;
		}
		if (req128_vert_wc_c == 0) {
			RequestChroma = REQ_256Bytes;
		} else if (segment_order_vert_contiguous_chroma == 0) {
			RequestChroma = REQ_128BytesNonContiguous;
		} else {
			RequestChroma = REQ_128BytesContiguous;
		}
	}

	/* Translate the luma request type into the output block sizes. */
	if (RequestLuma == REQ_256Bytes) {
		*MaxUncompressedBlockLuma = 256;
		*MaxCompressedBlockLuma = 256;
		*IndependentBlockLuma = 0;
	} else if (RequestLuma == REQ_128BytesContiguous) {
		*MaxUncompressedBlockLuma = 256;
		*MaxCompressedBlockLuma = 128;
		*IndependentBlockLuma = 128;
	} else {
		/* REQ_128BytesNonContiguous */
		*MaxUncompressedBlockLuma = 256;
		*MaxCompressedBlockLuma = 64;
		*IndependentBlockLuma = 64;
	}

	/* Same translation for chroma. */
	if (RequestChroma == REQ_256Bytes) {
		*MaxUncompressedBlockChroma = 256;
		*MaxCompressedBlockChroma = 256;
		*IndependentBlockChroma = 0;
	} else if (RequestChroma == REQ_128BytesContiguous) {
		*MaxUncompressedBlockChroma = 256;
		*MaxCompressedBlockChroma = 128;
		*IndependentBlockChroma = 128;
	} else {
		/* REQ_128BytesNonContiguous */
		*MaxUncompressedBlockChroma = 256;
		*MaxCompressedBlockChroma = 64;
		*IndependentBlockChroma = 64;
	}

	/* Without DCC, or without a chroma plane, report zeros for chroma. */
	if (DCCEnabled != true || BytePerPixelC == 0) {
		*MaxUncompressedBlockChroma = 0;
		*MaxCompressedBlockChroma = 0;
		*IndependentBlockChroma = 0;
	}

	/* Without DCC, report zeros for luma as well. */
	if (DCCEnabled != true) {
		*MaxUncompressedBlockLuma = 0;
		*MaxCompressedBlockLuma = 0;
		*IndependentBlockLuma = 0;
	}
}
1611
1612
/*
 * CalculatePrefetchSourceLines - number of source lines to prefetch.
 *
 * Writes two outputs and returns a third value derived from them:
 *   *VInitPreFill - (VRatio + vtaps + 1) / 2 passed through dml_floor(..., 1);
 *                   when ProgressiveToInterlaceUnitInOPP is false, the
 *                   numerator additionally includes Interlace * 0.5 * VRatio.
 *   *MaxNumSwath  - swath count derived from *VInitPreFill and SwathHeight;
 *                   the formula depends on
 *                   mode_lib->vba.IgnoreViewportPositioning.
 *   return value  - *MaxNumSwath * SwathHeight plus a partial swath
 *                   remainder (taken modulo SwathHeight).
 *
 * When viewport positioning is honoured the partial swath is forced to be
 * at least 1. When it is ignored, ViewportYStart is only used to emit a
 * warning if it is non-zero.
 *
 * NOTE(review): SwathHeight is used as a divisor and modulus without a zero
 * check; callers are assumed to pass a non-zero value - confirm.
 */
static double CalculatePrefetchSourceLines(
		struct display_mode_lib *mode_lib,
		double VRatio,
		double vtaps,
		bool Interlace,
		bool ProgressiveToInterlaceUnitInOPP,
		unsigned int SwathHeight,
		unsigned int ViewportYStart,
		double *VInitPreFill,
		unsigned int *MaxNumSwath)
{
	double prefill;
	unsigned int whole_swaths;
	unsigned int partial_swath;

	prefill = ProgressiveToInterlaceUnitInOPP ?
			dml_floor((VRatio + vtaps + 1) / 2.0, 1) :
			dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
	*VInitPreFill = prefill;

	if (mode_lib->vba.IgnoreViewportPositioning) {
		if (ViewportYStart != 0)
			dml_print(
					"WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");

		whole_swaths = dml_ceil(prefill / SwathHeight, 1);
		partial_swath = (unsigned int) (prefill > 1.0 ?
				prefill - 1 : prefill + SwathHeight - 1) % SwathHeight;
	} else {
		whole_swaths = dml_ceil((prefill - 1.0) / SwathHeight, 1) + 1.0;
		partial_swath = (unsigned int) (prefill > 1.0 ?
				prefill - 2 : prefill + SwathHeight - 2) % SwathHeight;
		/* Viewport-aware path never reports a zero partial swath. */
		partial_swath = dml_max(1U, partial_swath);
	}

	*MaxNumSwath = whole_swaths;

	return whole_swaths * SwathHeight + partial_swath;
}
1659
CalculateVMAndRowBytes(struct display_mode_lib * mode_lib,bool DCCEnable,unsigned int BlockHeight256Bytes,unsigned int BlockWidth256Bytes,enum source_format_class SourcePixelFormat,unsigned int SurfaceTiling,unsigned int BytePerPixel,enum scan_direction_class ScanDirection,unsigned int SwathWidth,unsigned int ViewportHeight,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMinPageSize,unsigned int HostVMMinPageSize,unsigned int PTEBufferSizeInRequests,unsigned int Pitch,unsigned int DCCMetaPitch,unsigned int * MacroTileWidth,unsigned int * MetaRowByte,unsigned int * PixelPTEBytesPerRow,bool * PTEBufferSizeNotExceeded,unsigned int * dpte_row_width_ub,unsigned int * dpte_row_height,unsigned int * MetaRequestWidth,unsigned int * MetaRequestHeight,unsigned int * meta_row_width,unsigned int * meta_row_height,unsigned int * vm_group_bytes,unsigned int * dpte_group_bytes,unsigned int * PixelPTEReqWidth,unsigned int * PixelPTEReqHeight,unsigned int * PTERequestSize,unsigned int * DPDE0BytesFrame,unsigned int * MetaPTEBytesFrame)1660 static unsigned int CalculateVMAndRowBytes(
1661 struct display_mode_lib *mode_lib,
1662 bool DCCEnable,
1663 unsigned int BlockHeight256Bytes,
1664 unsigned int BlockWidth256Bytes,
1665 enum source_format_class SourcePixelFormat,
1666 unsigned int SurfaceTiling,
1667 unsigned int BytePerPixel,
1668 enum scan_direction_class ScanDirection,
1669 unsigned int SwathWidth,
1670 unsigned int ViewportHeight,
1671 bool GPUVMEnable,
1672 bool HostVMEnable,
1673 unsigned int HostVMMaxNonCachedPageTableLevels,
1674 unsigned int GPUVMMinPageSize,
1675 unsigned int HostVMMinPageSize,
1676 unsigned int PTEBufferSizeInRequests,
1677 unsigned int Pitch,
1678 unsigned int DCCMetaPitch,
1679 unsigned int *MacroTileWidth,
1680 unsigned int *MetaRowByte,
1681 unsigned int *PixelPTEBytesPerRow,
1682 bool *PTEBufferSizeNotExceeded,
1683 unsigned int *dpte_row_width_ub,
1684 unsigned int *dpte_row_height,
1685 unsigned int *MetaRequestWidth,
1686 unsigned int *MetaRequestHeight,
1687 unsigned int *meta_row_width,
1688 unsigned int *meta_row_height,
1689 unsigned int *vm_group_bytes,
1690 unsigned int *dpte_group_bytes,
1691 unsigned int *PixelPTEReqWidth,
1692 unsigned int *PixelPTEReqHeight,
1693 unsigned int *PTERequestSize,
1694 unsigned int *DPDE0BytesFrame,
1695 unsigned int *MetaPTEBytesFrame)
1696 {
1697 unsigned int MPDEBytesFrame = 0;
1698 unsigned int DCCMetaSurfaceBytes = 0;
1699 unsigned int MacroTileSizeBytes = 0;
1700 unsigned int MacroTileHeight = 0;
1701 unsigned int ExtraDPDEBytesFrame = 0;
1702 unsigned int PDEAndMetaPTEBytesFrame = 0;
1703 unsigned int PixelPTEReqHeightPTEs = 0;
1704 unsigned int HostVMDynamicLevels = 0;
1705
1706 double FractionOfPTEReturnDrop;
1707
1708 if (GPUVMEnable == true && HostVMEnable == true) {
1709 if (HostVMMinPageSize < 2048) {
1710 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1711 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1712 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1713 } else {
1714 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1715 }
1716 }
1717
1718 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1719 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1720 if (ScanDirection != dm_vert) {
1721 *meta_row_height = *MetaRequestHeight;
1722 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth)
1723 + *MetaRequestWidth;
1724 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1725 } else {
1726 *meta_row_height = *MetaRequestWidth;
1727 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight)
1728 + *MetaRequestHeight;
1729 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1730 }
1731 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes)
1732 + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1733 if (GPUVMEnable == true) {
1734 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1735 MPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 1);
1736 } else {
1737 *MetaPTEBytesFrame = 0;
1738 MPDEBytesFrame = 0;
1739 }
1740
1741 if (DCCEnable != true) {
1742 *MetaPTEBytesFrame = 0;
1743 MPDEBytesFrame = 0;
1744 *MetaRowByte = 0;
1745 }
1746
1747 if (SurfaceTiling == dm_sw_linear) {
1748 MacroTileSizeBytes = 256;
1749 MacroTileHeight = BlockHeight256Bytes;
1750 } else {
1751 MacroTileSizeBytes = 65536;
1752 MacroTileHeight = 16 * BlockHeight256Bytes;
1753 }
1754 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1755
1756 if (GPUVMEnable == true && mode_lib->vba.GPUVMMaxPageTableLevels > 1) {
1757 if (ScanDirection != dm_vert) {
1758 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1759 } else {
1760 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1761 }
1762 ExtraDPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 2);
1763 } else {
1764 *DPDE0BytesFrame = 0;
1765 ExtraDPDEBytesFrame = 0;
1766 }
1767
1768 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame
1769 + ExtraDPDEBytesFrame;
1770
1771 if (HostVMEnable == true) {
1772 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1773 }
1774
1775 if (SurfaceTiling == dm_sw_linear) {
1776 PixelPTEReqHeightPTEs = 1;
1777 *PixelPTEReqHeight = 1;
1778 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1779 *PTERequestSize = 64;
1780 FractionOfPTEReturnDrop = 0;
1781 } else if (MacroTileSizeBytes == 4096) {
1782 PixelPTEReqHeightPTEs = 1;
1783 *PixelPTEReqHeight = MacroTileHeight;
1784 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1785 *PTERequestSize = 64;
1786 if (ScanDirection != dm_vert)
1787 FractionOfPTEReturnDrop = 0;
1788 else
1789 FractionOfPTEReturnDrop = 7 / 8;
1790 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1791 PixelPTEReqHeightPTEs = 16;
1792 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1793 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1794 *PTERequestSize = 128;
1795 FractionOfPTEReturnDrop = 0;
1796 } else {
1797 PixelPTEReqHeightPTEs = 1;
1798 *PixelPTEReqHeight = MacroTileHeight;
1799 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1800 *PTERequestSize = 64;
1801 FractionOfPTEReturnDrop = 0;
1802 }
1803
1804 if (SurfaceTiling == dm_sw_linear) {
1805 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1806 *dpte_row_width_ub = (dml_ceil(((double) SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1807 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1808 } else if (ScanDirection != dm_vert) {
1809 *dpte_row_height = *PixelPTEReqHeight;
1810 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1811 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1812 } else {
1813 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1814 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1815 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1816 }
1817 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop)
1818 <= 64 * PTEBufferSizeInRequests) {
1819 *PTEBufferSizeNotExceeded = true;
1820 } else {
1821 *PTEBufferSizeNotExceeded = false;
1822 }
1823
1824 if (GPUVMEnable != true) {
1825 *PixelPTEBytesPerRow = 0;
1826 *PTEBufferSizeNotExceeded = true;
1827 }
1828 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1829
1830 if (HostVMEnable == true) {
1831 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1832 }
1833
1834 if (HostVMEnable == true) {
1835 *vm_group_bytes = 512;
1836 *dpte_group_bytes = 512;
1837 } else if (GPUVMEnable == true) {
1838 *vm_group_bytes = 2048;
1839 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1840 *dpte_group_bytes = 512;
1841 } else {
1842 *dpte_group_bytes = 2048;
1843 }
1844 } else {
1845 *vm_group_bytes = 0;
1846 *dpte_group_bytes = 0;
1847 }
1848
1849 return PDEAndMetaPTEBytesFrame;
1850 }
1851
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib * mode_lib)1852 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
1853 struct display_mode_lib *mode_lib)
1854 {
1855 struct vba_vars_st *v = &mode_lib->vba;
1856 unsigned int j, k;
1857 long ReorderBytes = 0;
1858 unsigned int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
1859 double MaxTotalRDBandwidth = 0;
1860 double MaxTotalRDBandwidthNoUrgentBurst = 0;
1861 bool DestinationLineTimesForPrefetchLessThan2 = false;
1862 bool VRatioPrefetchMoreThan4 = false;
1863 double TWait;
1864
1865 v->WritebackDISPCLK = 0.0;
1866 v->DISPCLKWithRamping = 0;
1867 v->DISPCLKWithoutRamping = 0;
1868 v->GlobalDPPCLK = 0.0;
1869 /* DAL custom code: need to update ReturnBW in case min dcfclk is overriden */
1870 v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] = dml_min3(
1871 v->ReturnBusWidth * v->DCFCLK,
1872 v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth,
1873 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
1874 if (v->HostVMEnable != true) {
1875 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100;
1876 } else {
1877 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
1878 }
1879 /* End DAL custom code */
1880
1881 // DISPCLK and DPPCLK Calculation
1882 //
1883 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1884 if (v->WritebackEnable[k]) {
1885 v->WritebackDISPCLK = dml_max(v->WritebackDISPCLK,
1886 dml30_CalculateWriteBackDISPCLK(
1887 v->WritebackPixelFormat[k],
1888 v->PixelClock[k],
1889 v->WritebackHRatio[k],
1890 v->WritebackVRatio[k],
1891 v->WritebackHTaps[k],
1892 v->WritebackVTaps[k],
1893 v->WritebackSourceWidth[k],
1894 v->WritebackDestinationWidth[k],
1895 v->HTotal[k],
1896 v->WritebackLineBufferSize));
1897 }
1898 }
1899
1900 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1901 if (v->HRatio[k] > 1) {
1902 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1903 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
1904 } else {
1905 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
1906 v->MaxDCHUBToPSCLThroughput,
1907 v->MaxPSCLToLBThroughput);
1908 }
1909
1910 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
1911 * dml_max(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
1912 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
1913
1914 if ((v->htaps[k] > 6 || v->vtaps[k] > 6)
1915 && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
1916 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
1917 }
1918
1919 if ((v->SourcePixelFormat[k] != dm_420_8
1920 && v->SourcePixelFormat[k] != dm_420_10
1921 && v->SourcePixelFormat[k] != dm_420_12
1922 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
1923 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
1924 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
1925 } else {
1926 if (v->HRatioChroma[k] > 1) {
1927 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1928 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
1929 } else {
1930 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
1931 v->MaxDCHUBToPSCLThroughput,
1932 v->MaxPSCLToLBThroughput);
1933 }
1934 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
1935 * dml_max3(v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
1936 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 1.0);
1937
1938 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6)
1939 && v->DPPCLKUsingSingleDPPChroma
1940 < 2 * v->PixelClock[k]) {
1941 v->DPPCLKUsingSingleDPPChroma = 2
1942 * v->PixelClock[k];
1943 }
1944
1945 v->DPPCLKUsingSingleDPP[k] = dml_max(
1946 v->DPPCLKUsingSingleDPPLuma,
1947 v->DPPCLKUsingSingleDPPChroma);
1948 }
1949 }
1950
1951 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1952 if (v->BlendingAndTiming[k] != k)
1953 continue;
1954 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
1955 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
1956 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1957 * (1 + v->DISPCLKRampingMargin / 100));
1958 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
1959 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
1960 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
1961 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
1962 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1963 * (1 + v->DISPCLKRampingMargin / 100));
1964 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
1965 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
1966 } else {
1967 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
1968 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1969 * (1 + v->DISPCLKRampingMargin / 100));
1970 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
1971 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
1972 }
1973 }
1974
1975 v->DISPCLKWithRamping = dml_max(
1976 v->DISPCLKWithRamping,
1977 v->WritebackDISPCLK);
1978 v->DISPCLKWithoutRamping = dml_max(
1979 v->DISPCLKWithoutRamping,
1980 v->WritebackDISPCLK);
1981
1982 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
1983 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
1984 v->DISPCLKWithRamping,
1985 v->DISPCLKDPPCLKVCOSpeed);
1986 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
1987 v->DISPCLKWithoutRamping,
1988 v->DISPCLKDPPCLKVCOSpeed);
1989 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
1990 v->soc.clock_limits[mode_lib->soc.num_states - 1].dispclk_mhz,
1991 v->DISPCLKDPPCLKVCOSpeed);
1992 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity
1993 > v->MaxDispclkRoundedToDFSGranularity) {
1994 v->DISPCLK_calculated =
1995 v->DISPCLKWithoutRampingRoundedToDFSGranularity;
1996 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity
1997 > v->MaxDispclkRoundedToDFSGranularity) {
1998 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
1999 } else {
2000 v->DISPCLK_calculated =
2001 v->DISPCLKWithRampingRoundedToDFSGranularity;
2002 }
2003 v->DISPCLK = v->DISPCLK_calculated;
2004 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2005
2006 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2007 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k]
2008 / v->DPPPerPlane[k]
2009 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2010 v->GlobalDPPCLK = dml_max(
2011 v->GlobalDPPCLK,
2012 v->DPPCLK_calculated[k]);
2013 }
2014 v->GlobalDPPCLK = RoundToDFSGranularityUp(
2015 v->GlobalDPPCLK,
2016 v->DISPCLKDPPCLKVCOSpeed);
2017 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2018 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255
2019 * dml_ceil(
2020 v->DPPCLK_calculated[k] * 255.0
2021 / v->GlobalDPPCLK,
2022 1);
2023 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2024 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2025 }
2026
2027 // Urgent and B P-State/DRAM Clock Change Watermark
2028 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2029 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2030
2031 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2032 dml30_CalculateBytePerPixelAnd256BBlockSizes(
2033 v->SourcePixelFormat[k],
2034 v->SurfaceTiling[k],
2035 &v->BytePerPixelY[k],
2036 &v->BytePerPixelC[k],
2037 &v->BytePerPixelDETY[k],
2038 &v->BytePerPixelDETC[k],
2039 &v->BlockHeight256BytesY[k],
2040 &v->BlockHeight256BytesC[k],
2041 &v->BlockWidth256BytesY[k],
2042 &v->BlockWidth256BytesC[k]);
2043 }
2044
2045 CalculateSwathWidth(
2046 false,
2047 v->NumberOfActivePlanes,
2048 v->SourcePixelFormat,
2049 v->SourceScan,
2050 v->ViewportWidth,
2051 v->ViewportHeight,
2052 v->SurfaceWidthY,
2053 v->SurfaceWidthC,
2054 v->SurfaceHeightY,
2055 v->SurfaceHeightC,
2056 v->ODMCombineEnabled,
2057 v->BytePerPixelY,
2058 v->BytePerPixelC,
2059 v->BlockHeight256BytesY,
2060 v->BlockHeight256BytesC,
2061 v->BlockWidth256BytesY,
2062 v->BlockWidth256BytesC,
2063 v->BlendingAndTiming,
2064 v->HActive,
2065 v->HRatio,
2066 v->DPPPerPlane,
2067 v->SwathWidthSingleDPPY,
2068 v->SwathWidthSingleDPPC,
2069 v->SwathWidthY,
2070 v->SwathWidthC,
2071 v->dummyinteger3,
2072 v->dummyinteger4,
2073 v->swath_width_luma_ub,
2074 v->swath_width_chroma_ub);
2075
2076
2077 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2078 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2079 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioChroma[k];
2080 DTRACE("read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2081 }
2082
2083
2084 // DCFCLK Deep Sleep
2085 CalculateDCFCLKDeepSleep(
2086 mode_lib,
2087 v->NumberOfActivePlanes,
2088 v->BytePerPixelY,
2089 v->BytePerPixelC,
2090 v->VRatio,
2091 v->VRatioChroma,
2092 v->SwathWidthY,
2093 v->SwathWidthC,
2094 v->DPPPerPlane,
2095 v->HRatio,
2096 v->HRatioChroma,
2097 v->PixelClock,
2098 v->PSCL_THROUGHPUT_LUMA,
2099 v->PSCL_THROUGHPUT_CHROMA,
2100 v->DPPCLK,
2101 v->ReadBandwidthPlaneLuma,
2102 v->ReadBandwidthPlaneChroma,
2103 v->ReturnBusWidth,
2104 &v->DCFCLKDeepSleep);
2105
2106 // DSCCLK
2107 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2108 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2109 v->DSCCLK_calculated[k] = 0.0;
2110 } else {
2111 if (v->OutputFormat[k] == dm_420)
2112 v->DSCFormatFactor = 2;
2113 else if (v->OutputFormat[k] == dm_444)
2114 v->DSCFormatFactor = 1;
2115 else if (v->OutputFormat[k] == dm_n422)
2116 v->DSCFormatFactor = 2;
2117 else
2118 v->DSCFormatFactor = 1;
2119 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2120 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12
2121 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2122 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2123 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6
2124 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2125 else
2126 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3
2127 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2128 }
2129 }
2130
2131 // DSC Delay
2132 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2133 double BPP = v->OutputBppPerState[k][v->VoltageLevel];
2134
2135 if (v->DSCEnabled[k] && BPP != 0) {
2136 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2137 v->DSCDelay[k] = dscceComputeDelay(v->DSCInputBitPerComponent[k],
2138 BPP,
2139 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2140 v->NumberOfDSCSlices[k],
2141 v->OutputFormat[k],
2142 v->Output[k])
2143 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2144 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2145 v->DSCDelay[k] = 2 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2146 BPP,
2147 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2148 v->NumberOfDSCSlices[k] / 2.0,
2149 v->OutputFormat[k],
2150 v->Output[k])
2151 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2152 } else {
2153 v->DSCDelay[k] = 4 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2154 BPP,
2155 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2156 v->NumberOfDSCSlices[k] / 4.0,
2157 v->OutputFormat[k],
2158 v->Output[k])
2159 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2160 }
2161 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2162 } else {
2163 v->DSCDelay[k] = 0;
2164 }
2165 }
2166
2167 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2168 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2169 if (j != k && v->BlendingAndTiming[k] == j
2170 && v->DSCEnabled[j])
2171 v->DSCDelay[k] = v->DSCDelay[j];
2172
2173 // Prefetch
2174 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2175 unsigned int PDEAndMetaPTEBytesFrameY = 0;
2176 unsigned int PixelPTEBytesPerRowY = 0;
2177 unsigned int MetaRowByteY = 0;
2178 unsigned int MetaRowByteC = 0;
2179 unsigned int PDEAndMetaPTEBytesFrameC = 0;
2180 unsigned int PixelPTEBytesPerRowC = 0;
2181 bool PTEBufferSizeNotExceededY = 0;
2182 bool PTEBufferSizeNotExceededC = 0;
2183
2184
2185 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2186 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2187 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2188 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2189 } else {
2190 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2191 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2192
2193 }
2194 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2195 mode_lib,
2196 v->DCCEnable[k],
2197 v->BlockHeight256BytesC[k],
2198 v->BlockWidth256BytesC[k],
2199 v->SourcePixelFormat[k],
2200 v->SurfaceTiling[k],
2201 v->BytePerPixelC[k],
2202 v->SourceScan[k],
2203 v->SwathWidthC[k],
2204 v->ViewportHeightChroma[k],
2205 v->GPUVMEnable,
2206 v->HostVMEnable,
2207 v->HostVMMaxNonCachedPageTableLevels,
2208 v->GPUVMMinPageSize,
2209 v->HostVMMinPageSize,
2210 v->PTEBufferSizeInRequestsForChroma,
2211 v->PitchC[k],
2212 v->DCCMetaPitchC[k],
2213 &v->MacroTileWidthC[k],
2214 &MetaRowByteC,
2215 &PixelPTEBytesPerRowC,
2216 &PTEBufferSizeNotExceededC,
2217 &v->dpte_row_width_chroma_ub[k],
2218 &v->dpte_row_height_chroma[k],
2219 &v->meta_req_width_chroma[k],
2220 &v->meta_req_height_chroma[k],
2221 &v->meta_row_width_chroma[k],
2222 &v->meta_row_height_chroma[k],
2223 &v->dummyinteger1,
2224 &v->dummyinteger2,
2225 &v->PixelPTEReqWidthC[k],
2226 &v->PixelPTEReqHeightC[k],
2227 &v->PTERequestSizeC[k],
2228 &v->dpde0_bytes_per_frame_ub_c[k],
2229 &v->meta_pte_bytes_per_frame_ub_c[k]);
2230
2231 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2232 mode_lib,
2233 v->VRatioChroma[k],
2234 v->VTAPsChroma[k],
2235 v->Interlace[k],
2236 v->ProgressiveToInterlaceUnitInOPP,
2237 v->SwathHeightC[k],
2238 v->ViewportYStartC[k],
2239 &v->VInitPreFillC[k],
2240 &v->MaxNumSwathC[k]);
2241 } else {
2242 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2243 v->PTEBufferSizeInRequestsForChroma = 0;
2244 PixelPTEBytesPerRowC = 0;
2245 PDEAndMetaPTEBytesFrameC = 0;
2246 MetaRowByteC = 0;
2247 v->MaxNumSwathC[k] = 0;
2248 v->PrefetchSourceLinesC[k] = 0;
2249 }
2250
2251 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2252 mode_lib,
2253 v->DCCEnable[k],
2254 v->BlockHeight256BytesY[k],
2255 v->BlockWidth256BytesY[k],
2256 v->SourcePixelFormat[k],
2257 v->SurfaceTiling[k],
2258 v->BytePerPixelY[k],
2259 v->SourceScan[k],
2260 v->SwathWidthY[k],
2261 v->ViewportHeight[k],
2262 v->GPUVMEnable,
2263 v->HostVMEnable,
2264 v->HostVMMaxNonCachedPageTableLevels,
2265 v->GPUVMMinPageSize,
2266 v->HostVMMinPageSize,
2267 v->PTEBufferSizeInRequestsForLuma,
2268 v->PitchY[k],
2269 v->DCCMetaPitchY[k],
2270 &v->MacroTileWidthY[k],
2271 &MetaRowByteY,
2272 &PixelPTEBytesPerRowY,
2273 &PTEBufferSizeNotExceededY,
2274 &v->dpte_row_width_luma_ub[k],
2275 &v->dpte_row_height[k],
2276 &v->meta_req_width[k],
2277 &v->meta_req_height[k],
2278 &v->meta_row_width[k],
2279 &v->meta_row_height[k],
2280 &v->vm_group_bytes[k],
2281 &v->dpte_group_bytes[k],
2282 &v->PixelPTEReqWidthY[k],
2283 &v->PixelPTEReqHeightY[k],
2284 &v->PTERequestSizeY[k],
2285 &v->dpde0_bytes_per_frame_ub_l[k],
2286 &v->meta_pte_bytes_per_frame_ub_l[k]);
2287
2288 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2289 mode_lib,
2290 v->VRatio[k],
2291 v->vtaps[k],
2292 v->Interlace[k],
2293 v->ProgressiveToInterlaceUnitInOPP,
2294 v->SwathHeightY[k],
2295 v->ViewportYStartY[k],
2296 &v->VInitPreFillY[k],
2297 &v->MaxNumSwathY[k]);
2298 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2299 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY
2300 + PDEAndMetaPTEBytesFrameC;
2301 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2302
2303 CalculateRowBandwidth(
2304 v->GPUVMEnable,
2305 v->SourcePixelFormat[k],
2306 v->VRatio[k],
2307 v->VRatioChroma[k],
2308 v->DCCEnable[k],
2309 v->HTotal[k] / v->PixelClock[k],
2310 MetaRowByteY,
2311 MetaRowByteC,
2312 v->meta_row_height[k],
2313 v->meta_row_height_chroma[k],
2314 PixelPTEBytesPerRowY,
2315 PixelPTEBytesPerRowC,
2316 v->dpte_row_height[k],
2317 v->dpte_row_height_chroma[k],
2318 &v->meta_row_bw[k],
2319 &v->dpte_row_bw[k]);
2320 }
2321
2322 v->TotalDCCActiveDPP = 0;
2323 v->TotalActiveDPP = 0;
2324 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2325 v->TotalActiveDPP = v->TotalActiveDPP
2326 + v->DPPPerPlane[k];
2327 if (v->DCCEnable[k])
2328 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP
2329 + v->DPPPerPlane[k];
2330 }
2331
2332
2333 ReorderBytes = v->NumberOfChannels * dml_max3(
2334 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2335 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2336 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2337
2338 v->UrgentExtraLatency = CalculateExtraLatency(
2339 v->RoundTripPingLatencyCycles,
2340 ReorderBytes,
2341 v->DCFCLK,
2342 v->TotalActiveDPP,
2343 v->PixelChunkSizeInKByte,
2344 v->TotalDCCActiveDPP,
2345 v->MetaChunkSize,
2346 v->ReturnBW,
2347 v->GPUVMEnable,
2348 v->HostVMEnable,
2349 v->NumberOfActivePlanes,
2350 v->DPPPerPlane,
2351 v->dpte_group_bytes,
2352 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2353 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2354 v->HostVMMinPageSize,
2355 v->HostVMMaxNonCachedPageTableLevels);
2356
2357 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2358
2359 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2360 if (v->BlendingAndTiming[k] == k) {
2361 if (v->WritebackEnable[k] == true) {
2362 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency +
2363 CalculateWriteBackDelay(v->WritebackPixelFormat[k],
2364 v->WritebackHRatio[k],
2365 v->WritebackVRatio[k],
2366 v->WritebackVTaps[k],
2367 v->WritebackDestinationWidth[k],
2368 v->WritebackDestinationHeight[k],
2369 v->WritebackSourceHeight[k],
2370 v->HTotal[k]) / v->DISPCLK;
2371 } else
2372 v->WritebackDelay[v->VoltageLevel][k] = 0;
2373 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2374 if (v->BlendingAndTiming[j] == k
2375 && v->WritebackEnable[j] == true) {
2376 v->WritebackDelay[v->VoltageLevel][k] = dml_max(v->WritebackDelay[v->VoltageLevel][k],
2377 v->WritebackLatency + CalculateWriteBackDelay(
2378 v->WritebackPixelFormat[j],
2379 v->WritebackHRatio[j],
2380 v->WritebackVRatio[j],
2381 v->WritebackVTaps[j],
2382 v->WritebackDestinationWidth[j],
2383 v->WritebackDestinationHeight[j],
2384 v->WritebackSourceHeight[j],
2385 v->HTotal[k]) / v->DISPCLK);
2386 }
2387 }
2388 }
2389 }
2390
2391 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2392 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2393 if (v->BlendingAndTiming[k] == j)
2394 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2395
2396 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2397 v->MaxVStartupLines[k] = v->VTotal[k] - v->VActive[k] - dml_max(1.0, dml_ceil((double) v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1));
2398 }
2399
2400 v->MaximumMaxVStartupLines = 0;
2401 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2402 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2403
2404 if (v->DRAMClockChangeLatencyOverride > 0.0) {
2405 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatencyOverride;
2406 } else {
2407 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatency;
2408 }
2409 v->UrgentLatency = CalculateUrgentLatency(v->UrgentLatencyPixelDataOnly, v->UrgentLatencyPixelMixedWithVMData, v->UrgentLatencyVMDataOnly, v->DoUrgentLatencyAdjustment, v->UrgentLatencyAdjustmentFabricClockComponent, v->UrgentLatencyAdjustmentFabricClockReference, v->FabricClock);
2410
2411
2412 v->FractionOfUrgentBandwidth = 0.0;
2413 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2414
2415 v->VStartupLines = 13;
2416
2417 do {
2418 MaxTotalRDBandwidth = 0;
2419 MaxTotalRDBandwidthNoUrgentBurst = 0;
2420 DestinationLineTimesForPrefetchLessThan2 = false;
2421 VRatioPrefetchMoreThan4 = false;
2422 TWait = CalculateTWait(
2423 PrefetchMode,
2424 v->FinalDRAMClockChangeLatency,
2425 v->UrgentLatency,
2426 v->SREnterPlusExitTime);
2427
2428 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2429 Pipe myPipe = { 0 };
2430
2431 myPipe.DPPCLK = v->DPPCLK[k];
2432 myPipe.DISPCLK = v->DISPCLK;
2433 myPipe.PixelClock = v->PixelClock[k];
2434 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2435 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2436 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2437 myPipe.SourceScan = v->SourceScan[k];
2438 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2439 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2440 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2441 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2442 myPipe.InterlaceEnable = v->Interlace[k];
2443 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2444 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2445 myPipe.HTotal = v->HTotal[k];
2446 myPipe.DCCEnable = v->DCCEnable[k];
2447 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
2448
2449 v->ErrorResult[k] = CalculatePrefetchSchedule(
2450 mode_lib,
2451 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2452 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2453 &myPipe,
2454 v->DSCDelay[k],
2455 v->DPPCLKDelaySubtotal
2456 + v->DPPCLKDelayCNVCFormater,
2457 v->DPPCLKDelaySCL,
2458 v->DPPCLKDelaySCLLBOnly,
2459 v->DPPCLKDelayCNVCCursor,
2460 v->DISPCLKDelaySubtotal,
2461 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2462 v->OutputFormat[k],
2463 v->MaxInterDCNTileRepeaters,
2464 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2465 v->MaxVStartupLines[k],
2466 v->GPUVMMaxPageTableLevels,
2467 v->GPUVMEnable,
2468 v->HostVMEnable,
2469 v->HostVMMaxNonCachedPageTableLevels,
2470 v->HostVMMinPageSize,
2471 v->DynamicMetadataEnable[k],
2472 v->DynamicMetadataVMEnabled,
2473 v->DynamicMetadataLinesBeforeActiveRequired[k],
2474 v->DynamicMetadataTransmittedBytes[k],
2475 v->UrgentLatency,
2476 v->UrgentExtraLatency,
2477 v->TCalc,
2478 v->PDEAndMetaPTEBytesFrame[k],
2479 v->MetaRowByte[k],
2480 v->PixelPTEBytesPerRow[k],
2481 v->PrefetchSourceLinesY[k],
2482 v->SwathWidthY[k],
2483 v->BytePerPixelY[k],
2484 v->VInitPreFillY[k],
2485 v->MaxNumSwathY[k],
2486 v->PrefetchSourceLinesC[k],
2487 v->SwathWidthC[k],
2488 v->BytePerPixelC[k],
2489 v->VInitPreFillC[k],
2490 v->MaxNumSwathC[k],
2491 v->swath_width_luma_ub[k],
2492 v->swath_width_chroma_ub[k],
2493 v->SwathHeightY[k],
2494 v->SwathHeightC[k],
2495 TWait,
2496 v->ProgressiveToInterlaceUnitInOPP,
2497 &v->DSTXAfterScaler[k],
2498 &v->DSTYAfterScaler[k],
2499 &v->DestinationLinesForPrefetch[k],
2500 &v->PrefetchBandwidth[k],
2501 &v->DestinationLinesToRequestVMInVBlank[k],
2502 &v->DestinationLinesToRequestRowInVBlank[k],
2503 &v->VRatioPrefetchY[k],
2504 &v->VRatioPrefetchC[k],
2505 &v->RequiredPrefetchPixDataBWLuma[k],
2506 &v->RequiredPrefetchPixDataBWChroma[k],
2507 &v->NotEnoughTimeForDynamicMetadata[k],
2508 &v->Tno_bw[k],
2509 &v->prefetch_vmrow_bw[k],
2510 &v->Tdmdl_vm[k],
2511 &v->Tdmdl[k],
2512 &v->VUpdateOffsetPix[k],
2513 &v->VUpdateWidthPix[k],
2514 &v->VReadyOffsetPix[k]);
2515 if (v->BlendingAndTiming[k] == k) {
2516 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2517 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[k];
2518 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[k];
2519 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[k] / 4.0, 1);
2520 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2521 } else {
2522 int x = v->BlendingAndTiming[k];
2523 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2524 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[x];
2525 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[x];
2526 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[x] / 4.0, 1);
2527 if (!v->MaxVStartupLines[x])
2528 v->MaxVStartupLines[x] = v->MaxVStartupLines[k];
2529 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[x]);
2530 }
2531 }
2532
2533 v->NotEnoughUrgentLatencyHiding[0][0] = false;
2534 v->NotEnoughUrgentLatencyHidingPre = false;
2535
2536 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2537 v->cursor_bw[k] = v->NumberOfCursors[k]
2538 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2539 / 8.0
2540 / (v->HTotal[k] / v->PixelClock[k])
2541 * v->VRatio[k];
2542 v->cursor_bw_pre[k] = v->NumberOfCursors[k]
2543 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2544 / 8.0
2545 / (v->HTotal[k] / v->PixelClock[k])
2546 * v->VRatioPrefetchY[k];
2547
2548 CalculateUrgentBurstFactor(
2549 v->swath_width_luma_ub[k],
2550 v->swath_width_chroma_ub[k],
2551 v->DETBufferSizeInKByte[0],
2552 v->SwathHeightY[k],
2553 v->SwathHeightC[k],
2554 v->HTotal[k] / v->PixelClock[k],
2555 v->UrgentLatency,
2556 v->CursorBufferSize,
2557 v->CursorWidth[k][0],
2558 v->CursorBPP[k][0],
2559 v->VRatio[k],
2560 v->VRatioChroma[k],
2561 v->BytePerPixelDETY[k],
2562 v->BytePerPixelDETC[k],
2563 v->DETBufferSizeY[k],
2564 v->DETBufferSizeC[k],
2565 &v->UrgentBurstFactorCursor[k],
2566 &v->UrgentBurstFactorLuma[k],
2567 &v->UrgentBurstFactorChroma[k],
2568 &v->NoUrgentLatencyHiding[k]);
2569
2570 CalculateUrgentBurstFactor(
2571 v->swath_width_luma_ub[k],
2572 v->swath_width_chroma_ub[k],
2573 v->DETBufferSizeInKByte[0],
2574 v->SwathHeightY[k],
2575 v->SwathHeightC[k],
2576 v->HTotal[k] / v->PixelClock[k],
2577 v->UrgentLatency,
2578 v->CursorBufferSize,
2579 v->CursorWidth[k][0],
2580 v->CursorBPP[k][0],
2581 v->VRatioPrefetchY[k],
2582 v->VRatioPrefetchC[k],
2583 v->BytePerPixelDETY[k],
2584 v->BytePerPixelDETC[k],
2585 v->DETBufferSizeY[k],
2586 v->DETBufferSizeC[k],
2587 &v->UrgentBurstFactorCursorPre[k],
2588 &v->UrgentBurstFactorLumaPre[k],
2589 &v->UrgentBurstFactorChromaPre[k],
2590 &v->NoUrgentLatencyHidingPre[k]);
2591
2592 MaxTotalRDBandwidth = MaxTotalRDBandwidth +
2593 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2594 v->ReadBandwidthPlaneLuma[k] *
2595 v->UrgentBurstFactorLuma[k] +
2596 v->ReadBandwidthPlaneChroma[k] *
2597 v->UrgentBurstFactorChroma[k] +
2598 v->cursor_bw[k] *
2599 v->UrgentBurstFactorCursor[k] +
2600 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2601 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2602 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) + v->cursor_bw_pre[k] *
2603 v->UrgentBurstFactorCursorPre[k]);
2604
2605 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst +
2606 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2607 v->ReadBandwidthPlaneLuma[k] +
2608 v->ReadBandwidthPlaneChroma[k] +
2609 v->cursor_bw[k] +
2610 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2611 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2612
2613 if (v->DestinationLinesForPrefetch[k] < 2)
2614 DestinationLineTimesForPrefetchLessThan2 = true;
2615 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2616 VRatioPrefetchMoreThan4 = true;
2617 if (v->NoUrgentLatencyHiding[k] == true)
2618 v->NotEnoughUrgentLatencyHiding[0][0] = true;
2619
2620 if (v->NoUrgentLatencyHidingPre[k] == true)
2621 v->NotEnoughUrgentLatencyHidingPre = true;
2622 }
2623 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2624
2625
2626 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NotEnoughUrgentLatencyHiding[0][0] == 0
2627 && v->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4
2628 && !DestinationLineTimesForPrefetchLessThan2)
2629 v->PrefetchModeSupported = true;
2630 else {
2631 v->PrefetchModeSupported = false;
2632 dml_print("DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n");
2633 dml_print("DML: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", MaxTotalRDBandwidth, v->ReturnBW);
2634 dml_print("DML: VRatioPrefetch %s more than 4\n", (VRatioPrefetchMoreThan4) ? "is" : "is not");
2635 dml_print("DML: DestinationLines for Prefetch %s less than 2\n", (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2636 }
2637
2638 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2639 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2640 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2641 v->BandwidthAvailableForImmediateFlip =
2642 v->BandwidthAvailableForImmediateFlip
2643 - dml_max(
2644 v->ReadBandwidthPlaneLuma[k] * v->UrgentBurstFactorLuma[k]
2645 + v->ReadBandwidthPlaneChroma[k] * v->UrgentBurstFactorChroma[k]
2646 + v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2647 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2648 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2649 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2650 }
2651
2652 v->TotImmediateFlipBytes = 0;
2653 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2654 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2655 }
2656 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2657 CalculateFlipSchedule(
2658 mode_lib,
2659 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2660 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2661 v->UrgentExtraLatency,
2662 v->UrgentLatency,
2663 v->GPUVMMaxPageTableLevels,
2664 v->HostVMEnable,
2665 v->HostVMMaxNonCachedPageTableLevels,
2666 v->GPUVMEnable,
2667 v->HostVMMinPageSize,
2668 v->PDEAndMetaPTEBytesFrame[k],
2669 v->MetaRowByte[k],
2670 v->PixelPTEBytesPerRow[k],
2671 v->BandwidthAvailableForImmediateFlip,
2672 v->TotImmediateFlipBytes,
2673 v->SourcePixelFormat[k],
2674 v->HTotal[k] / v->PixelClock[k],
2675 v->VRatio[k],
2676 v->VRatioChroma[k],
2677 v->Tno_bw[k],
2678 v->DCCEnable[k],
2679 v->dpte_row_height[k],
2680 v->meta_row_height[k],
2681 v->dpte_row_height_chroma[k],
2682 v->meta_row_height_chroma[k],
2683 &v->DestinationLinesToRequestVMInImmediateFlip[k],
2684 &v->DestinationLinesToRequestRowInImmediateFlip[k],
2685 &v->final_flip_bw[k],
2686 &v->ImmediateFlipSupportedForPipe[k]);
2687 }
2688 v->total_dcn_read_bw_with_flip = 0.0;
2689 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2690 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2691 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip + dml_max3(
2692 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2693 v->DPPPerPlane[k] * v->final_flip_bw[k] +
2694 v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] +
2695 v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k] +
2696 v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2697 v->DPPPerPlane[k] * (v->final_flip_bw[k] +
2698 v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2699 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2700 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2701 v->total_dcn_read_bw_with_flip_no_urgent_burst =
2702 v->total_dcn_read_bw_with_flip_no_urgent_burst +
2703 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2704 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2705 v->DPPPerPlane[k] * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2706
2707 }
2708 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2709
2710 v->ImmediateFlipSupported = true;
2711 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2712 v->ImmediateFlipSupported = false;
2713 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2714 }
2715 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2716 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2717 v->ImmediateFlipSupported = false;
2718 }
2719 }
2720 } else {
2721 v->ImmediateFlipSupported = false;
2722 }
2723
2724 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2725 if (v->ErrorResult[k] || v->NotEnoughTimeForDynamicMetadata[k]) {
2726 v->PrefetchModeSupported = false;
2727 dml_print("DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n");
2728 }
2729 }
2730
2731 v->VStartupLines = v->VStartupLines + 1;
2732 v->PrefetchModeSupported = (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport &&
2733 !v->HostVMEnable && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2734 v->ImmediateFlipSupported)) ? true : false;
2735 } while (!v->PrefetchModeSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2736 ASSERT(v->PrefetchModeSupported);
2737
2738 //Watermarks and NB P-State/DRAM Clock Change Support
2739 {
2740 enum clock_change_support DRAMClockChangeSupport = 0; // dummy
2741 CalculateWatermarksAndDRAMSpeedChangeSupport(
2742 mode_lib,
2743 PrefetchMode,
2744 v->NumberOfActivePlanes,
2745 v->MaxLineBufferLines,
2746 v->LineBufferSize,
2747 v->DPPOutputBufferPixels,
2748 v->DETBufferSizeInKByte[0],
2749 v->WritebackInterfaceBufferSize,
2750 v->DCFCLK,
2751 v->ReturnBW,
2752 v->GPUVMEnable,
2753 v->dpte_group_bytes,
2754 v->MetaChunkSize,
2755 v->UrgentLatency,
2756 v->UrgentExtraLatency,
2757 v->WritebackLatency,
2758 v->WritebackChunkSize,
2759 v->SOCCLK,
2760 v->FinalDRAMClockChangeLatency,
2761 v->SRExitTime,
2762 v->SREnterPlusExitTime,
2763 v->DCFCLKDeepSleep,
2764 v->DPPPerPlane,
2765 v->DCCEnable,
2766 v->DPPCLK,
2767 v->DETBufferSizeY,
2768 v->DETBufferSizeC,
2769 v->SwathHeightY,
2770 v->SwathHeightC,
2771 v->LBBitPerPixel,
2772 v->SwathWidthY,
2773 v->SwathWidthC,
2774 v->HRatio,
2775 v->HRatioChroma,
2776 v->vtaps,
2777 v->VTAPsChroma,
2778 v->VRatio,
2779 v->VRatioChroma,
2780 v->HTotal,
2781 v->PixelClock,
2782 v->BlendingAndTiming,
2783 v->BytePerPixelDETY,
2784 v->BytePerPixelDETC,
2785 v->DSTXAfterScaler,
2786 v->DSTYAfterScaler,
2787 v->WritebackEnable,
2788 v->WritebackPixelFormat,
2789 v->WritebackDestinationWidth,
2790 v->WritebackDestinationHeight,
2791 v->WritebackSourceHeight,
2792 &DRAMClockChangeSupport,
2793 &v->UrgentWatermark,
2794 &v->WritebackUrgentWatermark,
2795 &v->DRAMClockChangeWatermark,
2796 &v->WritebackDRAMClockChangeWatermark,
2797 &v->StutterExitWatermark,
2798 &v->StutterEnterPlusExitWatermark,
2799 &v->MinActiveDRAMClockChangeLatencySupported);
2800
2801 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2802 if (v->WritebackEnable[k] == true) {
2803 if (v->BlendingAndTiming[k] == k) {
2804 v->ThisVStartup = v->VStartup[k];
2805 } else {
2806 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2807 if (v->BlendingAndTiming[k] == j) {
2808 v->ThisVStartup = v->VStartup[j];
2809 }
2810 }
2811 }
2812 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0,
2813 v->ThisVStartup * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2814 } else {
2815 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2816 }
2817 }
2818
2819 }
2820
2821
2822 //Display Pipeline Delivery Time in Prefetch, Groups
2823 CalculatePixelDeliveryTimes(
2824 v->NumberOfActivePlanes,
2825 v->VRatio,
2826 v->VRatioChroma,
2827 v->VRatioPrefetchY,
2828 v->VRatioPrefetchC,
2829 v->swath_width_luma_ub,
2830 v->swath_width_chroma_ub,
2831 v->DPPPerPlane,
2832 v->HRatio,
2833 v->HRatioChroma,
2834 v->PixelClock,
2835 v->PSCL_THROUGHPUT_LUMA,
2836 v->PSCL_THROUGHPUT_CHROMA,
2837 v->DPPCLK,
2838 v->BytePerPixelC,
2839 v->SourceScan,
2840 v->NumberOfCursors,
2841 v->CursorWidth,
2842 v->CursorBPP,
2843 v->BlockWidth256BytesY,
2844 v->BlockHeight256BytesY,
2845 v->BlockWidth256BytesC,
2846 v->BlockHeight256BytesC,
2847 v->DisplayPipeLineDeliveryTimeLuma,
2848 v->DisplayPipeLineDeliveryTimeChroma,
2849 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
2850 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
2851 v->DisplayPipeRequestDeliveryTimeLuma,
2852 v->DisplayPipeRequestDeliveryTimeChroma,
2853 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
2854 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
2855 v->CursorRequestDeliveryTime,
2856 v->CursorRequestDeliveryTimePrefetch);
2857
2858 CalculateMetaAndPTETimes(
2859 v->NumberOfActivePlanes,
2860 v->GPUVMEnable,
2861 v->MetaChunkSize,
2862 v->MinMetaChunkSizeBytes,
2863 v->HTotal,
2864 v->VRatio,
2865 v->VRatioChroma,
2866 v->DestinationLinesToRequestRowInVBlank,
2867 v->DestinationLinesToRequestRowInImmediateFlip,
2868 v->DCCEnable,
2869 v->PixelClock,
2870 v->BytePerPixelY,
2871 v->BytePerPixelC,
2872 v->SourceScan,
2873 v->dpte_row_height,
2874 v->dpte_row_height_chroma,
2875 v->meta_row_width,
2876 v->meta_row_width_chroma,
2877 v->meta_row_height,
2878 v->meta_row_height_chroma,
2879 v->meta_req_width,
2880 v->meta_req_width_chroma,
2881 v->meta_req_height,
2882 v->meta_req_height_chroma,
2883 v->dpte_group_bytes,
2884 v->PTERequestSizeY,
2885 v->PTERequestSizeC,
2886 v->PixelPTEReqWidthY,
2887 v->PixelPTEReqHeightY,
2888 v->PixelPTEReqWidthC,
2889 v->PixelPTEReqHeightC,
2890 v->dpte_row_width_luma_ub,
2891 v->dpte_row_width_chroma_ub,
2892 v->DST_Y_PER_PTE_ROW_NOM_L,
2893 v->DST_Y_PER_PTE_ROW_NOM_C,
2894 v->DST_Y_PER_META_ROW_NOM_L,
2895 v->DST_Y_PER_META_ROW_NOM_C,
2896 v->TimePerMetaChunkNominal,
2897 v->TimePerChromaMetaChunkNominal,
2898 v->TimePerMetaChunkVBlank,
2899 v->TimePerChromaMetaChunkVBlank,
2900 v->TimePerMetaChunkFlip,
2901 v->TimePerChromaMetaChunkFlip,
2902 v->time_per_pte_group_nom_luma,
2903 v->time_per_pte_group_vblank_luma,
2904 v->time_per_pte_group_flip_luma,
2905 v->time_per_pte_group_nom_chroma,
2906 v->time_per_pte_group_vblank_chroma,
2907 v->time_per_pte_group_flip_chroma);
2908
2909 CalculateVMGroupAndRequestTimes(
2910 v->NumberOfActivePlanes,
2911 v->GPUVMEnable,
2912 v->GPUVMMaxPageTableLevels,
2913 v->HTotal,
2914 v->BytePerPixelC,
2915 v->DestinationLinesToRequestVMInVBlank,
2916 v->DestinationLinesToRequestVMInImmediateFlip,
2917 v->DCCEnable,
2918 v->PixelClock,
2919 v->dpte_row_width_luma_ub,
2920 v->dpte_row_width_chroma_ub,
2921 v->vm_group_bytes,
2922 v->dpde0_bytes_per_frame_ub_l,
2923 v->dpde0_bytes_per_frame_ub_c,
2924 v->meta_pte_bytes_per_frame_ub_l,
2925 v->meta_pte_bytes_per_frame_ub_c,
2926 v->TimePerVMGroupVBlank,
2927 v->TimePerVMGroupFlip,
2928 v->TimePerVMRequestVBlank,
2929 v->TimePerVMRequestFlip);
2930
2931
2932 // Min TTUVBlank
2933 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2934 if (PrefetchMode == 0) {
2935 v->AllowDRAMClockChangeDuringVBlank[k] = true;
2936 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2937 v->MinTTUVBlank[k] = dml_max(
2938 v->DRAMClockChangeWatermark,
2939 dml_max(
2940 v->StutterEnterPlusExitWatermark,
2941 v->UrgentWatermark));
2942 } else if (PrefetchMode == 1) {
2943 v->AllowDRAMClockChangeDuringVBlank[k] = false;
2944 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2945 v->MinTTUVBlank[k] = dml_max(
2946 v->StutterEnterPlusExitWatermark,
2947 v->UrgentWatermark);
2948 } else {
2949 v->AllowDRAMClockChangeDuringVBlank[k] = false;
2950 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
2951 v->MinTTUVBlank[k] = v->UrgentWatermark;
2952 }
2953 if (!v->DynamicMetadataEnable[k])
2954 v->MinTTUVBlank[k] = v->TCalc
2955 + v->MinTTUVBlank[k];
2956 }
2957
2958 // DCC Configuration
2959 v->ActiveDPPs = 0;
2960 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2961 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
2962 v->SourcePixelFormat[k],
2963 v->SurfaceWidthY[k],
2964 v->SurfaceWidthC[k],
2965 v->SurfaceHeightY[k],
2966 v->SurfaceHeightC[k],
2967 v->DETBufferSizeInKByte[0] * 1024,
2968 v->BlockHeight256BytesY[k],
2969 v->BlockHeight256BytesC[k],
2970 v->SurfaceTiling[k],
2971 v->BytePerPixelY[k],
2972 v->BytePerPixelC[k],
2973 v->BytePerPixelDETY[k],
2974 v->BytePerPixelDETC[k],
2975 v->SourceScan[k],
2976 &v->DCCYMaxUncompressedBlock[k],
2977 &v->DCCCMaxUncompressedBlock[k],
2978 &v->DCCYMaxCompressedBlock[k],
2979 &v->DCCCMaxCompressedBlock[k],
2980 &v->DCCYIndependentBlock[k],
2981 &v->DCCCIndependentBlock[k]);
2982 }
2983
2984 {
2985 //Maximum Bandwidth Used
2986 v->TotalDataReadBandwidth = 0;
2987 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2988 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth
2989 + v->ReadBandwidthPlaneLuma[k]
2990 + v->ReadBandwidthPlaneChroma[k];
2991 }
2992 }
2993
2994 // VStartup Margin
2995 v->VStartupMargin = 0;
2996 v->FirstMainPlane = true;
2997 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2998 if (v->BlendingAndTiming[k] == k) {
2999 double margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k]
3000 / v->PixelClock[k];
3001 if (v->FirstMainPlane == true) {
3002 v->VStartupMargin = margin;
3003 v->FirstMainPlane = false;
3004 } else {
3005 v->VStartupMargin = dml_min(v->VStartupMargin, margin);
3006 }
3007 }
3008 }
3009
3010 // Stutter Efficiency
3011 CalculateStutterEfficiency(
3012 v->NumberOfActivePlanes,
3013 v->ROBBufferSizeInKByte,
3014 v->TotalDataReadBandwidth,
3015 v->DCFCLK,
3016 v->ReturnBW,
3017 v->SRExitTime,
3018 v->SynchronizedVBlank,
3019 v->DPPPerPlane,
3020 v->DETBufferSizeY,
3021 v->BytePerPixelY,
3022 v->BytePerPixelDETY,
3023 v->SwathWidthY,
3024 v->SwathHeightY,
3025 v->SwathHeightC,
3026 v->DCCRateLuma,
3027 v->DCCRateChroma,
3028 v->HTotal,
3029 v->VTotal,
3030 v->PixelClock,
3031 v->VRatio,
3032 v->SourceScan,
3033 v->BlockHeight256BytesY,
3034 v->BlockWidth256BytesY,
3035 v->BlockHeight256BytesC,
3036 v->BlockWidth256BytesC,
3037 v->DCCYMaxUncompressedBlock,
3038 v->DCCCMaxUncompressedBlock,
3039 v->VActive,
3040 v->DCCEnable,
3041 v->WritebackEnable,
3042 v->ReadBandwidthPlaneLuma,
3043 v->ReadBandwidthPlaneChroma,
3044 v->meta_row_bw,
3045 v->dpte_row_bw,
3046 &v->StutterEfficiencyNotIncludingVBlank,
3047 &v->StutterEfficiency,
3048 &v->StutterPeriod);
3049 }
3050
DisplayPipeConfiguration(struct display_mode_lib * mode_lib)3051 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3052 {
3053 // Display Pipe Configuration
3054 double BytePerPixDETY[DC__NUM_DPP__MAX] = { 0 };
3055 double BytePerPixDETC[DC__NUM_DPP__MAX] = { 0 };
3056 int BytePerPixY[DC__NUM_DPP__MAX] = { 0 };
3057 int BytePerPixC[DC__NUM_DPP__MAX] = { 0 };
3058 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX] = { 0 };
3059 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX] = { 0 };
3060 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX] = { 0 };
3061 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX] = { 0 };
3062 double dummy1[DC__NUM_DPP__MAX] = { 0 };
3063 double dummy2[DC__NUM_DPP__MAX] = { 0 };
3064 double dummy3[DC__NUM_DPP__MAX] = { 0 };
3065 double dummy4[DC__NUM_DPP__MAX] = { 0 };
3066 int dummy5[DC__NUM_DPP__MAX] = { 0 };
3067 int dummy6[DC__NUM_DPP__MAX] = { 0 };
3068 bool dummy7[DC__NUM_DPP__MAX] = { 0 };
3069 bool dummysinglestring = 0;
3070 unsigned int k;
3071
3072 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
3073
3074 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3075 mode_lib->vba.SourcePixelFormat[k],
3076 mode_lib->vba.SurfaceTiling[k],
3077 &BytePerPixY[k],
3078 &BytePerPixC[k],
3079 &BytePerPixDETY[k],
3080 &BytePerPixDETC[k],
3081 &Read256BytesBlockHeightY[k],
3082 &Read256BytesBlockHeightC[k],
3083 &Read256BytesBlockWidthY[k],
3084 &Read256BytesBlockWidthC[k]);
3085 }
3086 CalculateSwathAndDETConfiguration(
3087 false,
3088 mode_lib->vba.NumberOfActivePlanes,
3089 mode_lib->vba.DETBufferSizeInKByte[0],
3090 dummy1,
3091 dummy2,
3092 mode_lib->vba.SourceScan,
3093 mode_lib->vba.SourcePixelFormat,
3094 mode_lib->vba.SurfaceTiling,
3095 mode_lib->vba.ViewportWidth,
3096 mode_lib->vba.ViewportHeight,
3097 mode_lib->vba.SurfaceWidthY,
3098 mode_lib->vba.SurfaceWidthC,
3099 mode_lib->vba.SurfaceHeightY,
3100 mode_lib->vba.SurfaceHeightC,
3101 Read256BytesBlockHeightY,
3102 Read256BytesBlockHeightC,
3103 Read256BytesBlockWidthY,
3104 Read256BytesBlockWidthC,
3105 mode_lib->vba.ODMCombineEnabled,
3106 mode_lib->vba.BlendingAndTiming,
3107 BytePerPixY,
3108 BytePerPixC,
3109 BytePerPixDETY,
3110 BytePerPixDETC,
3111 mode_lib->vba.HActive,
3112 mode_lib->vba.HRatio,
3113 mode_lib->vba.HRatioChroma,
3114 mode_lib->vba.DPPPerPlane,
3115 dummy5,
3116 dummy6,
3117 dummy3,
3118 dummy4,
3119 mode_lib->vba.SwathHeightY,
3120 mode_lib->vba.SwathHeightC,
3121 mode_lib->vba.DETBufferSizeY,
3122 mode_lib->vba.DETBufferSizeC,
3123 dummy7,
3124 &dummysinglestring);
3125 }
3126
dml30_CalculateBytePerPixelAnd256BBlockSizes(enum source_format_class SourcePixelFormat,enum dm_swizzle_mode SurfaceTiling,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC)3127 void dml30_CalculateBytePerPixelAnd256BBlockSizes(
3128 enum source_format_class SourcePixelFormat,
3129 enum dm_swizzle_mode SurfaceTiling,
3130 unsigned int *BytePerPixelY,
3131 unsigned int *BytePerPixelC,
3132 double *BytePerPixelDETY,
3133 double *BytePerPixelDETC,
3134 unsigned int *BlockHeight256BytesY,
3135 unsigned int *BlockHeight256BytesC,
3136 unsigned int *BlockWidth256BytesY,
3137 unsigned int *BlockWidth256BytesC)
3138 {
3139 if (SourcePixelFormat == dm_444_64) {
3140 *BytePerPixelDETY = 8;
3141 *BytePerPixelDETC = 0;
3142 *BytePerPixelY = 8;
3143 *BytePerPixelC = 0;
3144 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3145 *BytePerPixelDETY = 4;
3146 *BytePerPixelDETC = 0;
3147 *BytePerPixelY = 4;
3148 *BytePerPixelC = 0;
3149 } else if (SourcePixelFormat == dm_444_16) {
3150 *BytePerPixelDETY = 2;
3151 *BytePerPixelDETC = 0;
3152 *BytePerPixelY = 2;
3153 *BytePerPixelC = 0;
3154 } else if (SourcePixelFormat == dm_444_8) {
3155 *BytePerPixelDETY = 1;
3156 *BytePerPixelDETC = 0;
3157 *BytePerPixelY = 1;
3158 *BytePerPixelC = 0;
3159 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3160 *BytePerPixelDETY = 4;
3161 *BytePerPixelDETC = 1;
3162 *BytePerPixelY = 4;
3163 *BytePerPixelC = 1;
3164 } else if (SourcePixelFormat == dm_420_8) {
3165 *BytePerPixelDETY = 1;
3166 *BytePerPixelDETC = 2;
3167 *BytePerPixelY = 1;
3168 *BytePerPixelC = 2;
3169 } else if (SourcePixelFormat == dm_420_12) {
3170 *BytePerPixelDETY = 2;
3171 *BytePerPixelDETC = 4;
3172 *BytePerPixelY = 2;
3173 *BytePerPixelC = 4;
3174 } else {
3175 *BytePerPixelDETY = 4.0 / 3;
3176 *BytePerPixelDETC = 8.0 / 3;
3177 *BytePerPixelY = 2;
3178 *BytePerPixelC = 4;
3179 }
3180
3181 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
3182 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8
3183 || SourcePixelFormat == dm_mono_16 || SourcePixelFormat == dm_mono_8
3184 || SourcePixelFormat == dm_rgbe)) {
3185 if (SurfaceTiling == dm_sw_linear) {
3186 *BlockHeight256BytesY = 1;
3187 } else if (SourcePixelFormat == dm_444_64) {
3188 *BlockHeight256BytesY = 4;
3189 } else if (SourcePixelFormat == dm_444_8) {
3190 *BlockHeight256BytesY = 16;
3191 } else {
3192 *BlockHeight256BytesY = 8;
3193 }
3194 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3195 *BlockHeight256BytesC = 0;
3196 *BlockWidth256BytesC = 0;
3197 } else {
3198 if (SurfaceTiling == dm_sw_linear) {
3199 *BlockHeight256BytesY = 1;
3200 *BlockHeight256BytesC = 1;
3201 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3202 *BlockHeight256BytesY = 8;
3203 *BlockHeight256BytesC = 16;
3204 } else if (SourcePixelFormat == dm_420_8) {
3205 *BlockHeight256BytesY = 16;
3206 *BlockHeight256BytesC = 8;
3207 } else {
3208 *BlockHeight256BytesY = 8;
3209 *BlockHeight256BytesC = 8;
3210 }
3211 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3212 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
3213 }
3214 }
3215
/*
 * Select the wait time that corresponds to the given prefetch mode.
 *
 *   PrefetchMode 0: the largest of (DRAMClockChangeLatency + UrgentLatency),
 *                   SREnterPlusExitTime and UrgentLatency
 *   PrefetchMode 1: the larger of SREnterPlusExitTime and UrgentLatency
 *   any other mode: UrgentLatency
 */
static double CalculateTWait(
		unsigned int PrefetchMode,
		double DRAMClockChangeLatency,
		double UrgentLatency,
		double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency,
				dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3231
/*
 * Return the largest of three DISPCLK candidates computed for writeback:
 *   - one derived from the horizontal tap count and horizontal ratio,
 *   - one derived from the vertical tap count, destination width and HTotal,
 *   - one derived from the vertical tap count, destination width, line
 *     buffer size and source width.
 *
 * WritebackPixelFormat and WritebackVRatio are accepted but do not enter
 * the calculation.
 */
double dml30_CalculateWriteBackDISPCLK(
		enum source_format_class WritebackPixelFormat,
		double PixelClock,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackHTaps,
		unsigned int WritebackVTaps,
		long WritebackSourceWidth,
		long WritebackDestinationWidth,
		unsigned int HTotal,
		unsigned int WritebackLineBufferSize)
{
	const double CandidateH = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
	const double CandidateV = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
	const double CandidateHB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;

	return dml_max3(CandidateH, CandidateV, CandidateHB);
}
3251
/*
 * Compute the writeback delay.
 *
 * An unclamped "last output lines" count is derived from the destination
 * height, the source height, the vertical ratio and the vertical tap count.
 * When that count is negative the delay is 0; otherwise it is
 *   count * line length + (HTotal - WritebackDestinationWidth) + 80
 * where the line length is the larger of the destination width and
 * ceil(destination width / 6) * vertical taps.
 *
 * WritebackPixelFormat and WritebackHRatio are accepted but do not enter
 * the calculation.
 */
static double CalculateWriteBackDelay(
		enum source_format_class WritebackPixelFormat,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackVTaps,
		long WritebackDestinationWidth,
		long WritebackDestinationHeight,
		long WritebackSourceHeight,
		unsigned int HTotal)
{
	const double VInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
	const double LineLength = dml_max((double) WritebackDestinationWidth,
			dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
	const double LastOutputLines = WritebackDestinationHeight - 1
			- dml_ceil((WritebackSourceHeight - VInit) / WritebackVRatio, 1);

	if (LastOutputLines < 0)
		return 0;

	return LastOutputLines * LineLength + (HTotal - WritebackDestinationWidth) + 80;
}
3277
3278
/*
 * Compute the four dynamic-metadata timing values.
 *
 *   *Tsetup - (VUpdate offset + VUpdate width + VReady offset, all in pixels)
 *             divided by PixelClock
 *   *Tdmbf  - DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   *Tdmec  - HTotal / PixelClock
 *   *Tdmsks - VBlank * HTotal / PixelClock / 2 when
 *             DynamicMetadataLinesBeforeActiveRequired is 0, otherwise
 *             DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
 *             halved when InterlaceEnable is 1 and
 *             ProgressiveToInterlaceUnitInOPP is false
 */
static void CalculateDynamicMetadataParameters(int MaxInterDCNTileRepeaters, double DPPCLK, double DISPCLK,
		double DCFClkDeepSleep, double PixelClock, long HTotal, long VBlank, long DynamicMetadataTransmittedBytes,
		long DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP,
		double *Tsetup, double *Tdmbf, double *Tdmec, double *Tdmsks)
{
	const double RepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	const double UpdateWidthPix = (14 / DCFClkDeepSleep + 12 / DPPCLK + RepeaterDelayTime) * PixelClock;
	const double ReadyOffsetPix = dml_max(150.0 / DPPCLK, RepeaterDelayTime + 20 / DCFClkDeepSleep + 10 / DPPCLK) * PixelClock;
	const double UpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	double DmsksTime;

	*Tsetup = (UpdateOffsetPix + UpdateWidthPix + ReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		DmsksTime = VBlank * HTotal / PixelClock / 2.0;
	else
		DmsksTime = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && !ProgressiveToInterlaceUnitInOPP)
		DmsksTime = DmsksTime / 2;

	*Tdmsks = DmsksTime;
}
3304
CalculateRowBandwidth(bool GPUVMEnable,enum source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int MetaRowByteLuma,unsigned int MetaRowByteChroma,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,double * meta_row_bw,double * dpte_row_bw)3305 static void CalculateRowBandwidth(
3306 bool GPUVMEnable,
3307 enum source_format_class SourcePixelFormat,
3308 double VRatio,
3309 double VRatioChroma,
3310 bool DCCEnable,
3311 double LineTime,
3312 unsigned int MetaRowByteLuma,
3313 unsigned int MetaRowByteChroma,
3314 unsigned int meta_row_height_luma,
3315 unsigned int meta_row_height_chroma,
3316 unsigned int PixelPTEBytesPerRowLuma,
3317 unsigned int PixelPTEBytesPerRowChroma,
3318 unsigned int dpte_row_height_luma,
3319 unsigned int dpte_row_height_chroma,
3320 double *meta_row_bw,
3321 double *dpte_row_bw)
3322 {
3323 if (DCCEnable != true) {
3324 *meta_row_bw = 0;
3325 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3326 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
3327 + VRatioChroma * MetaRowByteChroma
3328 / (meta_row_height_chroma * LineTime);
3329 } else {
3330 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3331 }
3332
3333 if (GPUVMEnable != true) {
3334 *dpte_row_bw = 0;
3335 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3336 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3337 + VRatioChroma * PixelPTEBytesPerRowChroma
3338 / (dpte_row_height_chroma * LineTime);
3339 } else {
3340 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3341 }
3342 }
3343
CalculateFlipSchedule(struct display_mode_lib * mode_lib,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,double UrgentExtraLatency,double UrgentLatency,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,bool GPUVMEnable,double HostVMMinPageSize,double PDEAndMetaPTEBytesPerFrame,double MetaRowBytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw,bool DCCEnable,unsigned int dpte_row_height,unsigned int meta_row_height,unsigned int dpte_row_height_chroma,unsigned int meta_row_height_chroma,double * DestinationLinesToRequestVMInImmediateFlip,double * DestinationLinesToRequestRowInImmediateFlip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)3344 static void CalculateFlipSchedule(
3345 struct display_mode_lib *mode_lib,
3346 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
3347 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
3348 double UrgentExtraLatency,
3349 double UrgentLatency,
3350 unsigned int GPUVMMaxPageTableLevels,
3351 bool HostVMEnable,
3352 unsigned int HostVMMaxNonCachedPageTableLevels,
3353 bool GPUVMEnable,
3354 double HostVMMinPageSize,
3355 double PDEAndMetaPTEBytesPerFrame,
3356 double MetaRowBytes,
3357 double DPTEBytesPerRow,
3358 double BandwidthAvailableForImmediateFlip,
3359 unsigned int TotImmediateFlipBytes,
3360 enum source_format_class SourcePixelFormat,
3361 double LineTime,
3362 double VRatio,
3363 double VRatioChroma,
3364 double Tno_bw,
3365 bool DCCEnable,
3366 unsigned int dpte_row_height,
3367 unsigned int meta_row_height,
3368 unsigned int dpte_row_height_chroma,
3369 unsigned int meta_row_height_chroma,
3370 double *DestinationLinesToRequestVMInImmediateFlip,
3371 double *DestinationLinesToRequestRowInImmediateFlip,
3372 double *final_flip_bw,
3373 bool *ImmediateFlipSupportedForPipe)
3374 {
3375 double min_row_time = 0.0;
3376 unsigned int HostVMDynamicLevelsTrips = 0;
3377 double TimeForFetchingMetaPTEImmediateFlip = 0;
3378 double TimeForFetchingRowInVBlankImmediateFlip = 0;
3379 double ImmediateFlipBW = 0;
3380 double HostVMInefficiencyFactor = 0;
3381
3382 if (GPUVMEnable == true && HostVMEnable == true) {
3383 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
3384 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3385 } else {
3386 HostVMInefficiencyFactor = 1;
3387 HostVMDynamicLevelsTrips = 0;
3388 }
3389
3390 if (GPUVMEnable == true || DCCEnable == true) {
3391 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
3392 }
3393
3394 if (GPUVMEnable == true) {
3395 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3396 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), LineTime / 4.0);
3397 } else {
3398 TimeForFetchingMetaPTEImmediateFlip = 0;
3399 }
3400
3401 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3402 if ((GPUVMEnable == true || DCCEnable == true)) {
3403 TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3404 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4);
3405 } else {
3406 TimeForFetchingRowInVBlankImmediateFlip = 0;
3407 }
3408
3409 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3410
3411 if (GPUVMEnable == true) {
3412 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
3413 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
3414 } else if ((GPUVMEnable == true || DCCEnable == true)) {
3415 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
3416 } else {
3417 *final_flip_bw = 0;
3418 }
3419
3420
3421 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
3422 if (GPUVMEnable == true && DCCEnable != true) {
3423 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
3424 } else if (GPUVMEnable != true && DCCEnable == true) {
3425 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
3426 } else {
3427 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio,
3428 dpte_row_height_chroma * LineTime / VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
3429 }
3430 } else {
3431 if (GPUVMEnable == true && DCCEnable != true) {
3432 min_row_time = dpte_row_height * LineTime / VRatio;
3433 } else if (GPUVMEnable != true && DCCEnable == true) {
3434 min_row_time = meta_row_height * LineTime / VRatio;
3435 } else {
3436 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
3437 }
3438 }
3439
3440 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3441 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3442 *ImmediateFlipSupportedForPipe = false;
3443 } else {
3444 *ImmediateFlipSupportedForPipe = true;
3445 }
3446 }
3447
TruncToValidBPP(double LinkBitRate,int Lanes,long HTotal,long HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum output_encoder_class Output,enum output_format_class Format,unsigned int DSCInputBitPerComponent,int DSCSlices,int AudioRate,int AudioLayout,enum odm_combine_mode ODMCombine)3448 static double TruncToValidBPP(
3449 double LinkBitRate,
3450 int Lanes,
3451 long HTotal,
3452 long HActive,
3453 double PixelClock,
3454 double DesiredBPP,
3455 bool DSCEnable,
3456 enum output_encoder_class Output,
3457 enum output_format_class Format,
3458 unsigned int DSCInputBitPerComponent,
3459 int DSCSlices,
3460 int AudioRate,
3461 int AudioLayout,
3462 enum odm_combine_mode ODMCombine)
3463 {
3464 double MaxLinkBPP = 0;
3465 int MinDSCBPP = 0;
3466 double MaxDSCBPP = 0;
3467 int NonDSCBPP0 = 0;
3468 int NonDSCBPP1 = 0;
3469 int NonDSCBPP2 = 0;
3470
3471 if (Format == dm_420) {
3472 NonDSCBPP0 = 12;
3473 NonDSCBPP1 = 15;
3474 NonDSCBPP2 = 18;
3475 MinDSCBPP = 6;
3476 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3477 } else if (Format == dm_444) {
3478 NonDSCBPP0 = 24;
3479 NonDSCBPP1 = 30;
3480 NonDSCBPP2 = 36;
3481 MinDSCBPP = 8;
3482 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3483 } else {
3484 NonDSCBPP0 = 16;
3485 NonDSCBPP1 = 20;
3486 NonDSCBPP2 = 24;
3487
3488 if (Format == dm_n422) {
3489 MinDSCBPP = 7;
3490 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3491 }
3492 else {
3493 MinDSCBPP = 8;
3494 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3495 }
3496 }
3497
3498 if (DSCEnable && Output == dm_dp) {
3499 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3500 } else {
3501 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3502 }
3503
3504 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3505 MaxLinkBPP = 16;
3506 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3507 MaxLinkBPP = 32;
3508 }
3509
3510
3511 if (DesiredBPP == 0) {
3512 if (DSCEnable) {
3513 if (MaxLinkBPP < MinDSCBPP) {
3514 return BPP_INVALID;
3515 } else if (MaxLinkBPP >= MaxDSCBPP) {
3516 return MaxDSCBPP;
3517 } else {
3518 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3519 }
3520 } else {
3521 if (MaxLinkBPP >= NonDSCBPP2) {
3522 return NonDSCBPP2;
3523 } else if (MaxLinkBPP >= NonDSCBPP1) {
3524 return NonDSCBPP1;
3525 } else if (MaxLinkBPP >= NonDSCBPP0) {
3526 return NonDSCBPP0;
3527 } else {
3528 return BPP_INVALID;
3529 }
3530 }
3531 } else {
3532 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0 || DesiredBPP == 18)) ||
3533 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3534 return BPP_INVALID;
3535 } else {
3536 return DesiredBPP;
3537 }
3538 }
3539 return BPP_INVALID;
3540 }
3541
dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib * mode_lib)3542 void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3543 {
3544 struct vba_vars_st *v = &mode_lib->vba;
3545 int MinPrefetchMode, MaxPrefetchMode;
3546 int i;
3547 unsigned int j, k, m;
3548 bool EnoughWritebackUnits = true;
3549 bool WritebackModeSupport = true;
3550 bool ViewportExceedsSurface = false;
3551 double MaxTotalVActiveRDBandwidth = 0;
3552 long ReorderingBytes = 0;
3553 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX] = { 0 };
3554
3555 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3556
3557 CalculateMinAndMaxPrefetchMode(
3558 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3559 &MinPrefetchMode, &MaxPrefetchMode);
3560
3561 /*Scale Ratio, taps Support Check*/
3562
3563 v->ScaleRatioAndTapsSupport = true;
3564 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3565 if (v->ScalerEnabled[k] == false
3566 && ((v->SourcePixelFormat[k] != dm_444_64
3567 && v->SourcePixelFormat[k] != dm_444_32
3568 && v->SourcePixelFormat[k] != dm_444_16
3569 && v->SourcePixelFormat[k] != dm_mono_16
3570 && v->SourcePixelFormat[k] != dm_mono_8
3571 && v->SourcePixelFormat[k] != dm_rgbe
3572 && v->SourcePixelFormat[k] != dm_rgbe_alpha)
3573 || v->HRatio[k] != 1.0
3574 || v->htaps[k] != 1.0
3575 || v->VRatio[k] != 1.0
3576 || v->vtaps[k] != 1.0)) {
3577 v->ScaleRatioAndTapsSupport = false;
3578 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0
3579 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3580 || (v->htaps[k] > 1.0
3581 && (v->htaps[k] % 2) == 1)
3582 || v->HRatio[k] > v->MaxHSCLRatio
3583 || v->VRatio[k] > v->MaxVSCLRatio
3584 || v->HRatio[k] > v->htaps[k]
3585 || v->VRatio[k] > v->vtaps[k]
3586 || (v->SourcePixelFormat[k] != dm_444_64
3587 && v->SourcePixelFormat[k] != dm_444_32
3588 && v->SourcePixelFormat[k] != dm_444_16
3589 && v->SourcePixelFormat[k] != dm_mono_16
3590 && v->SourcePixelFormat[k] != dm_mono_8
3591 && v->SourcePixelFormat[k] != dm_rgbe
3592 && (v->VTAPsChroma[k] < 1
3593 || v->VTAPsChroma[k] > 8
3594 || v->HTAPsChroma[k] < 1
3595 || v->HTAPsChroma[k] > 8
3596 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3597 || v->HRatioChroma[k] > v->MaxHSCLRatio
3598 || v->VRatioChroma[k] > v->MaxVSCLRatio
3599 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3600 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3601 v->ScaleRatioAndTapsSupport = false;
3602 }
3603 }
3604 /*Source Format, Pixel Format and Scan Support Check*/
3605
3606 v->SourceFormatPixelAndScanSupport = true;
3607 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3608 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3609 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t || v->SurfaceTiling[k] == dm_sw_64kb_d_x)
3610 && !(v->SourcePixelFormat[k] == dm_444_64))) {
3611 v->SourceFormatPixelAndScanSupport = false;
3612 }
3613 }
3614 /*Bandwidth Support Check*/
3615
3616 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3617 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3618 v->SourcePixelFormat[k],
3619 v->SurfaceTiling[k],
3620 &v->BytePerPixelY[k],
3621 &v->BytePerPixelC[k],
3622 &v->BytePerPixelInDETY[k],
3623 &v->BytePerPixelInDETC[k],
3624 &v->Read256BlockHeightY[k],
3625 &v->Read256BlockHeightC[k],
3626 &v->Read256BlockWidthY[k],
3627 &v->Read256BlockWidthC[k]);
3628 }
3629 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3630 if (v->SourceScan[k] != dm_vert) {
3631 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3632 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3633 } else {
3634 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3635 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3636 }
3637 }
3638 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3639 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3640 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3641 }
3642 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3643 if (v->WritebackEnable[k] == true
3644 && v->WritebackPixelFormat[k] == dm_444_64) {
3645 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3646 * v->WritebackDestinationHeight[k]
3647 / (v->WritebackSourceHeight[k]
3648 * v->HTotal[k]
3649 / v->PixelClock[k]) * 8.0;
3650 } else if (v->WritebackEnable[k] == true) {
3651 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3652 * v->WritebackDestinationHeight[k]
3653 / (v->WritebackSourceHeight[k]
3654 * v->HTotal[k]
3655 / v->PixelClock[k]) * 4.0;
3656 } else {
3657 v->WriteBandwidth[k] = 0.0;
3658 }
3659 }
3660
3661 /*Writeback Latency support check*/
3662
3663 v->WritebackLatencySupport = true;
3664 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3665 if (v->WritebackEnable[k] == true) {
3666 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave ||
3667 v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
3668 if (v->WriteBandwidth[k]
3669 > 2.0 * v->WritebackInterfaceBufferSize * 1024
3670 / v->WritebackLatency) {
3671 v->WritebackLatencySupport = false;
3672 }
3673 } else {
3674 if (v->WriteBandwidth[k]
3675 > v->WritebackInterfaceBufferSize * 1024
3676 / v->WritebackLatency) {
3677 v->WritebackLatencySupport = false;
3678 }
3679 }
3680 }
3681 }
3682
3683 /*Writeback Mode Support Check*/
3684
3685 v->TotalNumberOfActiveWriteback = 0;
3686 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3687 if (v->WritebackEnable[k] == true) {
3688 v->TotalNumberOfActiveWriteback =
3689 v->TotalNumberOfActiveWriteback + 1;
3690 }
3691 }
3692
3693 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3694 EnoughWritebackUnits = false;
3695 }
3696 if (!v->WritebackSupportInterleaveAndUsingWholeBufferForASingleStream
3697 && (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave
3698 || v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave)) {
3699
3700 WritebackModeSupport = false;
3701 }
3702 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave && v->TotalNumberOfActiveWriteback > 1) {
3703 WritebackModeSupport = false;
3704 }
3705
3706 /*Writeback Scale Ratio and Taps Support Check*/
3707
3708 v->WritebackScaleRatioAndTapsSupport = true;
3709 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3710 if (v->WritebackEnable[k] == true) {
3711 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio
3712 || v->WritebackVRatio[k]
3713 > v->WritebackMaxVSCLRatio
3714 || v->WritebackHRatio[k]
3715 < v->WritebackMinHSCLRatio
3716 || v->WritebackVRatio[k]
3717 < v->WritebackMinVSCLRatio
3718 || v->WritebackHTaps[k]
3719 > v->WritebackMaxHSCLTaps
3720 || v->WritebackVTaps[k]
3721 > v->WritebackMaxVSCLTaps
3722 || v->WritebackHRatio[k]
3723 > v->WritebackHTaps[k]
3724 || v->WritebackVRatio[k]
3725 > v->WritebackVTaps[k]
3726 || (v->WritebackHTaps[k] > 2.0
3727 && ((v->WritebackHTaps[k] % 2)
3728 == 1))) {
3729 v->WritebackScaleRatioAndTapsSupport = false;
3730 }
3731 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
3732 v->WritebackScaleRatioAndTapsSupport = false;
3733 }
3734 }
3735 }
3736 /*Maximum DISPCLK/DPPCLK Support check*/
3737
3738 v->WritebackRequiredDISPCLK = 0.0;
3739 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3740 if (v->WritebackEnable[k] == true) {
3741 v->WritebackRequiredDISPCLK = dml_max(v->WritebackRequiredDISPCLK,
3742 dml30_CalculateWriteBackDISPCLK(
3743 v->WritebackPixelFormat[k],
3744 v->PixelClock[k],
3745 v->WritebackHRatio[k],
3746 v->WritebackVRatio[k],
3747 v->WritebackHTaps[k],
3748 v->WritebackVTaps[k],
3749 v->WritebackSourceWidth[k],
3750 v->WritebackDestinationWidth[k],
3751 v->HTotal[k],
3752 v->WritebackLineBufferSize));
3753 }
3754 }
3755 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3756 if (v->HRatio[k] > 1.0) {
3757 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
3758 } else {
3759 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3760 }
3761 if (v->BytePerPixelC[k] == 0.0) {
3762 v->PSCL_FACTOR_CHROMA[k] = 0.0;
3763 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3764 * dml_max3(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 1.0);
3765 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3766 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3767 }
3768 } else {
3769 if (v->HRatioChroma[k] > 1.0) {
3770 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
3771 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
3772 } else {
3773 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3774 }
3775 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] * dml_max5(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3776 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3777 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
3778 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
3779 1.0);
3780 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
3781 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3782 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3783 }
3784 }
3785 }
3786 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3787 int MaximumSwathWidthSupportLuma = 0;
3788 int MaximumSwathWidthSupportChroma = 0;
3789
3790 if (v->SurfaceTiling[k] == dm_sw_linear) {
3791 MaximumSwathWidthSupportLuma = 8192.0;
3792 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
3793 MaximumSwathWidthSupportLuma = 2880.0;
3794 } else {
3795 MaximumSwathWidthSupportLuma = 5760.0;
3796 }
3797
3798 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
3799 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
3800 } else {
3801 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
3802 }
3803 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
3804 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
3805 if (v->BytePerPixelC[k] == 0.0) {
3806 v->MaximumSwathWidthInLineBufferChroma = 0;
3807 } else {
3808 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
3809 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
3810 }
3811 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
3812 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
3813 }
3814
3815 CalculateSwathAndDETConfiguration(
3816 true,
3817 v->NumberOfActivePlanes,
3818 v->DETBufferSizeInKByte[0],
3819 v->MaximumSwathWidthLuma,
3820 v->MaximumSwathWidthChroma,
3821 v->SourceScan,
3822 v->SourcePixelFormat,
3823 v->SurfaceTiling,
3824 v->ViewportWidth,
3825 v->ViewportHeight,
3826 v->SurfaceWidthY,
3827 v->SurfaceWidthC,
3828 v->SurfaceHeightY,
3829 v->SurfaceHeightC,
3830 v->Read256BlockHeightY,
3831 v->Read256BlockHeightC,
3832 v->Read256BlockWidthY,
3833 v->Read256BlockWidthC,
3834 v->odm_combine_dummy,
3835 v->BlendingAndTiming,
3836 v->BytePerPixelY,
3837 v->BytePerPixelC,
3838 v->BytePerPixelInDETY,
3839 v->BytePerPixelInDETC,
3840 v->HActive,
3841 v->HRatio,
3842 v->HRatioChroma,
3843 v->DPPPerPlane,
3844 v->swath_width_luma_ub,
3845 v->swath_width_chroma_ub,
3846 v->SwathWidthY,
3847 v->SwathWidthC,
3848 v->SwathHeightY,
3849 v->SwathHeightC,
3850 v->DETBufferSizeY,
3851 v->DETBufferSizeC,
3852 v->SingleDPPViewportSizeSupportPerPlane,
3853 &v->ViewportSizeSupport[0][0]);
3854
3855 for (i = 0; i < v->soc.num_states; i++) {
3856 for (j = 0; j < 2; j++) {
3857 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
3858 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
3859 v->RequiredDISPCLK[i][j] = 0.0;
3860 v->DISPCLK_DPPCLK_Support[i][j] = true;
3861 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3862 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3863 * (1.0 + v->DISPCLKRampingMargin / 100.0);
3864 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3865 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3866 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3867 }
3868 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3869 * (1 + v->DISPCLKRampingMargin / 100.0);
3870 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3871 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3872 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3873 }
3874 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3875 * (1 + v->DISPCLKRampingMargin / 100.0);
3876 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3877 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3878 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3879 }
3880
3881 if (v->ODMCombinePolicy == dm_odm_combine_policy_none) {
3882 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3883 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3884 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
3885 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3886 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3887 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
3888 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
3889 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3890 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3891 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
3892 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3893 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3894 } else {
3895 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3896 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3897 }
3898 if (v->DSCEnabled[k] && v->HActive[k] > DCN30_MAX_DSC_IMAGE_WIDTH
3899 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
3900 if (v->HActive[k] / 2 > DCN30_MAX_DSC_IMAGE_WIDTH) {
3901 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3902 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3903 } else {
3904 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3905 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3906 }
3907 }
3908 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN30_MAX_FMT_420_BUFFER_WIDTH
3909 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
3910 if (v->HActive[k] / 2 > DCN30_MAX_FMT_420_BUFFER_WIDTH) {
3911 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3912 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3913 } else {
3914 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3915 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3916 }
3917 }
3918 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
3919 v->MPCCombine[i][j][k] = false;
3920 v->NoOfDPP[i][j][k] = 4;
3921 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
3922 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
3923 v->MPCCombine[i][j][k] = false;
3924 v->NoOfDPP[i][j][k] = 2;
3925 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
3926 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
3927 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= v->MaxDppclkRoundedDownToDFSGranularity
3928 && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
3929 v->MPCCombine[i][j][k] = false;
3930 v->NoOfDPP[i][j][k] = 1;
3931 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3932 } else {
3933 v->MPCCombine[i][j][k] = true;
3934 v->NoOfDPP[i][j][k] = 2;
3935 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
3936 }
3937 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
3938 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3939 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
3940 v->DISPCLK_DPPCLK_Support[i][j] = false;
3941 }
3942 }
3943 v->TotalNumberOfActiveDPP[i][j] = 0;
3944 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
3945 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3946 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
3947 if (v->NoOfDPP[i][j][k] == 1)
3948 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
3949 }
3950 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never) {
3951 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
3952 double BWOfNonSplitPlaneOfMaximumBandwidth = 0;
3953 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
3954 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
3955 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
3956 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3957 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
3958 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
3959 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
3960 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
3961 }
3962 }
3963 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
3964 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
3965 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
3966 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
3967 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
3968 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
3969 }
3970 }
3971 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
3972 v->RequiredDISPCLK[i][j] = 0.0;
3973 v->DISPCLK_DPPCLK_Support[i][j] = true;
3974 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3975 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3976 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
3977 v->MPCCombine[i][j][k] = true;
3978 v->NoOfDPP[i][j][k] = 2;
3979 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
3980 } else {
3981 v->MPCCombine[i][j][k] = false;
3982 v->NoOfDPP[i][j][k] = 1;
3983 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3984 }
3985 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
3986 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3987 * (1.0 + v->DISPCLKRampingMargin / 100.0);
3988 } else {
3989 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3990 }
3991 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
3992 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3993 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
3994 v->DISPCLK_DPPCLK_Support[i][j] = false;
3995 }
3996 }
3997 v->TotalNumberOfActiveDPP[i][j] = 0.0;
3998 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3999 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4000 }
4001 }
4002 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4003 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4004 v->DISPCLK_DPPCLK_Support[i][j] = false;
4005 }
4006 }
4007 }
4008
4009 /*Total Available Pipes Support Check*/
4010
4011 for (i = 0; i < v->soc.num_states; i++) {
4012 for (j = 0; j < 2; j++) {
4013 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4014 v->TotalAvailablePipesSupport[i][j] = true;
4015 } else {
4016 v->TotalAvailablePipesSupport[i][j] = false;
4017 }
4018 }
4019 }
4020 /*Display IO and DSC Support Check*/
4021
4022 v->NonsupportedDSCInputBPC = false;
4023 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4024 if (!(v->DSCInputBitPerComponent[k] == 12.0
4025 || v->DSCInputBitPerComponent[k] == 10.0
4026 || v->DSCInputBitPerComponent[k] == 8.0)) {
4027 v->NonsupportedDSCInputBPC = true;
4028 }
4029 }
4030
4031 /*Number Of DSC Slices*/
4032 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4033 if (v->BlendingAndTiming[k] == k) {
4034 if (v->PixelClockBackEnd[k] > 3200) {
4035 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4036 } else if (v->PixelClockBackEnd[k] > 1360) {
4037 v->NumberOfDSCSlices[k] = 8;
4038 } else if (v->PixelClockBackEnd[k] > 680) {
4039 v->NumberOfDSCSlices[k] = 4;
4040 } else if (v->PixelClockBackEnd[k] > 340) {
4041 v->NumberOfDSCSlices[k] = 2;
4042 } else {
4043 v->NumberOfDSCSlices[k] = 1;
4044 }
4045 } else {
4046 v->NumberOfDSCSlices[k] = 0;
4047 }
4048 }
4049
4050 for (i = 0; i < v->soc.num_states; i++) {
4051 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4052 v->RequiresDSC[i][k] = false;
4053 v->RequiresFEC[i][k] = false;
4054 if (v->BlendingAndTiming[k] == k) {
4055 if (v->Output[k] == dm_hdmi) {
4056 v->RequiresDSC[i][k] = false;
4057 v->RequiresFEC[i][k] = false;
4058 v->OutputBppPerState[i][k] = TruncToValidBPP(
4059 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4060 3,
4061 v->HTotal[k],
4062 v->HActive[k],
4063 v->PixelClockBackEnd[k],
4064 v->ForcedOutputLinkBPP[k],
4065 false,
4066 v->Output[k],
4067 v->OutputFormat[k],
4068 v->DSCInputBitPerComponent[k],
4069 v->NumberOfDSCSlices[k],
4070 v->AudioSampleRate[k],
4071 v->AudioSampleLayout[k],
4072 v->ODMCombineEnablePerState[i][k]);
4073 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4074 if (v->DSCEnable[k] == true) {
4075 v->RequiresDSC[i][k] = true;
4076 v->LinkDSCEnable = true;
4077 if (v->Output[k] == dm_dp) {
4078 v->RequiresFEC[i][k] = true;
4079 } else {
4080 v->RequiresFEC[i][k] = false;
4081 }
4082 } else {
4083 v->RequiresDSC[i][k] = false;
4084 v->LinkDSCEnable = false;
4085 v->RequiresFEC[i][k] = false;
4086 }
4087
4088 v->Outbpp = BPP_INVALID;
4089 if (v->PHYCLKPerState[i] >= 270.0) {
4090 v->Outbpp = TruncToValidBPP(
4091 (1.0 - v->Downspreading / 100.0) * 2700,
4092 v->OutputLinkDPLanes[k],
4093 v->HTotal[k],
4094 v->HActive[k],
4095 v->PixelClockBackEnd[k],
4096 v->ForcedOutputLinkBPP[k],
4097 v->LinkDSCEnable,
4098 v->Output[k],
4099 v->OutputFormat[k],
4100 v->DSCInputBitPerComponent[k],
4101 v->NumberOfDSCSlices[k],
4102 v->AudioSampleRate[k],
4103 v->AudioSampleLayout[k],
4104 v->ODMCombineEnablePerState[i][k]);
4105 v->OutputBppPerState[i][k] = v->Outbpp;
4106 // TODO: Need some other way to handle this nonsense
4107 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4108 }
4109 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4110 v->Outbpp = TruncToValidBPP(
4111 (1.0 - v->Downspreading / 100.0) * 5400,
4112 v->OutputLinkDPLanes[k],
4113 v->HTotal[k],
4114 v->HActive[k],
4115 v->PixelClockBackEnd[k],
4116 v->ForcedOutputLinkBPP[k],
4117 v->LinkDSCEnable,
4118 v->Output[k],
4119 v->OutputFormat[k],
4120 v->DSCInputBitPerComponent[k],
4121 v->NumberOfDSCSlices[k],
4122 v->AudioSampleRate[k],
4123 v->AudioSampleLayout[k],
4124 v->ODMCombineEnablePerState[i][k]);
4125 v->OutputBppPerState[i][k] = v->Outbpp;
4126 // TODO: Need some other way to handle this nonsense
4127 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4128 }
4129 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4130 v->Outbpp = TruncToValidBPP(
4131 (1.0 - v->Downspreading / 100.0) * 8100,
4132 v->OutputLinkDPLanes[k],
4133 v->HTotal[k],
4134 v->HActive[k],
4135 v->PixelClockBackEnd[k],
4136 v->ForcedOutputLinkBPP[k],
4137 v->LinkDSCEnable,
4138 v->Output[k],
4139 v->OutputFormat[k],
4140 v->DSCInputBitPerComponent[k],
4141 v->NumberOfDSCSlices[k],
4142 v->AudioSampleRate[k],
4143 v->AudioSampleLayout[k],
4144 v->ODMCombineEnablePerState[i][k]);
4145 if (v->Outbpp == BPP_INVALID && v->ForcedOutputLinkBPP[k] == 0) {
4146 //if (v->Outbpp == BPP_INVALID && v->DSCEnabled[k] == dm_dsc_enable_only_if_necessary && v->ForcedOutputLinkBPP[k] == 0) {
4147 v->RequiresDSC[i][k] = true;
4148 v->LinkDSCEnable = true;
4149 if (v->Output[k] == dm_dp) {
4150 v->RequiresFEC[i][k] = true;
4151 }
4152 v->Outbpp = TruncToValidBPP(
4153 (1.0 - v->Downspreading / 100.0) * 8100,
4154 v->OutputLinkDPLanes[k],
4155 v->HTotal[k],
4156 v->HActive[k],
4157 v->PixelClockBackEnd[k],
4158 v->ForcedOutputLinkBPP[k],
4159 v->LinkDSCEnable,
4160 v->Output[k],
4161 v->OutputFormat[k],
4162 v->DSCInputBitPerComponent[k],
4163 v->NumberOfDSCSlices[k],
4164 v->AudioSampleRate[k],
4165 v->AudioSampleLayout[k],
4166 v->ODMCombineEnablePerState[i][k]);
4167 }
4168 v->OutputBppPerState[i][k] = v->Outbpp;
4169 // TODO: Need some other way to handle this nonsense
4170 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4171 }
4172 }
4173 } else {
4174 v->OutputBppPerState[i][k] = 0;
4175 }
4176 }
4177 }
4178 for (i = 0; i < v->soc.num_states; i++) {
4179 v->DIOSupport[i] = true;
4180 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4181 if (!v->skip_dio_check[k] && v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)
4182 && (v->OutputBppPerState[i][k] == 0
4183 || (v->OutputFormat[k] == dm_420 && v->Interlace[k] == true && v->ProgressiveToInterlaceUnitInOPP == true))) {
4184 v->DIOSupport[i] = false;
4185 }
4186 }
4187 }
4188
4189 for (i = 0; i < v->soc.num_states; ++i) {
4190 v->ODMCombine4To1SupportCheckOK[i] = true;
4191 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4192 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4193 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)) {
4194 v->ODMCombine4To1SupportCheckOK[i] = false;
4195 }
4196 }
4197 }
4198
4199 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4200
4201 for (i = 0; i < v->soc.num_states; i++) {
4202 v->NotEnoughDSCUnits[i] = false;
4203 v->TotalDSCUnitsRequired = 0.0;
4204 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4205 if (v->RequiresDSC[i][k] == true) {
4206 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4207 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4208 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4209 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4210 } else {
4211 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4212 }
4213 }
4214 }
4215 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4216 v->NotEnoughDSCUnits[i] = true;
4217 }
4218 }
4219 /*DSC Delay per state*/
4220
4221 for (i = 0; i < v->soc.num_states; i++) {
4222 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4223 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4224 v->BPP = 0.0;
4225 } else {
4226 v->BPP = v->OutputBppPerState[i][k];
4227 }
4228 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4229 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4230 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4231 v->DSCInputBitPerComponent[k],
4232 v->BPP,
4233 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4234 v->NumberOfDSCSlices[k],
4235 v->OutputFormat[k],
4236 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4237 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4238 v->DSCDelayPerState[i][k] = 2.0
4239 * dscceComputeDelay(
4240 v->DSCInputBitPerComponent[k],
4241 v->BPP,
4242 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4243 v->NumberOfDSCSlices[k] / 2,
4244 v->OutputFormat[k],
4245 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4246 } else {
4247 v->DSCDelayPerState[i][k] = 4.0
4248 * (dscceComputeDelay(
4249 v->DSCInputBitPerComponent[k],
4250 v->BPP,
4251 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4252 v->NumberOfDSCSlices[k] / 4,
4253 v->OutputFormat[k],
4254 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4255 }
4256 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4257 } else {
4258 v->DSCDelayPerState[i][k] = 0.0;
4259 }
4260 }
4261 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4262 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4263 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4264 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4265 }
4266 }
4267 }
4268 }
4269
4270 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4271 //
4272 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4273 for (j = 0; j <= 1; ++j) {
4274 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4275 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4276 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4277 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4278 }
4279
4280 CalculateSwathAndDETConfiguration(
4281 false,
4282 v->NumberOfActivePlanes,
4283 v->DETBufferSizeInKByte[0],
4284 v->MaximumSwathWidthLuma,
4285 v->MaximumSwathWidthChroma,
4286 v->SourceScan,
4287 v->SourcePixelFormat,
4288 v->SurfaceTiling,
4289 v->ViewportWidth,
4290 v->ViewportHeight,
4291 v->SurfaceWidthY,
4292 v->SurfaceWidthC,
4293 v->SurfaceHeightY,
4294 v->SurfaceHeightC,
4295 v->Read256BlockHeightY,
4296 v->Read256BlockHeightC,
4297 v->Read256BlockWidthY,
4298 v->Read256BlockWidthC,
4299 v->ODMCombineEnableThisState,
4300 v->BlendingAndTiming,
4301 v->BytePerPixelY,
4302 v->BytePerPixelC,
4303 v->BytePerPixelInDETY,
4304 v->BytePerPixelInDETC,
4305 v->HActive,
4306 v->HRatio,
4307 v->HRatioChroma,
4308 v->NoOfDPPThisState,
4309 v->swath_width_luma_ub_this_state,
4310 v->swath_width_chroma_ub_this_state,
4311 v->SwathWidthYThisState,
4312 v->SwathWidthCThisState,
4313 v->SwathHeightYThisState,
4314 v->SwathHeightCThisState,
4315 v->DETBufferSizeYThisState,
4316 v->DETBufferSizeCThisState,
4317 v->dummystring,
4318 &v->ViewportSizeSupport[i][j]);
4319
4320 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4321 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4322 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4323 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4324 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4325 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4326 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4327 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4328 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4329 }
4330
4331 }
4332 }
4333 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4334 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4335 }
4336
4337 for (i = 0; i < v->soc.num_states; i++) {
4338 for (j = 0; j < 2; j++) {
4339 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4340 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4341 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4342 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4343 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4344 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4345 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4346 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4347 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4348 }
4349
4350 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4351 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4352 if (v->DCCEnable[k] == true) {
4353 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4354 }
4355 }
4356
4357 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4358 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
4359 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4360
4361 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
4362 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
4363 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4364 } else {
4365 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4366 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4367 }
4368
4369 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4370 mode_lib,
4371 v->DCCEnable[k],
4372 v->Read256BlockHeightC[k],
4373 v->Read256BlockWidthY[k],
4374 v->SourcePixelFormat[k],
4375 v->SurfaceTiling[k],
4376 v->BytePerPixelC[k],
4377 v->SourceScan[k],
4378 v->SwathWidthCThisState[k],
4379 v->ViewportHeightChroma[k],
4380 v->GPUVMEnable,
4381 v->HostVMEnable,
4382 v->HostVMMaxNonCachedPageTableLevels,
4383 v->GPUVMMinPageSize,
4384 v->HostVMMinPageSize,
4385 v->PTEBufferSizeInRequestsForChroma,
4386 v->PitchC[k],
4387 0.0,
4388 &v->MacroTileWidthC[k],
4389 &v->MetaRowBytesC,
4390 &v->DPTEBytesPerRowC,
4391 &v->PTEBufferSizeNotExceededC[i][j][k],
4392 &v->dummyinteger7,
4393 &v->dpte_row_height_chroma[k],
4394 &v->dummyinteger28,
4395 &v->dummyinteger26,
4396 &v->dummyinteger23,
4397 &v->meta_row_height_chroma[k],
4398 &v->dummyinteger8,
4399 &v->dummyinteger9,
4400 &v->dummyinteger19,
4401 &v->dummyinteger20,
4402 &v->dummyinteger17,
4403 &v->dummyinteger10,
4404 &v->dummyinteger11);
4405
4406 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4407 mode_lib,
4408 v->VRatioChroma[k],
4409 v->VTAPsChroma[k],
4410 v->Interlace[k],
4411 v->ProgressiveToInterlaceUnitInOPP,
4412 v->SwathHeightCThisState[k],
4413 v->ViewportYStartC[k],
4414 &v->PrefillC[k],
4415 &v->MaxNumSwC[k]);
4416 } else {
4417 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4418 v->PTEBufferSizeInRequestsForChroma = 0;
4419 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4420 v->MetaRowBytesC = 0.0;
4421 v->DPTEBytesPerRowC = 0.0;
4422 v->PrefetchLinesC[i][j][k] = 0.0;
4423 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4424 }
4425 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4426 mode_lib,
4427 v->DCCEnable[k],
4428 v->Read256BlockHeightY[k],
4429 v->Read256BlockWidthY[k],
4430 v->SourcePixelFormat[k],
4431 v->SurfaceTiling[k],
4432 v->BytePerPixelY[k],
4433 v->SourceScan[k],
4434 v->SwathWidthYThisState[k],
4435 v->ViewportHeight[k],
4436 v->GPUVMEnable,
4437 v->HostVMEnable,
4438 v->HostVMMaxNonCachedPageTableLevels,
4439 v->GPUVMMinPageSize,
4440 v->HostVMMinPageSize,
4441 v->PTEBufferSizeInRequestsForLuma,
4442 v->PitchY[k],
4443 v->DCCMetaPitchY[k],
4444 &v->MacroTileWidthY[k],
4445 &v->MetaRowBytesY,
4446 &v->DPTEBytesPerRowY,
4447 &v->PTEBufferSizeNotExceededY[i][j][k],
4448 v->dummyinteger4,
4449 &v->dpte_row_height[k],
4450 &v->dummyinteger29,
4451 &v->dummyinteger27,
4452 &v->dummyinteger24,
4453 &v->meta_row_height[k],
4454 &v->dummyinteger25,
4455 &v->dpte_group_bytes[k],
4456 &v->dummyinteger21,
4457 &v->dummyinteger22,
4458 &v->dummyinteger18,
4459 &v->dummyinteger5,
4460 &v->dummyinteger6);
4461 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4462 mode_lib,
4463 v->VRatio[k],
4464 v->vtaps[k],
4465 v->Interlace[k],
4466 v->ProgressiveToInterlaceUnitInOPP,
4467 v->SwathHeightYThisState[k],
4468 v->ViewportYStartY[k],
4469 &v->PrefillY[k],
4470 &v->MaxNumSwY[k]);
4471 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4472 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4473 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4474
4475 CalculateRowBandwidth(
4476 v->GPUVMEnable,
4477 v->SourcePixelFormat[k],
4478 v->VRatio[k],
4479 v->VRatioChroma[k],
4480 v->DCCEnable[k],
4481 v->HTotal[k] / v->PixelClock[k],
4482 v->MetaRowBytesY,
4483 v->MetaRowBytesC,
4484 v->meta_row_height[k],
4485 v->meta_row_height_chroma[k],
4486 v->DPTEBytesPerRowY,
4487 v->DPTEBytesPerRowC,
4488 v->dpte_row_height[k],
4489 v->dpte_row_height_chroma[k],
4490 &v->meta_row_bandwidth[i][j][k],
4491 &v->dpte_row_bandwidth[i][j][k]);
4492 }
4493 v->UrgLatency[i] = CalculateUrgentLatency(
4494 v->UrgentLatencyPixelDataOnly,
4495 v->UrgentLatencyPixelMixedWithVMData,
4496 v->UrgentLatencyVMDataOnly,
4497 v->DoUrgentLatencyAdjustment,
4498 v->UrgentLatencyAdjustmentFabricClockComponent,
4499 v->UrgentLatencyAdjustmentFabricClockReference,
4500 v->FabricClockPerState[i]);
4501
4502 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4503 CalculateUrgentBurstFactor(
4504 v->swath_width_luma_ub_this_state[k],
4505 v->swath_width_chroma_ub_this_state[k],
4506 v->DETBufferSizeInKByte[0],
4507 v->SwathHeightYThisState[k],
4508 v->SwathHeightCThisState[k],
4509 v->HTotal[k] / v->PixelClock[k],
4510 v->UrgLatency[i],
4511 v->CursorBufferSize,
4512 v->CursorWidth[k][0],
4513 v->CursorBPP[k][0],
4514 v->VRatio[k],
4515 v->VRatioChroma[k],
4516 v->BytePerPixelInDETY[k],
4517 v->BytePerPixelInDETC[k],
4518 v->DETBufferSizeYThisState[k],
4519 v->DETBufferSizeCThisState[k],
4520 &v->UrgentBurstFactorCursor[k],
4521 &v->UrgentBurstFactorLuma[k],
4522 &v->UrgentBurstFactorChroma[k],
4523 &NotUrgentLatencyHiding[k]);
4524 }
4525
4526 v->NotUrgentLatencyHiding[i][j] = false;
4527 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4528 if (NotUrgentLatencyHiding[k]) {
4529 v->NotUrgentLatencyHiding[i][j] = true;
4530 }
4531 }
4532
4533 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4534 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4535 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4536 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4537 }
4538
4539 v->TotalVActivePixelBandwidth[i][j] = 0;
4540 v->TotalVActiveCursorBandwidth[i][j] = 0;
4541 v->TotalMetaRowBandwidth[i][j] = 0;
4542 v->TotalDPTERowBandwidth[i][j] = 0;
4543 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4544 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4545 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4546 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4547 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4548 }
4549
4550 CalculateDCFCLKDeepSleep(
4551 mode_lib,
4552 v->NumberOfActivePlanes,
4553 v->BytePerPixelY,
4554 v->BytePerPixelC,
4555 v->VRatio,
4556 v->VRatioChroma,
4557 v->SwathWidthYThisState,
4558 v->SwathWidthCThisState,
4559 v->NoOfDPPThisState,
4560 v->HRatio,
4561 v->HRatioChroma,
4562 v->PixelClock,
4563 v->PSCL_FACTOR,
4564 v->PSCL_FACTOR_CHROMA,
4565 v->RequiredDPPCLKThisState,
4566 v->ReadBandwidthLuma,
4567 v->ReadBandwidthChroma,
4568 v->ReturnBusWidth,
4569 &v->ProjectedDCFCLKDeepSleep[i][j]);
4570 }
4571 }
4572
4573 //Calculate Return BW
4574
4575 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4576 for (j = 0; j <= 1; ++j) {
4577 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4578 if (v->BlendingAndTiming[k] == k) {
4579 if (v->WritebackEnable[k] == true) {
4580 v->WritebackDelayTime[k] = v->WritebackLatency
4581 + CalculateWriteBackDelay(
4582 v->WritebackPixelFormat[k],
4583 v->WritebackHRatio[k],
4584 v->WritebackVRatio[k],
4585 v->WritebackVTaps[k],
4586 v->WritebackDestinationWidth[k],
4587 v->WritebackDestinationHeight[k],
4588 v->WritebackSourceHeight[k],
4589 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4590 } else {
4591 v->WritebackDelayTime[k] = 0.0;
4592 }
4593 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4594 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4595 v->WritebackDelayTime[k] = dml_max(
4596 v->WritebackDelayTime[k],
4597 v->WritebackLatency
4598 + CalculateWriteBackDelay(
4599 v->WritebackPixelFormat[m],
4600 v->WritebackHRatio[m],
4601 v->WritebackVRatio[m],
4602 v->WritebackVTaps[m],
4603 v->WritebackDestinationWidth[m],
4604 v->WritebackDestinationHeight[m],
4605 v->WritebackSourceHeight[m],
4606 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
4607 }
4608 }
4609 }
4610 }
4611 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4612 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4613 if (v->BlendingAndTiming[k] == m) {
4614 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
4615 }
4616 }
4617 }
4618 v->MaxMaxVStartup[i][j] = 0;
4619 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4620 v->MaximumVStartup[i][j][k] = v->VTotal[k] - v->VActive[k]
4621 - dml_max(1.0, dml_ceil(1.0 * v->WritebackDelayTime[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0));
4622 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
4623 }
4624 }
4625 }
4626
4627 ReorderingBytes = v->NumberOfChannels
4628 * dml_max3(
4629 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
4630 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
4631 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
4632 v->FinalDRAMClockChangeLatency = (v->DRAMClockChangeLatencyOverride > 0 ? v->DRAMClockChangeLatencyOverride : v->DRAMClockChangeLatency);
4633
4634 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4635 for (j = 0; j <= 1; ++j) {
4636 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
4637 }
4638 }
4639
4640 if (v->UseMinimumRequiredDCFCLK == true) {
4641 UseMinimumDCFCLK(mode_lib, v, MaxPrefetchMode, ReorderingBytes);
4642
4643 if (v->ClampMinDCFCLK) {
4644 /* Clamp calculated values to actual minimum */
4645 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4646 for (j = 0; j <= 1; ++j) {
4647 if (v->DCFCLKState[i][j] < mode_lib->soc.min_dcfclk) {
4648 v->DCFCLKState[i][j] = mode_lib->soc.min_dcfclk;
4649 }
4650 }
4651 }
4652 }
4653 }
4654
4655 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4656 for (j = 0; j <= 1; ++j) {
4657 v->IdealSDPPortBandwidthPerState[i][j] = dml_min3(
4658 v->ReturnBusWidth * v->DCFCLKState[i][j],
4659 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth,
4660 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
4661 if (v->HostVMEnable != true) {
4662 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
4663 / 100;
4664 } else {
4665 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j]
4666 * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
4667 }
4668 }
4669 }
4670
4671 //Re-ordering Buffer Support Check
4672
4673 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4674 for (j = 0; j <= 1; ++j) {
4675 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
4676 > (v->RoundTripPingLatencyCycles + 32) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
4677 v->ROBSupport[i][j] = true;
4678 } else {
4679 v->ROBSupport[i][j] = false;
4680 }
4681 }
4682 }
4683
4684 //Vertical Active BW support check
4685
4686 MaxTotalVActiveRDBandwidth = 0;
4687 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4688 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4689 }
4690
4691 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4692 for (j = 0; j <= 1; ++j) {
4693 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
4694 v->IdealSDPPortBandwidthPerState[i][j] * v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
4695 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation
4696 / 100);
4697 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
4698 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
4699 } else {
4700 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
4701 }
4702 }
4703 }
4704
4705 //Prefetch Check
4706
4707 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4708 for (j = 0; j <= 1; ++j) {
4709 int NextPrefetchModeState = MinPrefetchMode;
4710
4711 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
4712
4713 v->BandwidthWithoutPrefetchSupported[i][j] = true;
4714 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] + v->TotalDPTERowBandwidth[i][j]
4715 > v->ReturnBWPerState[i][j] || v->NotUrgentLatencyHiding[i][j]) {
4716 v->BandwidthWithoutPrefetchSupported[i][j] = false;
4717 }
4718
4719 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4720 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4721 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4722 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4723 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4724 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4725 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4726 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4727 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4728 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4729 v->ODMCombineEnabled[k] = v->ODMCombineEnablePerState[i][k];
4730 }
4731
4732 v->ExtraLatency = CalculateExtraLatency(
4733 v->RoundTripPingLatencyCycles,
4734 ReorderingBytes,
4735 v->DCFCLKState[i][j],
4736 v->TotalNumberOfActiveDPP[i][j],
4737 v->PixelChunkSizeInKByte,
4738 v->TotalNumberOfDCCActiveDPP[i][j],
4739 v->MetaChunkSize,
4740 v->ReturnBWPerState[i][j],
4741 v->GPUVMEnable,
4742 v->HostVMEnable,
4743 v->NumberOfActivePlanes,
4744 v->NoOfDPPThisState,
4745 v->dpte_group_bytes,
4746 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4747 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4748 v->HostVMMinPageSize,
4749 v->HostVMMaxNonCachedPageTableLevels);
4750
4751 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
4752 do {
4753 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
4754 v->MaxVStartup = v->NextMaxVStartup;
4755
4756 v->TWait = CalculateTWait(v->PrefetchModePerState[i][j], v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
4757
4758 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4759 Pipe myPipe = { 0 };
4760
4761 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
4762 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
4763 myPipe.PixelClock = v->PixelClock[k];
4764 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
4765 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
4766 myPipe.ScalerEnabled = v->ScalerEnabled[k];
4767 myPipe.SourceScan = v->SourceScan[k];
4768 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
4769 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
4770 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
4771 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
4772 myPipe.InterlaceEnable = v->Interlace[k];
4773 myPipe.NumberOfCursors = v->NumberOfCursors[k];
4774 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
4775 myPipe.HTotal = v->HTotal[k];
4776 myPipe.DCCEnable = v->DCCEnable[k];
4777 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
4778
4779 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
4780 mode_lib,
4781 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4782 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4783 &myPipe,
4784 v->DSCDelayPerState[i][k],
4785 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
4786 v->DPPCLKDelaySCL,
4787 v->DPPCLKDelaySCLLBOnly,
4788 v->DPPCLKDelayCNVCCursor,
4789 v->DISPCLKDelaySubtotal,
4790 v->SwathWidthYThisState[k] / v->HRatio[k],
4791 v->OutputFormat[k],
4792 v->MaxInterDCNTileRepeaters,
4793 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
4794 v->MaximumVStartup[i][j][k],
4795 v->GPUVMMaxPageTableLevels,
4796 v->GPUVMEnable,
4797 v->HostVMEnable,
4798 v->HostVMMaxNonCachedPageTableLevels,
4799 v->HostVMMinPageSize,
4800 v->DynamicMetadataEnable[k],
4801 v->DynamicMetadataVMEnabled,
4802 v->DynamicMetadataLinesBeforeActiveRequired[k],
4803 v->DynamicMetadataTransmittedBytes[k],
4804 v->UrgLatency[i],
4805 v->ExtraLatency,
4806 v->TimeCalc,
4807 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
4808 v->MetaRowBytes[i][j][k],
4809 v->DPTEBytesPerRow[i][j][k],
4810 v->PrefetchLinesY[i][j][k],
4811 v->SwathWidthYThisState[k],
4812 v->BytePerPixelY[k],
4813 v->PrefillY[k],
4814 v->MaxNumSwY[k],
4815 v->PrefetchLinesC[i][j][k],
4816 v->SwathWidthCThisState[k],
4817 v->BytePerPixelC[k],
4818 v->PrefillC[k],
4819 v->MaxNumSwC[k],
4820 v->swath_width_luma_ub_this_state[k],
4821 v->swath_width_chroma_ub_this_state[k],
4822 v->SwathHeightYThisState[k],
4823 v->SwathHeightCThisState[k],
4824 v->TWait,
4825 v->ProgressiveToInterlaceUnitInOPP,
4826 &v->DSTXAfterScaler[k],
4827 &v->DSTYAfterScaler[k],
4828 &v->LineTimesForPrefetch[k],
4829 &v->PrefetchBW[k],
4830 &v->LinesForMetaPTE[k],
4831 &v->LinesForMetaAndDPTERow[k],
4832 &v->VRatioPreY[i][j][k],
4833 &v->VRatioPreC[i][j][k],
4834 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
4835 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
4836 &v->NoTimeForDynamicMetadata[i][j][k],
4837 &v->Tno_bw[k],
4838 &v->prefetch_vmrow_bw[k],
4839 &v->Tdmdl_vm[k],
4840 &v->Tdmdl[k],
4841 &v->VUpdateOffsetPix[k],
4842 &v->VUpdateWidthPix[k],
4843 &v->VReadyOffsetPix[k]);
4844 }
4845
4846 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4847 CalculateUrgentBurstFactor(
4848 v->swath_width_luma_ub_this_state[k],
4849 v->swath_width_chroma_ub_this_state[k],
4850 v->DETBufferSizeInKByte[0],
4851 v->SwathHeightYThisState[k],
4852 v->SwathHeightCThisState[k],
4853 v->HTotal[k] / v->PixelClock[k],
4854 v->UrgentLatency,
4855 v->CursorBufferSize,
4856 v->CursorWidth[k][0],
4857 v->CursorBPP[k][0],
4858 v->VRatioPreY[i][j][k],
4859 v->VRatioPreC[i][j][k],
4860 v->BytePerPixelInDETY[k],
4861 v->BytePerPixelInDETC[k],
4862 v->DETBufferSizeYThisState[k],
4863 v->DETBufferSizeCThisState[k],
4864 &v->UrgentBurstFactorCursorPre[k],
4865 &v->UrgentBurstFactorLumaPre[k],
4866 &v->UrgentBurstFactorChroma[k],
4867 &v->NoUrgentLatencyHidingPre[k]);
4868 }
4869
4870 v->MaximumReadBandwidthWithPrefetch = 0.0;
4871 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4872 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k])
4873 * v->VRatioPreY[i][j][k];
4874
4875 v->MaximumReadBandwidthWithPrefetch = v->MaximumReadBandwidthWithPrefetch
4876 + dml_max4(
4877 v->VActivePixelBandwidth[i][j][k],
4878 v->VActiveCursorBandwidth[i][j][k]
4879 + v->NoOfDPP[i][j][k] * (v->meta_row_bandwidth[i][j][k] + v->dpte_row_bandwidth[i][j][k]),
4880 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
4881 v->NoOfDPP[i][j][k]
4882 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
4883 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
4884 * v->UrgentBurstFactorChromaPre[k])
4885 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
4886 }
4887
4888 v->NotEnoughUrgentLatencyHidingPre = false;
4889 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4890 if (v->NoUrgentLatencyHidingPre[k] == true) {
4891 v->NotEnoughUrgentLatencyHidingPre = true;
4892 }
4893 }
4894
4895 v->PrefetchSupported[i][j] = true;
4896 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
4897 || v->NotEnoughUrgentLatencyHidingPre == 1) {
4898 v->PrefetchSupported[i][j] = false;
4899 }
4900 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4901 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
4902 || v->NoTimeForPrefetch[i][j][k] == true) {
4903 v->PrefetchSupported[i][j] = false;
4904 }
4905 }
4906
4907 v->DynamicMetadataSupported[i][j] = true;
4908 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4909 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
4910 v->DynamicMetadataSupported[i][j] = false;
4911 }
4912 }
4913
4914 v->VRatioInPrefetchSupported[i][j] = true;
4915 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4916 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
4917 v->VRatioInPrefetchSupported[i][j] = false;
4918 }
4919 }
4920 v->AnyLinesForVMOrRowTooLarge = false;
4921 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4922 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
4923 v->AnyLinesForVMOrRowTooLarge = true;
4924 }
4925 }
4926
4927 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
4928 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
4929 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4930 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
4931 - dml_max(
4932 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
4933 v->NoOfDPP[i][j][k]
4934 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
4935 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
4936 * v->UrgentBurstFactorChromaPre[k])
4937 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
4938 }
4939 v->TotImmediateFlipBytes = 0.0;
4940 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4941 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k]
4942 + v->MetaRowBytes[i][j][k] + v->DPTEBytesPerRow[i][j][k];
4943 }
4944
4945 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4946 CalculateFlipSchedule(
4947 mode_lib,
4948 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4949 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4950 v->ExtraLatency,
4951 v->UrgLatency[i],
4952 v->GPUVMMaxPageTableLevels,
4953 v->HostVMEnable,
4954 v->HostVMMaxNonCachedPageTableLevels,
4955 v->GPUVMEnable,
4956 v->HostVMMinPageSize,
4957 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
4958 v->MetaRowBytes[i][j][k],
4959 v->DPTEBytesPerRow[i][j][k],
4960 v->BandwidthAvailableForImmediateFlip,
4961 v->TotImmediateFlipBytes,
4962 v->SourcePixelFormat[k],
4963 v->HTotal[k] / v->PixelClock[k],
4964 v->VRatio[k],
4965 v->VRatioChroma[k],
4966 v->Tno_bw[k],
4967 v->DCCEnable[k],
4968 v->dpte_row_height[k],
4969 v->meta_row_height[k],
4970 v->dpte_row_height_chroma[k],
4971 v->meta_row_height_chroma[k],
4972 &v->DestinationLinesToRequestVMInImmediateFlip[k],
4973 &v->DestinationLinesToRequestRowInImmediateFlip[k],
4974 &v->final_flip_bw[k],
4975 &v->ImmediateFlipSupportedForPipe[k]);
4976 }
4977 v->total_dcn_read_bw_with_flip = 0.0;
4978 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4979 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
4980 + dml_max3(
4981 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
4982 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
4983 + v->VActiveCursorBandwidth[i][j][k],
4984 v->NoOfDPP[i][j][k]
4985 * (v->final_flip_bw[k]
4986 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
4987 * v->UrgentBurstFactorLumaPre[k]
4988 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
4989 * v->UrgentBurstFactorChromaPre[k])
4990 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
4991 }
4992 v->ImmediateFlipSupportedForState[i][j] = true;
4993 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
4994 v->ImmediateFlipSupportedForState[i][j] = false;
4995 }
4996 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4997 if (v->ImmediateFlipSupportedForPipe[k] == false) {
4998 v->ImmediateFlipSupportedForState[i][j] = false;
4999 }
5000 }
5001 } else {
5002 v->ImmediateFlipSupportedForState[i][j] = false;
5003 }
5004 if (v->MaxVStartup <= 13 || v->AnyLinesForVMOrRowTooLarge == false) {
5005 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5006 NextPrefetchModeState = NextPrefetchModeState + 1;
5007 } else {
5008 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5009 }
5010 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5011 && ((v->HostVMEnable == false && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5012 || v->ImmediateFlipSupportedForState[i][j] == true))
5013 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5014
5015 CalculateWatermarksAndDRAMSpeedChangeSupport(
5016 mode_lib,
5017 v->PrefetchModePerState[i][j],
5018 v->NumberOfActivePlanes,
5019 v->MaxLineBufferLines,
5020 v->LineBufferSize,
5021 v->DPPOutputBufferPixels,
5022 v->DETBufferSizeInKByte[0],
5023 v->WritebackInterfaceBufferSize,
5024 v->DCFCLKState[i][j],
5025 v->ReturnBWPerState[i][j],
5026 v->GPUVMEnable,
5027 v->dpte_group_bytes,
5028 v->MetaChunkSize,
5029 v->UrgLatency[i],
5030 v->ExtraLatency,
5031 v->WritebackLatency,
5032 v->WritebackChunkSize,
5033 v->SOCCLKPerState[i],
5034 v->FinalDRAMClockChangeLatency,
5035 v->SRExitTime,
5036 v->SREnterPlusExitTime,
5037 v->ProjectedDCFCLKDeepSleep[i][j],
5038 v->NoOfDPPThisState,
5039 v->DCCEnable,
5040 v->RequiredDPPCLKThisState,
5041 v->DETBufferSizeYThisState,
5042 v->DETBufferSizeCThisState,
5043 v->SwathHeightYThisState,
5044 v->SwathHeightCThisState,
5045 v->LBBitPerPixel,
5046 v->SwathWidthYThisState,
5047 v->SwathWidthCThisState,
5048 v->HRatio,
5049 v->HRatioChroma,
5050 v->vtaps,
5051 v->VTAPsChroma,
5052 v->VRatio,
5053 v->VRatioChroma,
5054 v->HTotal,
5055 v->PixelClock,
5056 v->BlendingAndTiming,
5057 v->BytePerPixelInDETY,
5058 v->BytePerPixelInDETC,
5059 v->DSTXAfterScaler,
5060 v->DSTYAfterScaler,
5061 v->WritebackEnable,
5062 v->WritebackPixelFormat,
5063 v->WritebackDestinationWidth,
5064 v->WritebackDestinationHeight,
5065 v->WritebackSourceHeight,
5066 &v->DRAMClockChangeSupport[i][j],
5067 &v->UrgentWatermark,
5068 &v->WritebackUrgentWatermark,
5069 &v->DRAMClockChangeWatermark,
5070 &v->WritebackDRAMClockChangeWatermark,
5071 &v->StutterExitWatermark,
5072 &v->StutterEnterPlusExitWatermark,
5073 &v->MinActiveDRAMClockChangeLatencySupported);
5074 }
5075 }
5076
5077 /*PTE Buffer Size Check*/
5078
5079 for (i = 0; i < v->soc.num_states; i++) {
5080 for (j = 0; j < 2; j++) {
5081 v->PTEBufferSizeNotExceeded[i][j] = true;
5082 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5083 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5084 v->PTEBufferSizeNotExceeded[i][j] = false;
5085 }
5086 }
5087 }
5088 }
5089 /*Cursor Support Check*/
5090
5091 v->CursorSupport = true;
5092 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5093 if (v->CursorWidth[k][0] > 0.0) {
5094 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5095 v->CursorSupport = false;
5096 }
5097 }
5098 }
5099 /*Valid Pitch Check*/
5100
5101 v->PitchSupport = true;
5102 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5103 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5104 if (v->DCCEnable[k] == true) {
5105 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5106 } else {
5107 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5108 }
5109 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
5110 && v->SourcePixelFormat[k] != dm_rgbe && v->SourcePixelFormat[k] != dm_mono_8) {
5111 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5112 if (v->DCCEnable[k] == true) {
5113 v->AlignedDCCMetaPitchC[k] = dml_ceil(dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 64.0 * v->Read256BlockWidthC[k]);
5114 } else {
5115 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5116 }
5117 } else {
5118 v->AlignedCPitch[k] = v->PitchC[k];
5119 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5120 }
5121 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k]
5122 || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5123 v->PitchSupport = false;
5124 }
5125 }
5126
5127 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5128 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k])
5129 ViewportExceedsSurface = true;
5130
5131 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5132 && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) {
5133 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5134 ViewportExceedsSurface = true;
5135 }
5136 }
5137 }
5138 /*Mode Support, Voltage State and SOC Configuration*/
5139
5140 for (i = v->soc.num_states - 1; i >= 0; i--) {
5141 for (j = 0; j < 2; j++) {
5142 if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1
5143 && v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1
5144 && v->NotEnoughDSCUnits[i] == 0
5145 && v->DTBCLKRequiredMoreThanSupported[i] == 0
5146 && v->ROBSupport[i][j] == 1 && v->DISPCLK_DPPCLK_Support[i][j] == 1 && v->TotalAvailablePipesSupport[i][j] == 1
5147 && EnoughWritebackUnits == 1 && WritebackModeSupport == 1
5148 && v->WritebackLatencySupport == 1 && v->WritebackScaleRatioAndTapsSupport == 1 && v->CursorSupport == 1 && v->PitchSupport == 1
5149 && ViewportExceedsSurface == 0 && v->PrefetchSupported[i][j] == 1 && v->DynamicMetadataSupported[i][j] == 1
5150 && v->TotalVerticalActiveBandwidthSupport[i][j] == 1 && v->VRatioInPrefetchSupported[i][j] == 1
5151 && v->PTEBufferSizeNotExceeded[i][j] == 1 && v->NonsupportedDSCInputBPC == 0
5152 && ((v->HostVMEnable == 0 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5153 || v->ImmediateFlipSupportedForState[i][j] == true)) {
5154 v->ModeSupport[i][j] = true;
5155 } else {
5156 v->ModeSupport[i][j] = false;
5157 }
5158 }
5159 }
5160 {
5161 unsigned int MaximumMPCCombine = 0;
5162 for (i = v->soc.num_states; i >= 0; i--) {
5163 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5164 v->VoltageLevel = i;
5165 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5166 if (v->ModeSupport[i][1] == true) {
5167 MaximumMPCCombine = 1;
5168 } else {
5169 MaximumMPCCombine = 0;
5170 }
5171 }
5172 }
5173 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5174 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5175 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5176 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5177 }
5178 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5179 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5180 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5181 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5182 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5183 v->maxMpcComb = MaximumMPCCombine;
5184 }
5185 }
5186
CalculateWatermarksAndDRAMSpeedChangeSupport(struct display_mode_lib * mode_lib,unsigned int PrefetchMode,unsigned int NumberOfActivePlanes,unsigned int MaxLineBufferLines,unsigned int LineBufferSize,unsigned int DPPOutputBufferPixels,unsigned int DETBufferSizeInKByte,unsigned int WritebackInterfaceBufferSize,double DCFCLK,double ReturnBW,bool GPUVMEnable,unsigned int dpte_group_bytes[],unsigned int MetaChunkSize,double UrgentLatency,double ExtraLatency,double WritebackLatency,double WritebackChunkSize,double SOCCLK,double DRAMClockChangeLatency,double SRExitTime,double SREnterPlusExitTime,double DCFCLKDeepSleep,unsigned int DPPPerPlane[],bool DCCEnable[],double DPPCLK[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int LBBitPerPixel[],double SwathWidthY[],double SwathWidthC[],double HRatio[],double HRatioChroma[],unsigned int vtaps[],unsigned int VTAPsChroma[],double VRatio[],double VRatioChroma[],unsigned int HTotal[],double PixelClock[],unsigned int BlendingAndTiming[],double BytePerPixelDETY[],double BytePerPixelDETC[],double DSTXAfterScaler[],double DSTYAfterScaler[],bool WritebackEnable[],enum source_format_class WritebackPixelFormat[],double WritebackDestinationWidth[],double WritebackDestinationHeight[],double WritebackSourceHeight[],enum clock_change_support * DRAMClockChangeSupport,double * UrgentWatermark,double * WritebackUrgentWatermark,double * DRAMClockChangeWatermark,double * WritebackDRAMClockChangeWatermark,double * StutterExitWatermark,double * StutterEnterPlusExitWatermark,double * MinActiveDRAMClockChangeLatencySupported)5187 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5188 struct display_mode_lib *mode_lib,
5189 unsigned int PrefetchMode,
5190 unsigned int NumberOfActivePlanes,
5191 unsigned int MaxLineBufferLines,
5192 unsigned int LineBufferSize,
5193 unsigned int DPPOutputBufferPixels,
5194 unsigned int DETBufferSizeInKByte,
5195 unsigned int WritebackInterfaceBufferSize,
5196 double DCFCLK,
5197 double ReturnBW,
5198 bool GPUVMEnable,
5199 unsigned int dpte_group_bytes[],
5200 unsigned int MetaChunkSize,
5201 double UrgentLatency,
5202 double ExtraLatency,
5203 double WritebackLatency,
5204 double WritebackChunkSize,
5205 double SOCCLK,
5206 double DRAMClockChangeLatency,
5207 double SRExitTime,
5208 double SREnterPlusExitTime,
5209 double DCFCLKDeepSleep,
5210 unsigned int DPPPerPlane[],
5211 bool DCCEnable[],
5212 double DPPCLK[],
5213 unsigned int DETBufferSizeY[],
5214 unsigned int DETBufferSizeC[],
5215 unsigned int SwathHeightY[],
5216 unsigned int SwathHeightC[],
5217 unsigned int LBBitPerPixel[],
5218 double SwathWidthY[],
5219 double SwathWidthC[],
5220 double HRatio[],
5221 double HRatioChroma[],
5222 unsigned int vtaps[],
5223 unsigned int VTAPsChroma[],
5224 double VRatio[],
5225 double VRatioChroma[],
5226 unsigned int HTotal[],
5227 double PixelClock[],
5228 unsigned int BlendingAndTiming[],
5229 double BytePerPixelDETY[],
5230 double BytePerPixelDETC[],
5231 double DSTXAfterScaler[],
5232 double DSTYAfterScaler[],
5233 bool WritebackEnable[],
5234 enum source_format_class WritebackPixelFormat[],
5235 double WritebackDestinationWidth[],
5236 double WritebackDestinationHeight[],
5237 double WritebackSourceHeight[],
5238 enum clock_change_support *DRAMClockChangeSupport,
5239 double *UrgentWatermark,
5240 double *WritebackUrgentWatermark,
5241 double *DRAMClockChangeWatermark,
5242 double *WritebackDRAMClockChangeWatermark,
5243 double *StutterExitWatermark,
5244 double *StutterEnterPlusExitWatermark,
5245 double *MinActiveDRAMClockChangeLatencySupported)
5246 {
5247 double EffectiveLBLatencyHidingY = 0;
5248 double EffectiveLBLatencyHidingC = 0;
5249 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
5250 double LinesInDETC = 0;
5251 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
5252 unsigned int LinesInDETCRoundedDownToSwath = 0;
5253 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
5254 double FullDETBufferingTimeC = 0;
5255 double ActiveDRAMClockChangeLatencyMarginY = 0;
5256 double ActiveDRAMClockChangeLatencyMarginC = 0;
5257 double WritebackDRAMClockChangeLatencyMargin = 0;
5258 double PlaneWithMinActiveDRAMClockChangeMargin = 0;
5259 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 0;
5260 double FullDETBufferingTimeYStutterCriticalPlane = 0;
5261 double TimeToFinishSwathTransferStutterCriticalPlane = 0;
5262 double WritebackDRAMClockChangeLatencyHiding = 0;
5263 unsigned int k, j;
5264
5265 mode_lib->vba.TotalActiveDPP = 0;
5266 mode_lib->vba.TotalDCCActiveDPP = 0;
5267 for (k = 0; k < NumberOfActivePlanes; ++k) {
5268 mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k];
5269 if (DCCEnable[k] == true) {
5270 mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k];
5271 }
5272 }
5273
5274 *UrgentWatermark = UrgentLatency + ExtraLatency;
5275
5276 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
5277
5278 mode_lib->vba.TotalActiveWriteback = 0;
5279 for (k = 0; k < NumberOfActivePlanes; ++k) {
5280 if (WritebackEnable[k] == true) {
5281 mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
5282 }
5283 }
5284
5285 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5286 *WritebackUrgentWatermark = WritebackLatency;
5287 } else {
5288 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5289 }
5290
5291 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5292 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5293 } else {
5294 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5295 }
5296
5297 for (k = 0; k < NumberOfActivePlanes; ++k) {
5298
5299 mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
5300
5301 mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
5302
5303 EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
5304
5305 EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
5306
5307 LinesInDETY[k] = (double) DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
5308 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5309 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
5310 if (BytePerPixelDETC[k] > 0) {
5311 LinesInDETC = mode_lib->vba.DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5312 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5313 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
5314 } else {
5315 LinesInDETC = 0;
5316 FullDETBufferingTimeC = 999999;
5317 }
5318
5319 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY[k] - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
5320
5321 if (NumberOfActivePlanes > 1) {
5322 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
5323 }
5324
5325 if (BytePerPixelDETC[k] > 0) {
5326 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
5327
5328 if (NumberOfActivePlanes > 1) {
5329 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
5330 }
5331 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5332 } else {
5333 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5334 }
5335
5336 if (WritebackEnable[k] == true) {
5337
5338 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
5339 if (WritebackPixelFormat[k] == dm_444_64) {
5340 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5341 }
5342 if (mode_lib->vba.WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
5343 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding * 2;
5344 }
5345 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - mode_lib->vba.WritebackDRAMClockChangeWatermark;
5346 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5347 }
5348 }
5349
5350 mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999;
5351 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5352 for (k = 0; k < NumberOfActivePlanes; ++k) {
5353 if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < mode_lib->vba.MinActiveDRAMClockChangeMargin) {
5354 mode_lib->vba.MinActiveDRAMClockChangeMargin = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5355 if (BlendingAndTiming[k] == k) {
5356 PlaneWithMinActiveDRAMClockChangeMargin = k;
5357 } else {
5358 for (j = 0; j < NumberOfActivePlanes; ++j) {
5359 if (BlendingAndTiming[k] == j) {
5360 PlaneWithMinActiveDRAMClockChangeMargin = j;
5361 }
5362 }
5363 }
5364 }
5365 }
5366
5367 *MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
5368
5369 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5370 for (k = 0; k < NumberOfActivePlanes; ++k) {
5371 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5372 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5373 }
5374 }
5375
5376 mode_lib->vba.TotalNumberOfActiveOTG = 0;
5377 for (k = 0; k < NumberOfActivePlanes; ++k) {
5378 if (BlendingAndTiming[k] == k) {
5379 mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1;
5380 }
5381 }
5382
5383 if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) {
5384 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5385 } else if (((mode_lib->vba.SynchronizedVBlank == true || mode_lib->vba.TotalNumberOfActiveOTG == 1 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0)) {
5386 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5387 } else {
5388 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5389 }
5390
5391 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0];
5392 for (k = 0; k < NumberOfActivePlanes; ++k) {
5393 if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) {
5394 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[k];
5395 TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k] - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k])) * (HTotal[k] / PixelClock[k]) / VRatio[k];
5396 }
5397 }
5398
5399 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5400 *StutterEnterPlusExitWatermark = dml_max(SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep, TimeToFinishSwathTransferStutterCriticalPlane);
5401
5402 }
5403
CalculateDCFCLKDeepSleep(struct display_mode_lib * mode_lib,unsigned int NumberOfActivePlanes,int BytePerPixelY[],int BytePerPixelC[],double VRatio[],double VRatioChroma[],double SwathWidthY[],double SwathWidthC[],unsigned int DPPPerPlane[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],int ReturnBusWidth,double * DCFCLKDeepSleep)5404 static void CalculateDCFCLKDeepSleep(
5405 struct display_mode_lib *mode_lib,
5406 unsigned int NumberOfActivePlanes,
5407 int BytePerPixelY[],
5408 int BytePerPixelC[],
5409 double VRatio[],
5410 double VRatioChroma[],
5411 double SwathWidthY[],
5412 double SwathWidthC[],
5413 unsigned int DPPPerPlane[],
5414 double HRatio[],
5415 double HRatioChroma[],
5416 double PixelClock[],
5417 double PSCL_THROUGHPUT[],
5418 double PSCL_THROUGHPUT_CHROMA[],
5419 double DPPCLK[],
5420 double ReadBandwidthLuma[],
5421 double ReadBandwidthChroma[],
5422 int ReturnBusWidth,
5423 double *DCFCLKDeepSleep)
5424 {
5425 double DisplayPipeLineDeliveryTimeLuma = 0;
5426 double DisplayPipeLineDeliveryTimeChroma = 0;
5427 unsigned int k;
5428 double ReadBandwidth = 0.0;
5429
5430 //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
5431 for (k = 0; k < NumberOfActivePlanes; ++k) {
5432
5433 if (VRatio[k] <= 1) {
5434 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5435 } else {
5436 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5437 }
5438 if (BytePerPixelC[k] == 0) {
5439 DisplayPipeLineDeliveryTimeChroma = 0;
5440 } else {
5441 if (VRatioChroma[k] <= 1) {
5442 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5443 } else {
5444 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5445 }
5446 }
5447
5448 if (BytePerPixelC[k] > 0) {
5449 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(1.1 * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 1.1 * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5450 } else {
5451 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5452 }
5453 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(mode_lib->vba.DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5454
5455 }
5456
5457 for (k = 0; k < NumberOfActivePlanes; ++k) {
5458 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5459 }
5460
5461 *DCFCLKDeepSleep = dml_max(8.0, ReadBandwidth / ReturnBusWidth);
5462
5463 for (k = 0; k < NumberOfActivePlanes; ++k) {
5464 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
5465 }
5466 }
5467
/*
 * CalculateUrgentBurstFactor
 *
 * For the cursor, luma and chroma buffers of one plane, computes how long
 * the buffered data lasts (lines held * LineTime / vertical ratio) and turns
 * that into a burst factor:
 *
 *     factor = BufferTime / (BufferTime - UrgentLatency)
 *
 * If BufferTime - UrgentLatency <= 0 the factor is set to 0 and
 * *NotEnoughUrgentLatencyHiding is set.  If the relevant vertical ratio is
 * not positive the factor is 1.
 *
 * The number of cursor lines is the largest power of two not exceeding
 * CursorBufferSize * 1024 / (CursorWidth * CursorBPP / 8).
 *
 * Outputs:
 *   *NotEnoughUrgentLatencyHiding  always written (cleared first).
 *   *UrgentBurstFactorLuma         always written.
 *   *UrgentBurstFactorCursor       written only when CursorWidth > 0.
 *   *UrgentBurstFactorChroma       written only when BytePerPixelInDETC > 0.
 *
 * DETBufferSizeInKByte is accepted but not referenced by the body.
 *
 * Change relative to the previous revision: the chroma buffer time is now
 * guarded by and divided by VRatioC.  It previously used the luma VRatio,
 * leaving the VRatioC parameter unused.
 */
static void CalculateUrgentBurstFactor(
		long swath_width_luma_ub,
		long swath_width_chroma_ub,
		unsigned int DETBufferSizeInKByte,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma = 0;
	double LinesInDETChroma = 0;
	unsigned int LinesInCursorBuffer = 0;
	double CursorBufferSizeInTime = 0;
	double DETBufferSizeInTimeLuma = 0;
	double DETBufferSizeInTimeChroma = 0;

	*NotEnoughUrgentLatencyHiding = false;

	/* Cursor buffer */
	if (CursorWidth > 0) {
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = true;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	/* Luma DET buffer; only whole swaths count towards the buffered lines. */
	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = true;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	/* Chroma DET buffer, scaled by the chroma vertical ratio. */
	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		if (VRatioC > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = true;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5542
CalculatePixelDeliveryTimes(unsigned int NumberOfActivePlanes,double VRatio[],double VRatioChroma[],double VRatioPrefetchY[],double VRatioPrefetchC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],unsigned int DPPPerPlane[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],int BytePerPixelC[],enum scan_direction_class SourceScan[],unsigned int NumberOfCursors[],unsigned int CursorWidth[][2],unsigned int CursorBPP[][2],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesC[],unsigned int BlockHeight256BytesC[],double DisplayPipeLineDeliveryTimeLuma[],double DisplayPipeLineDeliveryTimeChroma[],double DisplayPipeLineDeliveryTimeLumaPrefetch[],double DisplayPipeLineDeliveryTimeChromaPrefetch[],double DisplayPipeRequestDeliveryTimeLuma[],double DisplayPipeRequestDeliveryTimeChroma[],double DisplayPipeRequestDeliveryTimeLumaPrefetch[],double DisplayPipeRequestDeliveryTimeChromaPrefetch[],double CursorRequestDeliveryTime[],double CursorRequestDeliveryTimePrefetch[])5543 static void CalculatePixelDeliveryTimes(
5544 unsigned int NumberOfActivePlanes,
5545 double VRatio[],
5546 double VRatioChroma[],
5547 double VRatioPrefetchY[],
5548 double VRatioPrefetchC[],
5549 unsigned int swath_width_luma_ub[],
5550 unsigned int swath_width_chroma_ub[],
5551 unsigned int DPPPerPlane[],
5552 double HRatio[],
5553 double HRatioChroma[],
5554 double PixelClock[],
5555 double PSCL_THROUGHPUT[],
5556 double PSCL_THROUGHPUT_CHROMA[],
5557 double DPPCLK[],
5558 int BytePerPixelC[],
5559 enum scan_direction_class SourceScan[],
5560 unsigned int NumberOfCursors[],
5561 unsigned int CursorWidth[][2],
5562 unsigned int CursorBPP[][2],
5563 unsigned int BlockWidth256BytesY[],
5564 unsigned int BlockHeight256BytesY[],
5565 unsigned int BlockWidth256BytesC[],
5566 unsigned int BlockHeight256BytesC[],
5567 double DisplayPipeLineDeliveryTimeLuma[],
5568 double DisplayPipeLineDeliveryTimeChroma[],
5569 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5570 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5571 double DisplayPipeRequestDeliveryTimeLuma[],
5572 double DisplayPipeRequestDeliveryTimeChroma[],
5573 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5574 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5575 double CursorRequestDeliveryTime[],
5576 double CursorRequestDeliveryTimePrefetch[])
5577 {
5578 double req_per_swath_ub = 0;
5579 unsigned int k;
5580
5581 for (k = 0; k < NumberOfActivePlanes; ++k) {
5582 if (VRatio[k] <= 1) {
5583 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5584 } else {
5585 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5586 }
5587
5588 if (BytePerPixelC[k] == 0) {
5589 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5590 } else {
5591 if (VRatioChroma[k] <= 1) {
5592 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5593 } else {
5594 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5595 }
5596 }
5597
5598 if (VRatioPrefetchY[k] <= 1) {
5599 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5600 } else {
5601 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5602 }
5603
5604 if (BytePerPixelC[k] == 0) {
5605 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5606 } else {
5607 if (VRatioPrefetchC[k] <= 1) {
5608 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5609 } else {
5610 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5611 }
5612 }
5613 }
5614
5615 for (k = 0; k < NumberOfActivePlanes; ++k) {
5616 if (SourceScan[k] != dm_vert) {
5617 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5618 } else {
5619 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5620 }
5621 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5622 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5623 if (BytePerPixelC[k] == 0) {
5624 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5625 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5626 } else {
5627 if (SourceScan[k] != dm_vert) {
5628 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5629 } else {
5630 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5631 }
5632 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5633 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5634 }
5635 }
5636
5637 for (k = 0; k < NumberOfActivePlanes; ++k) {
5638 int cursor_req_per_width = 0;
5639 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
5640 if (NumberOfCursors[k] > 0) {
5641 if (VRatio[k] <= 1) {
5642 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5643 } else {
5644 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5645 }
5646 if (VRatioPrefetchY[k] <= 1) {
5647 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5648 } else {
5649 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5650 }
5651 } else {
5652 CursorRequestDeliveryTime[k] = 0;
5653 CursorRequestDeliveryTimePrefetch[k] = 0;
5654 }
5655 }
5656 }
5657
CalculateMetaAndPTETimes(int NumberOfActivePlanes,bool GPUVMEnable,int MetaChunkSize,int MinMetaChunkSizeBytes,int HTotal[],double VRatio[],double VRatioChroma[],double DestinationLinesToRequestRowInVBlank[],double DestinationLinesToRequestRowInImmediateFlip[],bool DCCEnable[],double PixelClock[],int BytePerPixelY[],int BytePerPixelC[],enum scan_direction_class SourceScan[],int dpte_row_height[],int dpte_row_height_chroma[],int meta_row_width[],int meta_row_width_chroma[],int meta_row_height[],int meta_row_height_chroma[],int meta_req_width[],int meta_req_width_chroma[],int meta_req_height[],int meta_req_height_chroma[],int dpte_group_bytes[],int PTERequestSizeY[],int PTERequestSizeC[],int PixelPTEReqWidthY[],int PixelPTEReqHeightY[],int PixelPTEReqWidthC[],int PixelPTEReqHeightC[],int dpte_row_width_luma_ub[],int dpte_row_width_chroma_ub[],double DST_Y_PER_PTE_ROW_NOM_L[],double DST_Y_PER_PTE_ROW_NOM_C[],double DST_Y_PER_META_ROW_NOM_L[],double DST_Y_PER_META_ROW_NOM_C[],double TimePerMetaChunkNominal[],double TimePerChromaMetaChunkNominal[],double TimePerMetaChunkVBlank[],double TimePerChromaMetaChunkVBlank[],double TimePerMetaChunkFlip[],double TimePerChromaMetaChunkFlip[],double time_per_pte_group_nom_luma[],double time_per_pte_group_vblank_luma[],double time_per_pte_group_flip_luma[],double time_per_pte_group_nom_chroma[],double time_per_pte_group_vblank_chroma[],double time_per_pte_group_flip_chroma[])5658 static void CalculateMetaAndPTETimes(
5659 int NumberOfActivePlanes,
5660 bool GPUVMEnable,
5661 int MetaChunkSize,
5662 int MinMetaChunkSizeBytes,
5663 int HTotal[],
5664 double VRatio[],
5665 double VRatioChroma[],
5666 double DestinationLinesToRequestRowInVBlank[],
5667 double DestinationLinesToRequestRowInImmediateFlip[],
5668 bool DCCEnable[],
5669 double PixelClock[],
5670 int BytePerPixelY[],
5671 int BytePerPixelC[],
5672 enum scan_direction_class SourceScan[],
5673 int dpte_row_height[],
5674 int dpte_row_height_chroma[],
5675 int meta_row_width[],
5676 int meta_row_width_chroma[],
5677 int meta_row_height[],
5678 int meta_row_height_chroma[],
5679 int meta_req_width[],
5680 int meta_req_width_chroma[],
5681 int meta_req_height[],
5682 int meta_req_height_chroma[],
5683 int dpte_group_bytes[],
5684 int PTERequestSizeY[],
5685 int PTERequestSizeC[],
5686 int PixelPTEReqWidthY[],
5687 int PixelPTEReqHeightY[],
5688 int PixelPTEReqWidthC[],
5689 int PixelPTEReqHeightC[],
5690 int dpte_row_width_luma_ub[],
5691 int dpte_row_width_chroma_ub[],
5692 double DST_Y_PER_PTE_ROW_NOM_L[],
5693 double DST_Y_PER_PTE_ROW_NOM_C[],
5694 double DST_Y_PER_META_ROW_NOM_L[],
5695 double DST_Y_PER_META_ROW_NOM_C[],
5696 double TimePerMetaChunkNominal[],
5697 double TimePerChromaMetaChunkNominal[],
5698 double TimePerMetaChunkVBlank[],
5699 double TimePerChromaMetaChunkVBlank[],
5700 double TimePerMetaChunkFlip[],
5701 double TimePerChromaMetaChunkFlip[],
5702 double time_per_pte_group_nom_luma[],
5703 double time_per_pte_group_vblank_luma[],
5704 double time_per_pte_group_flip_luma[],
5705 double time_per_pte_group_nom_chroma[],
5706 double time_per_pte_group_vblank_chroma[],
5707 double time_per_pte_group_flip_chroma[])
5708 {
5709 unsigned int meta_chunk_width = 0;
5710 unsigned int min_meta_chunk_width = 0;
5711 unsigned int meta_chunk_per_row_int = 0;
5712 unsigned int meta_row_remainder = 0;
5713 unsigned int meta_chunk_threshold = 0;
5714 unsigned int meta_chunks_per_row_ub = 0;
5715 unsigned int meta_chunk_width_chroma = 0;
5716 unsigned int min_meta_chunk_width_chroma = 0;
5717 unsigned int meta_chunk_per_row_int_chroma = 0;
5718 unsigned int meta_row_remainder_chroma = 0;
5719 unsigned int meta_chunk_threshold_chroma = 0;
5720 unsigned int meta_chunks_per_row_ub_chroma = 0;
5721 unsigned int dpte_group_width_luma = 0;
5722 unsigned int dpte_groups_per_row_luma_ub = 0;
5723 unsigned int dpte_group_width_chroma = 0;
5724 unsigned int dpte_groups_per_row_chroma_ub = 0;
5725 unsigned int k;
5726
5727 for (k = 0; k < NumberOfActivePlanes; ++k) {
5728 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
5729 if (BytePerPixelC[k] == 0) {
5730 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5731 } else {
5732 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
5733 }
5734 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
5735 if (BytePerPixelC[k] == 0) {
5736 DST_Y_PER_META_ROW_NOM_C[k] = 0;
5737 } else {
5738 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
5739 }
5740 }
5741
5742 for (k = 0; k < NumberOfActivePlanes; ++k) {
5743 if (DCCEnable[k] == true) {
5744 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
5745 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
5746 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
5747 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
5748 if (SourceScan[k] != dm_vert) {
5749 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
5750 } else {
5751 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
5752 }
5753 if (meta_row_remainder <= meta_chunk_threshold) {
5754 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
5755 } else {
5756 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
5757 }
5758 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5759 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5760 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5761 if (BytePerPixelC[k] == 0) {
5762 TimePerChromaMetaChunkNominal[k] = 0;
5763 TimePerChromaMetaChunkVBlank[k] = 0;
5764 TimePerChromaMetaChunkFlip[k] = 0;
5765 } else {
5766 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5767 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5768 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
5769 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
5770 if (SourceScan[k] != dm_vert) {
5771 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
5772 } else {
5773 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
5774 }
5775 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
5776 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
5777 } else {
5778 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
5779 }
5780 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5781 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5782 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5783 }
5784 } else {
5785 TimePerMetaChunkNominal[k] = 0;
5786 TimePerMetaChunkVBlank[k] = 0;
5787 TimePerMetaChunkFlip[k] = 0;
5788 TimePerChromaMetaChunkNominal[k] = 0;
5789 TimePerChromaMetaChunkVBlank[k] = 0;
5790 TimePerChromaMetaChunkFlip[k] = 0;
5791 }
5792 }
5793
5794 for (k = 0; k < NumberOfActivePlanes; ++k) {
5795 if (GPUVMEnable == true) {
5796 if (SourceScan[k] != dm_vert) {
5797 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5798 } else {
5799 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
5800 }
5801 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
5802 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5803 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5804 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5805 if (BytePerPixelC[k] == 0) {
5806 time_per_pte_group_nom_chroma[k] = 0;
5807 time_per_pte_group_vblank_chroma[k] = 0;
5808 time_per_pte_group_flip_chroma[k] = 0;
5809 } else {
5810 if (SourceScan[k] != dm_vert) {
5811 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5812 } else {
5813 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5814 }
5815 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
5816 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5817 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5818 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5819 }
5820 } else {
5821 time_per_pte_group_nom_luma[k] = 0;
5822 time_per_pte_group_vblank_luma[k] = 0;
5823 time_per_pte_group_flip_luma[k] = 0;
5824 time_per_pte_group_nom_chroma[k] = 0;
5825 time_per_pte_group_vblank_chroma[k] = 0;
5826 time_per_pte_group_flip_chroma[k] = 0;
5827 }
5828 }
5829 }
5830
/*
 * CalculateVMGroupAndRequestTimes() - time budget per VM group / VM request.
 *
 * For every plane k in [0, NumberOfActivePlanes) the four outputs are
 *
 *	lines[k] * HTotal[k] / PixelClock[k] / count
 *
 * where lines[] is DestinationLinesToRequestVMInVBlank[] (…VBlank outputs)
 * or DestinationLinesToRequestVMInImmediateFlip[] (…Flip outputs) and count
 * is the number of VM groups (TimePerVMGroup*) or of 64-byte requests
 * (TimePerVMRequest*) for the plane.
 *
 * Which byte totals feed the counts:
 *   - DCC off:                       dpde0 bytes only
 *   - DCC on, one page-table level:  meta PTE bytes only
 *   - DCC on, more levels:           dpde0 + meta PTE bytes, plus one extra
 *                                    group per surface (luma, and chroma if
 *                                    present)
 * Chroma totals are included only when BytePerPixelC[k] > 0.
 *
 * All four outputs are 0 when GPUVMEnable is false, or when the plane has
 * neither DCC nor more than one page-table level.  With more than two
 * page-table levels every result is halved.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	unsigned int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		int group_count = 0;
		int request_count = 0;
		double group_bytes;
		double vblank_time;
		double flip_time;
		bool has_chroma;

		/* Nothing to schedule: report zero time for this plane. */
		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		has_chroma = BytePerPixelC[k] > 0;
		group_bytes = (double) (vm_group_bytes[k]);

		if (!DCCEnable[k]) {
			/* Page directory entries only. */
			group_count = has_chroma
				? dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / group_bytes, 1)
					+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / group_bytes, 1)
				: dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / group_bytes, 1);
			request_count = has_chroma
				? dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
				: dpde0_bytes_per_frame_ub_l[k] / 64;
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* Meta PTEs only. */
			group_count = has_chroma
				? dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / group_bytes, 1)
					+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / group_bytes, 1)
				: dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / group_bytes, 1);
			request_count = has_chroma
				? meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64
				: meta_pte_bytes_per_frame_ub_l[k] / 64;
		} else {
			/* Both kinds, plus one extra group per surface. */
			group_count = has_chroma
				? 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / group_bytes, 1)
					+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / group_bytes, 1)
					+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / group_bytes, 1)
					+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / group_bytes, 1)
				: 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / group_bytes, 1)
					+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / group_bytes, 1);
			request_count = has_chroma
				? dpde0_bytes_per_frame_ub_l[k] / 64
					+ dpde0_bytes_per_frame_ub_c[k] / 64
					+ meta_pte_bytes_per_frame_ub_l[k] / 64
					+ meta_pte_bytes_per_frame_ub_c[k] / 64
				: dpde0_bytes_per_frame_ub_l[k] / 64
					+ meta_pte_bytes_per_frame_ub_l[k] / 64;
		}

		/* Common prefix of the group and request formulas. */
		vblank_time = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k];
		flip_time = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k];

		TimePerVMGroupVBlank[k] = vblank_time / group_count;
		TimePerVMGroupFlip[k] = flip_time / group_count;
		TimePerVMRequestVBlank[k] = vblank_time / request_count;
		TimePerVMRequestFlip[k] = flip_time / request_count;

		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] /= 2;
			TimePerVMGroupFlip[k] /= 2;
			TimePerVMRequestVBlank[k] /= 2;
			TimePerVMRequestFlip[k] /= 2;
		}
	}
}
5941
CalculateStutterEfficiency(int NumberOfActivePlanes,long ROBBufferSizeInKByte,double TotalDataReadBandwidth,double DCFCLK,double ReturnBW,double SRExitTime,bool SynchronizedVBlank,int DPPPerPlane[],unsigned int DETBufferSizeY[],int BytePerPixelY[],double BytePerPixelDETY[],double SwathWidthY[],int SwathHeightY[],int SwathHeightC[],double DCCRateLuma[],double DCCRateChroma[],int HTotal[],int VTotal[],double PixelClock[],double VRatio[],enum scan_direction_class SourceScan[],int BlockHeight256BytesY[],int BlockWidth256BytesY[],int BlockHeight256BytesC[],int BlockWidth256BytesC[],int DCCYMaxUncompressedBlock[],int DCCCMaxUncompressedBlock[],int VActive[],bool DCCEnable[],bool WritebackEnable[],double ReadBandwidthPlaneLuma[],double ReadBandwidthPlaneChroma[],double meta_row_bw[],double dpte_row_bw[],double * StutterEfficiencyNotIncludingVBlank,double * StutterEfficiency,double * StutterPeriodOut)5942 static void CalculateStutterEfficiency(
5943 int NumberOfActivePlanes,
5944 long ROBBufferSizeInKByte,
5945 double TotalDataReadBandwidth,
5946 double DCFCLK,
5947 double ReturnBW,
5948 double SRExitTime,
5949 bool SynchronizedVBlank,
5950 int DPPPerPlane[],
5951 unsigned int DETBufferSizeY[],
5952 int BytePerPixelY[],
5953 double BytePerPixelDETY[],
5954 double SwathWidthY[],
5955 int SwathHeightY[],
5956 int SwathHeightC[],
5957 double DCCRateLuma[],
5958 double DCCRateChroma[],
5959 int HTotal[],
5960 int VTotal[],
5961 double PixelClock[],
5962 double VRatio[],
5963 enum scan_direction_class SourceScan[],
5964 int BlockHeight256BytesY[],
5965 int BlockWidth256BytesY[],
5966 int BlockHeight256BytesC[],
5967 int BlockWidth256BytesC[],
5968 int DCCYMaxUncompressedBlock[],
5969 int DCCCMaxUncompressedBlock[],
5970 int VActive[],
5971 bool DCCEnable[],
5972 bool WritebackEnable[],
5973 double ReadBandwidthPlaneLuma[],
5974 double ReadBandwidthPlaneChroma[],
5975 double meta_row_bw[],
5976 double dpte_row_bw[],
5977 double *StutterEfficiencyNotIncludingVBlank,
5978 double *StutterEfficiency,
5979 double *StutterPeriodOut)
5980 {
5981 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
5982 double FrameTimeForMinFullDETBufferingTime = 0;
5983 double StutterPeriod = 0;
5984 double AverageReadBandwidth = 0;
5985 double TotalRowReadBandwidth = 0;
5986 double AverageDCCCompressionRate = 0;
5987 double PartOfBurstThatFitsInROB = 0;
5988 double StutterBurstTime = 0;
5989 int TotalActiveWriteback = 0;
5990 double VBlankTime = 0;
5991 double SmallestVBlank = 0;
5992 int BytePerPixelYCriticalPlane = 0;
5993 double SwathWidthYCriticalPlane = 0;
5994 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
5995 double LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
5996 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
5997 double MaximumEffectiveCompressionLuma = 0;
5998 double MaximumEffectiveCompressionChroma = 0;
5999 unsigned int k;
6000
6001 for (k = 0; k < NumberOfActivePlanes; ++k) {
6002 LinesInDETY[k] = DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
6003 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
6004 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
6005 }
6006
6007 StutterPeriod = FullDETBufferingTimeY[0];
6008 FrameTimeForMinFullDETBufferingTime = VTotal[0] * HTotal[0] / PixelClock[0];
6009 BytePerPixelYCriticalPlane = BytePerPixelY[0];
6010 SwathWidthYCriticalPlane = SwathWidthY[0];
6011 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[0]
6012 - (LinesInDETY[0] - LinesInDETYRoundedDownToSwath[0]);
6013
6014 for (k = 0; k < NumberOfActivePlanes; ++k) {
6015 if (FullDETBufferingTimeY[k] < StutterPeriod) {
6016 StutterPeriod = FullDETBufferingTimeY[k];
6017 FrameTimeForMinFullDETBufferingTime = VTotal[k] * HTotal[k] / PixelClock[k];
6018 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6019 SwathWidthYCriticalPlane = SwathWidthY[k];
6020 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k]
6021 - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k]);
6022 }
6023 }
6024
6025 AverageReadBandwidth = 0;
6026 TotalRowReadBandwidth = 0;
6027 for (k = 0; k < NumberOfActivePlanes; ++k) {
6028 if (DCCEnable[k] == true) {
6029 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k])
6030 || (SourceScan[k] != dm_vert
6031 && BlockHeight256BytesY[k] > SwathHeightY[k])
6032 || DCCYMaxUncompressedBlock[k] < 256) {
6033 MaximumEffectiveCompressionLuma = 2;
6034 } else {
6035 MaximumEffectiveCompressionLuma = 4;
6036 }
6037 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(DCCRateLuma[k], MaximumEffectiveCompressionLuma);
6038
6039 if (ReadBandwidthPlaneChroma[k] > 0) {
6040 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6041 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k])
6042 || DCCCMaxUncompressedBlock[k] < 256) {
6043 MaximumEffectiveCompressionChroma = 2;
6044 } else {
6045 MaximumEffectiveCompressionChroma = 4;
6046 }
6047 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneChroma[k] / dml_min(DCCRateChroma[k], MaximumEffectiveCompressionChroma);
6048 }
6049 } else {
6050 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6051 }
6052 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6053 }
6054
6055 AverageDCCCompressionRate = TotalDataReadBandwidth / AverageReadBandwidth;
6056 PartOfBurstThatFitsInROB = dml_min(StutterPeriod * TotalDataReadBandwidth, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6057 StutterBurstTime = PartOfBurstThatFitsInROB / AverageDCCCompressionRate / ReturnBW + (StutterPeriod * TotalDataReadBandwidth
6058 - PartOfBurstThatFitsInROB) / (DCFCLK * 64) + StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6059 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6060
6061 TotalActiveWriteback = 0;
6062 for (k = 0; k < NumberOfActivePlanes; ++k) {
6063 if (WritebackEnable[k] == true) {
6064 TotalActiveWriteback = TotalActiveWriteback + 1;
6065 }
6066 }
6067
6068 if (TotalActiveWriteback == 0) {
6069 *StutterEfficiencyNotIncludingVBlank = (1
6070 - (SRExitTime + StutterBurstTime) / StutterPeriod) * 100;
6071 } else {
6072 *StutterEfficiencyNotIncludingVBlank = 0;
6073 }
6074
6075 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6076 SmallestVBlank = (VTotal[0] - VActive[0]) * HTotal[0] / PixelClock[0];
6077 } else {
6078 SmallestVBlank = 0;
6079 }
6080 for (k = 0; k < NumberOfActivePlanes; ++k) {
6081 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6082 VBlankTime = (VTotal[k] - VActive[k]) * HTotal[k] / PixelClock[k];
6083 } else {
6084 VBlankTime = 0;
6085 }
6086 SmallestVBlank = dml_min(SmallestVBlank, VBlankTime);
6087 }
6088
6089 *StutterEfficiency = (*StutterEfficiencyNotIncludingVBlank / 100.0 * (FrameTimeForMinFullDETBufferingTime - SmallestVBlank) + SmallestVBlank) / FrameTimeForMinFullDETBufferingTime * 100;
6090
6091 if (StutterPeriodOut)
6092 *StutterPeriodOut = StutterPeriod;
6093 }
6094
CalculateSwathAndDETConfiguration(bool ForceSingleDPP,int NumberOfActivePlanes,unsigned int DETBufferSizeInKByte,double MaximumSwathWidthLuma[],double MaximumSwathWidthChroma[],enum scan_direction_class SourceScan[],enum source_format_class SourcePixelFormat[],enum dm_swizzle_mode SurfaceTiling[],int ViewportWidth[],int ViewportHeight[],int SurfaceWidthY[],int SurfaceWidthC[],int SurfaceHeightY[],int SurfaceHeightC[],int Read256BytesBlockHeightY[],int Read256BytesBlockHeightC[],int Read256BytesBlockWidthY[],int Read256BytesBlockWidthC[],enum odm_combine_mode ODMCombineEnabled[],int BlendingAndTiming[],int BytePerPixY[],int BytePerPixC[],double BytePerPixDETY[],double BytePerPixDETC[],int HActive[],double HRatio[],double HRatioChroma[],int DPPPerPlane[],int swath_width_luma_ub[],int swath_width_chroma_ub[],double SwathWidth[],double SwathWidthChroma[],int SwathHeightY[],int SwathHeightC[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],bool ViewportSizeSupportPerPlane[],bool * ViewportSizeSupport)6095 static void CalculateSwathAndDETConfiguration(
6096 bool ForceSingleDPP,
6097 int NumberOfActivePlanes,
6098 unsigned int DETBufferSizeInKByte,
6099 double MaximumSwathWidthLuma[],
6100 double MaximumSwathWidthChroma[],
6101 enum scan_direction_class SourceScan[],
6102 enum source_format_class SourcePixelFormat[],
6103 enum dm_swizzle_mode SurfaceTiling[],
6104 int ViewportWidth[],
6105 int ViewportHeight[],
6106 int SurfaceWidthY[],
6107 int SurfaceWidthC[],
6108 int SurfaceHeightY[],
6109 int SurfaceHeightC[],
6110 int Read256BytesBlockHeightY[],
6111 int Read256BytesBlockHeightC[],
6112 int Read256BytesBlockWidthY[],
6113 int Read256BytesBlockWidthC[],
6114 enum odm_combine_mode ODMCombineEnabled[],
6115 int BlendingAndTiming[],
6116 int BytePerPixY[],
6117 int BytePerPixC[],
6118 double BytePerPixDETY[],
6119 double BytePerPixDETC[],
6120 int HActive[],
6121 double HRatio[],
6122 double HRatioChroma[],
6123 int DPPPerPlane[],
6124 int swath_width_luma_ub[],
6125 int swath_width_chroma_ub[],
6126 double SwathWidth[],
6127 double SwathWidthChroma[],
6128 int SwathHeightY[],
6129 int SwathHeightC[],
6130 unsigned int DETBufferSizeY[],
6131 unsigned int DETBufferSizeC[],
6132 bool ViewportSizeSupportPerPlane[],
6133 bool *ViewportSizeSupport)
6134 {
6135 int MaximumSwathHeightY[DC__NUM_DPP__MAX] = { 0 };
6136 int MaximumSwathHeightC[DC__NUM_DPP__MAX] = { 0 };
6137 int MinimumSwathHeightY = 0;
6138 int MinimumSwathHeightC = 0;
6139 long RoundedUpMaxSwathSizeBytesY = 0;
6140 long RoundedUpMaxSwathSizeBytesC = 0;
6141 long RoundedUpMinSwathSizeBytesY = 0;
6142 long RoundedUpMinSwathSizeBytesC = 0;
6143 long RoundedUpSwathSizeBytesY = 0;
6144 long RoundedUpSwathSizeBytesC = 0;
6145 double SwathWidthSingleDPP[DC__NUM_DPP__MAX] = { 0 };
6146 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX] = { 0 };
6147 int k;
6148
6149 CalculateSwathWidth(
6150 ForceSingleDPP,
6151 NumberOfActivePlanes,
6152 SourcePixelFormat,
6153 SourceScan,
6154 ViewportWidth,
6155 ViewportHeight,
6156 SurfaceWidthY,
6157 SurfaceWidthC,
6158 SurfaceHeightY,
6159 SurfaceHeightC,
6160 ODMCombineEnabled,
6161 BytePerPixY,
6162 BytePerPixC,
6163 Read256BytesBlockHeightY,
6164 Read256BytesBlockHeightC,
6165 Read256BytesBlockWidthY,
6166 Read256BytesBlockWidthC,
6167 BlendingAndTiming,
6168 HActive,
6169 HRatio,
6170 DPPPerPlane,
6171 SwathWidthSingleDPP,
6172 SwathWidthSingleDPPChroma,
6173 SwathWidth,
6174 SwathWidthChroma,
6175 MaximumSwathHeightY,
6176 MaximumSwathHeightC,
6177 swath_width_luma_ub,
6178 swath_width_chroma_ub);
6179
6180 *ViewportSizeSupport = true;
6181 for (k = 0; k < NumberOfActivePlanes; ++k) {
6182 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32
6183 || SourcePixelFormat[k] == dm_444_16
6184 || SourcePixelFormat[k] == dm_mono_16
6185 || SourcePixelFormat[k] == dm_mono_8
6186 || SourcePixelFormat[k] == dm_rgbe)) {
6187 if (SurfaceTiling[k] == dm_sw_linear
6188 || (SourcePixelFormat[k] == dm_444_64
6189 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6190 && SourceScan[k] != dm_vert)) {
6191 MinimumSwathHeightY = MaximumSwathHeightY[k];
6192 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6193 MinimumSwathHeightY = MaximumSwathHeightY[k];
6194 } else {
6195 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6196 }
6197 MinimumSwathHeightC = MaximumSwathHeightC[k];
6198 } else {
6199 if (SurfaceTiling[k] == dm_sw_linear) {
6200 MinimumSwathHeightY = MaximumSwathHeightY[k];
6201 MinimumSwathHeightC = MaximumSwathHeightC[k];
6202 } else if (SourcePixelFormat[k] == dm_rgbe_alpha
6203 && SourceScan[k] == dm_vert) {
6204 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6205 MinimumSwathHeightC = MaximumSwathHeightC[k];
6206 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6207 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6208 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6209 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6210 MinimumSwathHeightY = MaximumSwathHeightY[k];
6211 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6212 } else {
6213 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6214 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6215 }
6216 }
6217
6218 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6219 * MaximumSwathHeightY[k];
6220 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6221 * MinimumSwathHeightY;
6222 if (SourcePixelFormat[k] == dm_420_10) {
6223 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6224 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6225 }
6226 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6227 * MaximumSwathHeightC[k];
6228 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6229 * MinimumSwathHeightC;
6230 if (SourcePixelFormat[k] == dm_420_10) {
6231 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6232 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6233 }
6234
6235 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6236 <= DETBufferSizeInKByte * 1024 / 2) {
6237 SwathHeightY[k] = MaximumSwathHeightY[k];
6238 SwathHeightC[k] = MaximumSwathHeightC[k];
6239 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6240 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6241 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6242 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6243 <= DETBufferSizeInKByte * 1024 / 2) {
6244 SwathHeightY[k] = MinimumSwathHeightY;
6245 SwathHeightC[k] = MaximumSwathHeightC[k];
6246 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6247 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6248 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6249 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6250 <= DETBufferSizeInKByte * 1024 / 2) {
6251 SwathHeightY[k] = MaximumSwathHeightY[k];
6252 SwathHeightC[k] = MinimumSwathHeightC;
6253 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6254 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6255 } else {
6256 SwathHeightY[k] = MinimumSwathHeightY;
6257 SwathHeightC[k] = MinimumSwathHeightC;
6258 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6259 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6260 }
6261
6262 if (SwathHeightC[k] == 0) {
6263 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024;
6264 DETBufferSizeC[k] = 0;
6265 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6266 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 / 2;
6267 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 2;
6268 } else {
6269 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 * 2 / 3;
6270 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 3;
6271 }
6272
6273 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6274 > DETBufferSizeInKByte * 1024 / 2
6275 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6276 || (SwathHeightC[k] > 0
6277 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6278 *ViewportSizeSupport = false;
6279 ViewportSizeSupportPerPlane[k] = false;
6280 } else {
6281 ViewportSizeSupportPerPlane[k] = true;
6282 }
6283 }
6284 }
6285
CalculateSwathWidth(bool ForceSingleDPP,int NumberOfActivePlanes,enum source_format_class SourcePixelFormat[],enum scan_direction_class SourceScan[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],enum odm_combine_mode ODMCombineEnabled[],int BytePerPixY[],int BytePerPixC[],int Read256BytesBlockHeightY[],int Read256BytesBlockHeightC[],int Read256BytesBlockWidthY[],int Read256BytesBlockWidthC[],int BlendingAndTiming[],unsigned int HActive[],double HRatio[],int DPPPerPlane[],double SwathWidthSingleDPPY[],double SwathWidthSingleDPPC[],double SwathWidthY[],double SwathWidthC[],int MaximumSwathHeightY[],int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])6286 static void CalculateSwathWidth(
6287 bool ForceSingleDPP,
6288 int NumberOfActivePlanes,
6289 enum source_format_class SourcePixelFormat[],
6290 enum scan_direction_class SourceScan[],
6291 unsigned int ViewportWidth[],
6292 unsigned int ViewportHeight[],
6293 unsigned int SurfaceWidthY[],
6294 unsigned int SurfaceWidthC[],
6295 unsigned int SurfaceHeightY[],
6296 unsigned int SurfaceHeightC[],
6297 enum odm_combine_mode ODMCombineEnabled[],
6298 int BytePerPixY[],
6299 int BytePerPixC[],
6300 int Read256BytesBlockHeightY[],
6301 int Read256BytesBlockHeightC[],
6302 int Read256BytesBlockWidthY[],
6303 int Read256BytesBlockWidthC[],
6304 int BlendingAndTiming[],
6305 unsigned int HActive[],
6306 double HRatio[],
6307 int DPPPerPlane[],
6308 double SwathWidthSingleDPPY[],
6309 double SwathWidthSingleDPPC[],
6310 double SwathWidthY[],
6311 double SwathWidthC[],
6312 int MaximumSwathHeightY[],
6313 int MaximumSwathHeightC[],
6314 unsigned int swath_width_luma_ub[],
6315 unsigned int swath_width_chroma_ub[])
6316 {
6317 unsigned int k, j;
6318 long surface_width_ub_l;
6319 long surface_height_ub_l;
6320 long surface_width_ub_c;
6321 long surface_height_ub_c;
6322
6323 for (k = 0; k < NumberOfActivePlanes; ++k) {
6324 enum odm_combine_mode MainPlaneODMCombine = 0;
6325
6326 if (SourceScan[k] != dm_vert) {
6327 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6328 } else {
6329 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6330 }
6331
6332 MainPlaneODMCombine = ODMCombineEnabled[k];
6333 for (j = 0; j < NumberOfActivePlanes; ++j) {
6334 if (BlendingAndTiming[k] == j) {
6335 MainPlaneODMCombine = ODMCombineEnabled[j];
6336 }
6337 }
6338
6339 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6340 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6341 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6342 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6343 } else if (DPPPerPlane[k] == 2) {
6344 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6345 } else {
6346 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6347 }
6348
6349 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6350 SwathWidthC[k] = SwathWidthY[k] / 2;
6351 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6352 } else {
6353 SwathWidthC[k] = SwathWidthY[k];
6354 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6355 }
6356
6357 if (ForceSingleDPP == true) {
6358 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6359 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6360 }
6361
6362 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6363 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6364
6365 if (SourceScan[k] != dm_vert) {
6366 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6367 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6368 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6369 Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6370 if (BytePerPixC[k] > 0) {
6371 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6372 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6373 Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6374 } else {
6375 swath_width_chroma_ub[k] = 0;
6376 }
6377 } else {
6378 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6379 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6380 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6381 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6382 if (BytePerPixC[k] > 0) {
6383 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6384 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6385 Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6386 } else {
6387 swath_width_chroma_ub[k] = 0;
6388 }
6389 }
6390 }
6391 }
6392
/*
 * CalculateExtraLatency - extra latency as a time value.
 *
 * Returns (RoundTripPingLatencyCycles + 32) / DCFCLK plus the byte count
 * reported by CalculateExtraLatencyBytes() divided by ReturnBW. All
 * arguments other than RoundTripPingLatencyCycles, DCFCLK and ReturnBW are
 * forwarded unchanged to CalculateExtraLatencyBytes().
 */
static double CalculateExtraLatency(
		long RoundTripPingLatencyCycles,
		long ReorderingBytes,
		double DCFCLK,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		double ReturnBW,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	const double ExtraBytes = CalculateExtraLatencyBytes(
			ReorderingBytes,
			TotalNumberOfActiveDPP,
			PixelChunkSizeInKByte,
			TotalNumberOfDCCActiveDPP,
			MetaChunkSize,
			GPUVMEnable,
			HostVMEnable,
			NumberOfActivePlanes,
			NumberOfDPP,
			dpte_group_bytes,
			PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
			PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
			HostVMMinPageSize,
			HostVMMaxNonCachedPageTableLevels);

	/* Fixed 32-cycle allowance on top of the round trip, then the byte transfer time. */
	return (RoundTripPingLatencyCycles + 32) / DCFCLK + ExtraBytes / ReturnBW;
}
6431
/*
 * CalculateExtraLatencyBytes - byte count behind the extra latency estimate.
 *
 * The result is ReorderingBytes plus 1024 times
 * (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *  + TotalNumberOfDCCActiveDPP * MetaChunkSize).
 * When GPUVMEnable is set, each plane k in [0, NumberOfActivePlanes) adds
 * NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) * factor, where
 * "levels" and "factor" are 0 and 1 unless HostVMEnable is also set. With
 * host VM on, "factor" is the PixelMixedWithVMData / VMDataOnly percentage
 * ratio and "levels" is HostVMMaxNonCachedPageTableLevels reduced by 0, 1
 * or 2 (floored at 0 for the reduced cases) depending on HostVMMinPageSize.
 */
static double CalculateExtraLatencyBytes(
		long ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	/* Defaults apply whenever GPUVM and host VM are not both enabled. */
	double HostVMInefficiencyFactor = 1;
	int HostVMDynamicLevels = 0;
	double ExtraBytes;
	unsigned int plane;

	if (GPUVMEnable && HostVMEnable) {
		HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;

		/* Larger minimum page sizes reduce the number of levels counted. */
		if (HostVMMinPageSize < 2048)
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	ExtraBytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (!GPUVMEnable)
		return ExtraBytes;

	for (plane = 0; plane < NumberOfActivePlanes; ++plane)
		ExtraBytes += NumberOfDPP[plane] * dpte_group_bytes[plane] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;

	return ExtraBytes;
}
6476
6477
/*
 * CalculateUrgentLatency - pick the governing urgent latency.
 *
 * Returns the largest of the three urgent latency inputs. When
 * DoUrgentLatencyAdjustment is set, the result is additionally offset by
 * UrgentLatencyAdjustmentFabricClockComponent
 *   * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double WorstLatency = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return WorstLatency;

	return WorstLatency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
6495
UseMinimumDCFCLK(struct display_mode_lib * mode_lib,struct vba_vars_st * v,int MaxPrefetchMode,int ReorderingBytes)6496 static noinline_for_stack void UseMinimumDCFCLK(
6497 struct display_mode_lib *mode_lib,
6498 struct vba_vars_st *v,
6499 int MaxPrefetchMode,
6500 int ReorderingBytes)
6501 {
6502 double NormalEfficiency = 0;
6503 double PTEEfficiency = 0;
6504 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2] = { { 0 } };
6505 unsigned int i, j, k;
6506
6507 NormalEfficiency = (v->HostVMEnable == true ? v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
6508 : v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly) / 100.0;
6509 PTEEfficiency = (v->HostVMEnable == true ? v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly
6510 / v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData : 1.0);
6511 for (i = 0; i < mode_lib->soc.num_states; ++i) {
6512 for (j = 0; j <= 1; ++j) {
6513 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX] = { 0 };
6514 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX] = { 0 };
6515 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX] = { 0 };
6516 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX] = { 0 };
6517 double MinimumTWait = 0;
6518 double NonDPTEBandwidth = 0;
6519 double DPTEBandwidth = 0;
6520 double DCFCLKRequiredForAverageBandwidth = 0;
6521 double ExtraLatencyBytes = 0;
6522 double ExtraLatencyCycles = 0;
6523 double DCFCLKRequiredForPeakBandwidth = 0;
6524 int NoOfDPPState[DC__NUM_DPP__MAX] = { 0 };
6525 double MinimumTvmPlus2Tr0 = 0;
6526
6527 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
6528 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
6529 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
6530 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
6531 }
6532
6533 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
6534 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
6535 }
6536
6537 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
6538 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
6539 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
6540 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
6541 DCFCLKRequiredForAverageBandwidth = dml_max3(v->ProjectedDCFCLKDeepSleep[i][j],
6542 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth / (v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
6543 (NonDPTEBandwidth + DPTEBandwidth / PTEEfficiency) / NormalEfficiency / v->ReturnBusWidth);
6544
6545 ExtraLatencyBytes = CalculateExtraLatencyBytes(ReorderingBytes, v->TotalNumberOfActiveDPP[i][j], v->PixelChunkSizeInKByte, v->TotalNumberOfDCCActiveDPP[i][j],
6546 v->MetaChunkSize, v->GPUVMEnable, v->HostVMEnable, v->NumberOfActivePlanes, NoOfDPPState, v->dpte_group_bytes,
6547 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6548 v->HostVMMinPageSize, v->HostVMMaxNonCachedPageTableLevels);
6549 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + 32 + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
6550 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
6551 double DCFCLKCyclesRequiredInPrefetch = { 0 };
6552 double ExpectedPrefetchBWAcceleration = { 0 };
6553 double PrefetchTime = { 0 };
6554
6555 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
6556 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
6557 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / PTEEfficiency
6558 / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * v->DPTEBytesPerRow[i][j][k] / PTEEfficiency
6559 / NormalEfficiency / v->ReturnBusWidth + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
6560 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
6561 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k]) / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
6562 DynamicMetadataVMExtraLatency[k] = (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
6563 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
6564 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - v->UrgLatency[i] * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels
6565 : v->GPUVMMaxPageTableLevels - 2) * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - DynamicMetadataVMExtraLatency[k];
6566
6567 if (PrefetchTime > 0) {
6568 double ExpectedVRatioPrefetch = { 0 };
6569 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
6570 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
6571 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
6572 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
6573 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
6574 + NoOfDPPState[k] * DPTEBandwidth / PTEEfficiency / NormalEfficiency / v->ReturnBusWidth;
6575 }
6576 } else {
6577 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
6578 }
6579 if (v->DynamicMetadataEnable[k] == true) {
6580 double TsetupPipe = { 0 };
6581 double TdmbfPipe = { 0 };
6582 double TdmsksPipe = { 0 };
6583 double TdmecPipe = { 0 };
6584 double AllowedTimeForUrgentExtraLatency = { 0 };
6585
6586 CalculateDynamicMetadataParameters(
6587 v->MaxInterDCNTileRepeaters,
6588 v->RequiredDPPCLK[i][j][k],
6589 v->RequiredDISPCLK[i][j],
6590 v->ProjectedDCFCLKDeepSleep[i][j],
6591 v->PixelClock[k],
6592 v->HTotal[k],
6593 v->VTotal[k] - v->VActive[k],
6594 v->DynamicMetadataTransmittedBytes[k],
6595 v->DynamicMetadataLinesBeforeActiveRequired[k],
6596 v->Interlace[k],
6597 v->ProgressiveToInterlaceUnitInOPP,
6598 &TsetupPipe,
6599 &TdmbfPipe,
6600 &TdmecPipe,
6601 &TdmsksPipe);
6602 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TsetupPipe
6603 - TdmbfPipe - TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
6604 if (AllowedTimeForUrgentExtraLatency > 0) {
6605 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(DCFCLKRequiredForPeakBandwidthPerPlane[k],
6606 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
6607 } else {
6608 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
6609 }
6610 }
6611 }
6612 DCFCLKRequiredForPeakBandwidth = 0;
6613 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
6614 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
6615 }
6616 MinimumTvmPlus2Tr0 = v->UrgLatency[i] * (v->GPUVMEnable == true ? (v->HostVMEnable == true ?
6617 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) : 0);
6618 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
6619 double MaximumTvmPlus2Tr0PlusTsw = { 0 };
6620 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
6621 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
6622 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
6623 } else {
6624 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 2 * ExtraLatencyCycles
6625 / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
6626 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
6627 }
6628 }
6629 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * (1 + mode_lib->vba.PercentMarginOverMinimumRequiredDCFCLK / 100)
6630 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
6631 }
6632 }
6633 }
6634
6635 #endif /* CONFIG_DRM_AMD_DC_DCN */
6636