1 /*
2 * Copyright 2022 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
29
30 #define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096
31
dml32_dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)32 unsigned int dml32_dscceComputeDelay(
33 unsigned int bpc,
34 double BPP,
35 unsigned int sliceWidth,
36 unsigned int numSlices,
37 enum output_format_class pixelFormat,
38 enum output_encoder_class Output)
39 {
40 // valid bpc = source bits per component in the set of {8, 10, 12}
41 // valid bpp = increments of 1/16 of a bit
42 // min = 6/7/8 in N420/N422/444, respectively
43 // max = such that compression is 1:1
44 //valid sliceWidth = number of pixels per slice line,
45 // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
46 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
47 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
48
49 // fixed value
50 unsigned int rcModelSize = 8192;
51
52 // N422/N420 operate at 2 pixels per clock
53 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
54 Delay, pixels;
55
56 if (pixelFormat == dm_420)
57 pixelsPerClock = 2;
58 else if (pixelFormat == dm_n422)
59 pixelsPerClock = 2;
60 // #all other modes operate at 1 pixel per clock
61 else
62 pixelsPerClock = 1;
63
64 //initial transmit delay as per PPS
65 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
66
67 //compute ssm delay
68 if (bpc == 8)
69 D = 81;
70 else if (bpc == 10)
71 D = 89;
72 else
73 D = 113;
74
75 //divide by pixel per cycle to compute slice width as seen by DSC
76 w = sliceWidth / pixelsPerClock;
77
78 //422 mode has an additional cycle of delay
79 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
80 s = 0;
81 else
82 s = 1;
83
84 //main calculation for the dscce
85 ix = initalXmitDelay + 45;
86 wx = (w + 2) / 3;
87 p = 3 * wx - w;
88 l0 = ix / w;
89 a = ix + p * l0;
90 ax = (a + 2) / 3 + D + 6 + 1;
91 L = (ax + wx - 1) / wx;
92 if ((ix % w) == 0 && p != 0)
93 lstall = 1;
94 else
95 lstall = 0;
96 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
97
98 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
99 pixels = Delay * 3 * pixelsPerClock;
100
101 #ifdef __DML_VBA_DEBUG__
102 dml_print("DML::%s: bpc: %d\n", __func__, bpc);
103 dml_print("DML::%s: BPP: %f\n", __func__, BPP);
104 dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
105 dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
106 dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
107 dml_print("DML::%s: Output: %d\n", __func__, Output);
108 dml_print("DML::%s: pixels: %d\n", __func__, pixels);
109 #endif
110
111 return pixels;
112 }
113
dml32_dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)114 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
115 {
116 unsigned int Delay = 0;
117
118 if (pixelFormat == dm_420) {
119 // sfr
120 Delay = Delay + 2;
121 // dsccif
122 Delay = Delay + 0;
123 // dscc - input deserializer
124 Delay = Delay + 3;
125 // dscc gets pixels every other cycle
126 Delay = Delay + 2;
127 // dscc - input cdc fifo
128 Delay = Delay + 12;
129 // dscc gets pixels every other cycle
130 Delay = Delay + 13;
131 // dscc - cdc uncertainty
132 Delay = Delay + 2;
133 // dscc - output cdc fifo
134 Delay = Delay + 7;
135 // dscc gets pixels every other cycle
136 Delay = Delay + 3;
137 // dscc - cdc uncertainty
138 Delay = Delay + 2;
139 // dscc - output serializer
140 Delay = Delay + 1;
141 // sft
142 Delay = Delay + 1;
143 } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
144 // sfr
145 Delay = Delay + 2;
146 // dsccif
147 Delay = Delay + 1;
148 // dscc - input deserializer
149 Delay = Delay + 5;
150 // dscc - input cdc fifo
151 Delay = Delay + 25;
152 // dscc - cdc uncertainty
153 Delay = Delay + 2;
154 // dscc - output cdc fifo
155 Delay = Delay + 10;
156 // dscc - cdc uncertainty
157 Delay = Delay + 2;
158 // dscc - output serializer
159 Delay = Delay + 1;
160 // sft
161 Delay = Delay + 1;
162 } else {
163 // sfr
164 Delay = Delay + 2;
165 // dsccif
166 Delay = Delay + 0;
167 // dscc - input deserializer
168 Delay = Delay + 3;
169 // dscc - input cdc fifo
170 Delay = Delay + 12;
171 // dscc - cdc uncertainty
172 Delay = Delay + 2;
173 // dscc - output cdc fifo
174 Delay = Delay + 7;
175 // dscc - output serializer
176 Delay = Delay + 1;
177 // dscc - cdc uncertainty
178 Delay = Delay + 2;
179 // sft
180 Delay = Delay + 1;
181 }
182
183 return Delay;
184 }
185
186
IsVertical(enum dm_rotation_angle Scan)187 bool IsVertical(enum dm_rotation_angle Scan)
188 {
189 bool is_vert = false;
190
191 if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
192 is_vert = true;
193 else
194 is_vert = false;
195 return is_vert;
196 }
197
dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(double HRatio,double HRatioChroma,double VRatio,double VRatioChroma,double MaxDCHUBToPSCLThroughput,double MaxPSCLToLBThroughput,double PixelClock,enum source_format_class SourcePixelFormat,unsigned int HTaps,unsigned int HTapsChroma,unsigned int VTaps,unsigned int VTapsChroma,double * PSCL_THROUGHPUT,double * PSCL_THROUGHPUT_CHROMA,double * DPPCLKUsingSingleDPP)198 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
199 double HRatio,
200 double HRatioChroma,
201 double VRatio,
202 double VRatioChroma,
203 double MaxDCHUBToPSCLThroughput,
204 double MaxPSCLToLBThroughput,
205 double PixelClock,
206 enum source_format_class SourcePixelFormat,
207 unsigned int HTaps,
208 unsigned int HTapsChroma,
209 unsigned int VTaps,
210 unsigned int VTapsChroma,
211
212 /* output */
213 double *PSCL_THROUGHPUT,
214 double *PSCL_THROUGHPUT_CHROMA,
215 double *DPPCLKUsingSingleDPP)
216 {
217 double DPPCLKUsingSingleDPPLuma;
218 double DPPCLKUsingSingleDPPChroma;
219
220 if (HRatio > 1) {
221 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
222 dml_ceil((double) HTaps / 6.0, 1.0));
223 } else {
224 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
225 }
226
227 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
228 *PSCL_THROUGHPUT, 1);
229
230 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
231 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
232
233 if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
234 SourcePixelFormat != dm_rgbe_alpha)) {
235 *PSCL_THROUGHPUT_CHROMA = 0;
236 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
237 } else {
238 if (HRatioChroma > 1) {
239 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
240 HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
241 } else {
242 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
243 }
244 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
245 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
246 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
247 DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
248 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
249 }
250 }
251
dml32_CalculateBytePerPixelAndBlockSizes(enum source_format_class SourcePixelFormat,enum dm_swizzle_mode SurfaceTiling,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC,unsigned int * MacroTileHeightY,unsigned int * MacroTileHeightC,unsigned int * MacroTileWidthY,unsigned int * MacroTileWidthC)252 void dml32_CalculateBytePerPixelAndBlockSizes(
253 enum source_format_class SourcePixelFormat,
254 enum dm_swizzle_mode SurfaceTiling,
255
256 /* Output */
257 unsigned int *BytePerPixelY,
258 unsigned int *BytePerPixelC,
259 double *BytePerPixelDETY,
260 double *BytePerPixelDETC,
261 unsigned int *BlockHeight256BytesY,
262 unsigned int *BlockHeight256BytesC,
263 unsigned int *BlockWidth256BytesY,
264 unsigned int *BlockWidth256BytesC,
265 unsigned int *MacroTileHeightY,
266 unsigned int *MacroTileHeightC,
267 unsigned int *MacroTileWidthY,
268 unsigned int *MacroTileWidthC)
269 {
270 if (SourcePixelFormat == dm_444_64) {
271 *BytePerPixelDETY = 8;
272 *BytePerPixelDETC = 0;
273 *BytePerPixelY = 8;
274 *BytePerPixelC = 0;
275 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
276 *BytePerPixelDETY = 4;
277 *BytePerPixelDETC = 0;
278 *BytePerPixelY = 4;
279 *BytePerPixelC = 0;
280 } else if (SourcePixelFormat == dm_444_16) {
281 *BytePerPixelDETY = 2;
282 *BytePerPixelDETC = 0;
283 *BytePerPixelY = 2;
284 *BytePerPixelC = 0;
285 } else if (SourcePixelFormat == dm_444_8) {
286 *BytePerPixelDETY = 1;
287 *BytePerPixelDETC = 0;
288 *BytePerPixelY = 1;
289 *BytePerPixelC = 0;
290 } else if (SourcePixelFormat == dm_rgbe_alpha) {
291 *BytePerPixelDETY = 4;
292 *BytePerPixelDETC = 1;
293 *BytePerPixelY = 4;
294 *BytePerPixelC = 1;
295 } else if (SourcePixelFormat == dm_420_8) {
296 *BytePerPixelDETY = 1;
297 *BytePerPixelDETC = 2;
298 *BytePerPixelY = 1;
299 *BytePerPixelC = 2;
300 } else if (SourcePixelFormat == dm_420_12) {
301 *BytePerPixelDETY = 2;
302 *BytePerPixelDETC = 4;
303 *BytePerPixelY = 2;
304 *BytePerPixelC = 4;
305 } else {
306 *BytePerPixelDETY = 4.0 / 3;
307 *BytePerPixelDETC = 8.0 / 3;
308 *BytePerPixelY = 2;
309 *BytePerPixelC = 4;
310 }
311 #ifdef __DML_VBA_DEBUG__
312 dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
313 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
314 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
315 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY);
316 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC);
317 #endif
318 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
319 || SourcePixelFormat == dm_444_16
320 || SourcePixelFormat == dm_444_8
321 || SourcePixelFormat == dm_mono_16
322 || SourcePixelFormat == dm_mono_8
323 || SourcePixelFormat == dm_rgbe)) {
324 if (SurfaceTiling == dm_sw_linear)
325 *BlockHeight256BytesY = 1;
326 else if (SourcePixelFormat == dm_444_64)
327 *BlockHeight256BytesY = 4;
328 else if (SourcePixelFormat == dm_444_8)
329 *BlockHeight256BytesY = 16;
330 else
331 *BlockHeight256BytesY = 8;
332
333 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
334 *BlockHeight256BytesC = 0;
335 *BlockWidth256BytesC = 0;
336 } else {
337 if (SurfaceTiling == dm_sw_linear) {
338 *BlockHeight256BytesY = 1;
339 *BlockHeight256BytesC = 1;
340 } else if (SourcePixelFormat == dm_rgbe_alpha) {
341 *BlockHeight256BytesY = 8;
342 *BlockHeight256BytesC = 16;
343 } else if (SourcePixelFormat == dm_420_8) {
344 *BlockHeight256BytesY = 16;
345 *BlockHeight256BytesC = 8;
346 } else {
347 *BlockHeight256BytesY = 8;
348 *BlockHeight256BytesC = 8;
349 }
350 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
351 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
352 }
353 #ifdef __DML_VBA_DEBUG__
354 dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY);
355 dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
356 dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC);
357 dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
358 #endif
359
360 if (SurfaceTiling == dm_sw_linear) {
361 *MacroTileHeightY = *BlockHeight256BytesY;
362 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
363 *MacroTileHeightC = *BlockHeight256BytesC;
364 if (*MacroTileHeightC == 0)
365 *MacroTileWidthC = 0;
366 else
367 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
368 } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
369 SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
370 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
371 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
372 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
373 if (*MacroTileHeightC == 0)
374 *MacroTileWidthC = 0;
375 else
376 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
377 } else {
378 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
379 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
380 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
381 if (*MacroTileHeightC == 0)
382 *MacroTileWidthC = 0;
383 else
384 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
385 }
386
387 #ifdef __DML_VBA_DEBUG__
388 dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY);
389 dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
390 dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC);
391 dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
392 #endif
393 } // CalculateBytePerPixelAndBlockSizes
394
/*
 * dml32_CalculateSwathAndDETConfiguration - choose swath heights and split the
 * DET buffer between the two planes of every active surface.
 *
 * Steps, in order:
 *   1. dml32_CalculateSwathWidth() fills the swath widths, the *_ub widths and
 *      the maximum swath heights for each surface.
 *   2. The maximum swath size in bytes is computed per plane as
 *      swath_width_*_ub * BytePerPixDET* * MaximumSwathHeight*; for dm_420_10
 *      the result is additionally passed through dml_ceil(..., 256).
 *   3. The total DPP count and whether any surface has a chroma plane are
 *      accumulated.
 *   4. *CompBufReservedSpaceKBytes defaults to two pixel chunks and may be
 *      replaced based on the ROB size and the luma swath size of surface 0.
 *   5. dml32_UnboundedRequest() decides *UnboundedRequestEnabled, and
 *      dml32_CalculateDETBufferSize() fills DETBufferSizeInKByte[] and
 *      *CompressedBufferSizeInkByte.
 *   6. Per surface: the swath heights are kept at maximum or halved so that
 *      both swaths fit in half of the DET, the viewport support flags are
 *      set, and the DET is divided between luma and chroma.
 *
 * All array parameters are indexed by surface, 0 .. NumberOfActiveSurfaces-1.
 *
 * NOTE(review): steps 4 and 5 read element [0] of RoundedUpMaxSwathSizeBytesY,
 * Output and SurfaceTiling unconditionally, so the function appears to assume
 * NumberOfActiveSurfaces >= 1 - confirm with callers.
 * NOTE(review): ForceSingleDPP is declared double here but is printed with %d
 * in the debug trace and forwarded to helpers as a flag - confirm the intended
 * type.
 */
void dml32_CalculateSwathAndDETConfiguration(
		unsigned int DETSizeOverride[],
		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
		unsigned int ConfigReturnBufferSizeInKByte,
		unsigned int MaxTotalDETInKByte,
		unsigned int MinCompressedBufferSizeInKByte,
		double ForceSingleDPP,
		unsigned int NumberOfActiveSurfaces,
		unsigned int nomDETInKByte,
		enum unbounded_requesting_policy UseUnboundedRequestingFinal,
		bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
		unsigned int PixelChunkSizeKBytes,
		unsigned int ROBSizeKBytes,
		unsigned int CompressedBufferSegmentSizeInkByteFinal,
		enum output_encoder_class Output[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double MaximumSwathWidthLuma[],
		double MaximumSwathWidthChroma[],
		enum dm_rotation_angle SourceRotation[],
		bool ViewportStationary[],
		enum source_format_class SourcePixelFormat[],
		enum dm_swizzle_mode SurfaceTiling[],
		unsigned int ViewportWidth[],
		unsigned int ViewportHeight[],
		unsigned int ViewportXStart[],
		unsigned int ViewportYStart[],
		unsigned int ViewportXStartC[],
		unsigned int ViewportYStartC[],
		unsigned int SurfaceWidthY[],
		unsigned int SurfaceWidthC[],
		unsigned int SurfaceHeightY[],
		unsigned int SurfaceHeightC[],
		unsigned int Read256BytesBlockHeightY[],
		unsigned int Read256BytesBlockHeightC[],
		unsigned int Read256BytesBlockWidthY[],
		unsigned int Read256BytesBlockWidthC[],
		enum odm_combine_mode ODMMode[],
		unsigned int BlendingAndTiming[],
		unsigned int BytePerPixY[],
		unsigned int BytePerPixC[],
		double BytePerPixDETY[],
		double BytePerPixDETC[],
		unsigned int HActive[],
		double HRatio[],
		double HRatioChroma[],
		unsigned int DPPPerSurface[],

		/* Output */
		unsigned int swath_width_luma_ub[],
		unsigned int swath_width_chroma_ub[],
		double SwathWidth[],
		double SwathWidthChroma[],
		unsigned int SwathHeightY[],
		unsigned int SwathHeightC[],
		unsigned int DETBufferSizeInKByte[],
		unsigned int DETBufferSizeY[],
		unsigned int DETBufferSizeC[],
		bool *UnboundedRequestEnabled,
		unsigned int *CompressedBufferSizeInkByte,
		unsigned int *CompBufReservedSpaceKBytes,
		bool *CompBufReservedSpaceNeedAdjustment,
		bool ViewportSizeSupportPerSurface[],
		bool *ViewportSizeSupport)
{
	unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
	unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
	unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
	unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
	unsigned int RoundedUpSwathSizeBytesY;
	unsigned int RoundedUpSwathSizeBytesC;
	double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
	double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
	unsigned int k;
	unsigned int TotalActiveDPP = 0;
	bool NoChromaSurfaces = true;
	unsigned int DETBufferSizeInKByteForSwathCalculation;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
	dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
	dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
#endif
	/* Step 1: swath widths and maximum swath heights for every surface. */
	dml32_CalculateSwathWidth(ForceSingleDPP,
			NumberOfActiveSurfaces,
			SourcePixelFormat,
			SourceRotation,
			ViewportStationary,
			ViewportWidth,
			ViewportHeight,
			ViewportXStart,
			ViewportYStart,
			ViewportXStartC,
			ViewportYStartC,
			SurfaceWidthY,
			SurfaceWidthC,
			SurfaceHeightY,
			SurfaceHeightC,
			ODMMode,
			BytePerPixY,
			BytePerPixC,
			Read256BytesBlockHeightY,
			Read256BytesBlockHeightC,
			Read256BytesBlockWidthY,
			Read256BytesBlockWidthC,
			BlendingAndTiming,
			HActive,
			HRatio,
			DPPPerSurface,

			/* Output */
			SwathWidthdoubleDPP,
			SwathWidthdoubleDPPChroma,
			SwathWidth,
			SwathWidthChroma,
			MaximumSwathHeightY,
			MaximumSwathHeightC,
			swath_width_luma_ub,
			swath_width_chroma_ub);

	/*
	 * Step 2: maximum swath size in bytes per plane. The product involves a
	 * double (BytePerPixDET*) and is converted to unsigned int on assignment.
	 */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
		RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
		dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
		dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
		dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
				RoundedUpMaxSwathSizeBytesY[k]);
		dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
		dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
		dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
				RoundedUpMaxSwathSizeBytesC[k]);
#endif

		/* Only dm_420_10 swath sizes are passed through dml_ceil with a 256 granularity. */
		if (SourcePixelFormat[k] == dm_420_10) {
			RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
			RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
		}
	}

	/* Step 3: count DPPs (one per surface when ForceSingleDPP is set) and detect chroma planes. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
				SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
			NoChromaSurfaces = false;
		}
	}

	// By default, just set the reserved space to 2 pixel chunks size
	*CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;

	// if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
	// - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
	// - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
	// The comparison is done in signed int; only the luma swath size of surface 0 is considered.
	*CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);

	if (*CompBufReservedSpaceNeedAdjustment == 1) {
		*CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes);
	dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment);
#endif

	/* Step 5: the unbounded-request decision uses the output and tiling of surface 0 only. */
	*UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);

	dml32_CalculateDETBufferSize(DETSizeOverride,
			UseMALLForPStateChange,
			ForceSingleDPP,
			NumberOfActiveSurfaces,
			*UnboundedRequestEnabled,
			nomDETInKByte,
			MaxTotalDETInKByte,
			ConfigReturnBufferSizeInKByte,
			MinCompressedBufferSizeInKByte,
			CompressedBufferSegmentSizeInkByteFinal,
			SourcePixelFormat,
			ReadBandwidthLuma,
			ReadBandwidthChroma,
			RoundedUpMaxSwathSizeBytesY,
			RoundedUpMaxSwathSizeBytesC,
			DPPPerSurface,

			/* Output */
			DETBufferSizeInKByte,    // per hubp pipe
			CompressedBufferSizeInkByte);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
#endif

	/* Step 6: cleared below as soon as one surface fails the viewport checks. */
	*ViewportSizeSupport = true;
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

		/* Phantom pipes are sized against a fixed 1024 KB instead of their DET allocation. */
		DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
				dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
				DETBufferSizeInKByteForSwathCalculation);
#endif

		/*
		 * Both swaths must fit in half of the DET (in bytes). Try, in order:
		 * both at maximum height; luma halved (when luma is at least 1.5x
		 * chroma); chroma halved (when luma is below 1.5x chroma); both halved.
		 */
		if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			SwathHeightY[k] = MaximumSwathHeightY[k];
			SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
		} else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
				RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
			SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
		} else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
				RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			SwathHeightY[k] = MaximumSwathHeightY[k];
			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
		} else {
			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
		}

		/*
		 * The surface is unsupported when even the halved swaths do not fit
		 * in half of the DET, or when a swath width exceeds its maximum (the
		 * chroma width is only checked when a chroma swath exists).
		 */
		if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
				DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
				|| SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
						SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
			*ViewportSizeSupport = false;
			ViewportSizeSupportPerSurface[k] = false;
		} else {
			ViewportSizeSupportPerSurface[k] = true;
		}

		/*
		 * Split the DET between the planes. Note this uses DETBufferSizeInKByte[k],
		 * not the phantom-pipe override used for the swath calculation above.
		 */
		if (SwathHeightC[k] == 0) {
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
#endif
			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
			DETBufferSizeC[k] = 0;
		} else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
#endif
			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
		} else {
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
#endif
			/* Luma share goes through dml_floor with a 1024 granularity; chroma gets the remainder. */
			DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
		dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
				k, RoundedUpMaxSwathSizeBytesY[k]);
		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
				k, RoundedUpMaxSwathSizeBytesC[k]);
		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
		dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
		dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
		dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
		dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
				ViewportSizeSupportPerSurface[k]);
#endif

	}
} // CalculateSwathAndDETConfiguration
680
dml32_CalculateSwathWidth(bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,enum source_format_class SourcePixelFormat[],enum dm_rotation_angle SourceRotation[],bool ViewportStationary[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int ViewportXStart[],unsigned int ViewportYStart[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],enum odm_combine_mode ODMMode[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int BlendingAndTiming[],unsigned int HActive[],double HRatio[],unsigned int DPPPerSurface[],double SwathWidthdoubleDPPY[],double SwathWidthdoubleDPPC[],double SwathWidthY[],double SwathWidthC[],unsigned int MaximumSwathHeightY[],unsigned int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])681 void dml32_CalculateSwathWidth(
682 bool ForceSingleDPP,
683 unsigned int NumberOfActiveSurfaces,
684 enum source_format_class SourcePixelFormat[],
685 enum dm_rotation_angle SourceRotation[],
686 bool ViewportStationary[],
687 unsigned int ViewportWidth[],
688 unsigned int ViewportHeight[],
689 unsigned int ViewportXStart[],
690 unsigned int ViewportYStart[],
691 unsigned int ViewportXStartC[],
692 unsigned int ViewportYStartC[],
693 unsigned int SurfaceWidthY[],
694 unsigned int SurfaceWidthC[],
695 unsigned int SurfaceHeightY[],
696 unsigned int SurfaceHeightC[],
697 enum odm_combine_mode ODMMode[],
698 unsigned int BytePerPixY[],
699 unsigned int BytePerPixC[],
700 unsigned int Read256BytesBlockHeightY[],
701 unsigned int Read256BytesBlockHeightC[],
702 unsigned int Read256BytesBlockWidthY[],
703 unsigned int Read256BytesBlockWidthC[],
704 unsigned int BlendingAndTiming[],
705 unsigned int HActive[],
706 double HRatio[],
707 unsigned int DPPPerSurface[],
708
709 /* Output */
710 double SwathWidthdoubleDPPY[],
711 double SwathWidthdoubleDPPC[],
712 double SwathWidthY[], // per-pipe
713 double SwathWidthC[], // per-pipe
714 unsigned int MaximumSwathHeightY[],
715 unsigned int MaximumSwathHeightC[],
716 unsigned int swath_width_luma_ub[], // per-pipe
717 unsigned int swath_width_chroma_ub[]) // per-pipe
718 {
719 unsigned int k, j;
720 enum odm_combine_mode MainSurfaceODMMode;
721
722 unsigned int surface_width_ub_l;
723 unsigned int surface_height_ub_l;
724 unsigned int surface_width_ub_c = 0;
725 unsigned int surface_height_ub_c = 0;
726
727 #ifdef __DML_VBA_DEBUG__
728 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
729 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
730 #endif
731
732 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
733 if (!IsVertical(SourceRotation[k]))
734 SwathWidthdoubleDPPY[k] = ViewportWidth[k];
735 else
736 SwathWidthdoubleDPPY[k] = ViewportHeight[k];
737
738 #ifdef __DML_VBA_DEBUG__
739 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
740 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
741 #endif
742
743 MainSurfaceODMMode = ODMMode[k];
744 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
745 if (BlendingAndTiming[k] == j)
746 MainSurfaceODMMode = ODMMode[j];
747 }
748
749 if (ForceSingleDPP) {
750 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
751 } else {
752 if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
753 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
754 dml_round(HActive[k] / 4.0 * HRatio[k]));
755 } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
756 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
757 dml_round(HActive[k] / 2.0 * HRatio[k]));
758 } else if (DPPPerSurface[k] == 2) {
759 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
760 } else {
761 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
762 }
763 }
764
765 #ifdef __DML_VBA_DEBUG__
766 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
767 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
768 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
769 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
770 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
771 #endif
772
773 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
774 SourcePixelFormat[k] == dm_420_12) {
775 SwathWidthC[k] = SwathWidthY[k] / 2;
776 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
777 } else {
778 SwathWidthC[k] = SwathWidthY[k];
779 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
780 }
781
782 if (ForceSingleDPP == true) {
783 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
784 SwathWidthC[k] = SwathWidthdoubleDPPC[k];
785 }
786
787 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
788 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
789
790 if (!IsVertical(SourceRotation[k])) {
791 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
792 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
793 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
794 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
795 dml_floor(ViewportXStart[k] +
796 SwathWidthY[k] +
797 Read256BytesBlockWidthY[k] - 1,
798 Read256BytesBlockWidthY[k]) -
799 dml_floor(ViewportXStart[k],
800 Read256BytesBlockWidthY[k]));
801 } else {
802 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
803 dml_ceil(SwathWidthY[k] - 1,
804 Read256BytesBlockWidthY[k]) +
805 Read256BytesBlockWidthY[k]);
806 }
807 if (BytePerPixC[k] > 0) {
808 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
809 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
810 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
811 dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
812 Read256BytesBlockWidthC[k] - 1,
813 Read256BytesBlockWidthC[k]) -
814 dml_floor(ViewportXStartC[k],
815 Read256BytesBlockWidthC[k]));
816 } else {
817 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
818 dml_ceil(SwathWidthC[k] - 1,
819 Read256BytesBlockWidthC[k]) +
820 Read256BytesBlockWidthC[k]);
821 }
822 } else {
823 swath_width_chroma_ub[k] = 0;
824 }
825 } else {
826 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
827 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
828
829 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
830 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
831 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
832 Read256BytesBlockHeightY[k]) -
833 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
834 } else {
835 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
836 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
837 }
838 if (BytePerPixC[k] > 0) {
839 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
840 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
841 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
842 dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
843 Read256BytesBlockHeightC[k] - 1,
844 Read256BytesBlockHeightC[k]) -
845 dml_floor(ViewportYStartC[k],
846 Read256BytesBlockHeightC[k]));
847 } else {
848 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
849 dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
850 Read256BytesBlockHeightC[k]);
851 }
852 } else {
853 swath_width_chroma_ub[k] = 0;
854 }
855 }
856
857 #ifdef __DML_VBA_DEBUG__
858 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
859 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
860 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
861 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
862 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
863 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
864 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
865 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
866 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
867 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
868 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
869 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
870 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
871 dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
872 #endif
873
874 }
875 } // CalculateSwathWidth
876
dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,unsigned int TotalNumberOfActiveDPP,bool NoChroma,enum output_encoder_class Output,enum dm_swizzle_mode SurfaceTiling,bool CompBufReservedSpaceNeedAdjustment,bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)877 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
878 unsigned int TotalNumberOfActiveDPP,
879 bool NoChroma,
880 enum output_encoder_class Output,
881 enum dm_swizzle_mode SurfaceTiling,
882 bool CompBufReservedSpaceNeedAdjustment,
883 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
884 {
885 bool ret_val = false;
886
887 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
888 TotalNumberOfActiveDPP == 1 && NoChroma);
889 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
890 ret_val = false;
891
892 if (SurfaceTiling == dm_sw_linear)
893 ret_val = false;
894
895 if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
896 ret_val = false;
897
898 #ifdef __DML_VBA_DEBUG__
899 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment);
900 dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
901 dml_print("DML::%s: ret_val = %d\n", __func__, ret_val);
902 #endif
903
904 return (ret_val);
905 }
906
dml32_CalculateDETBufferSize(unsigned int DETSizeOverride[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,bool UnboundedRequestEnabled,unsigned int nomDETInKByte,unsigned int MaxTotalDETInKByte,unsigned int ConfigReturnBufferSizeInKByte,unsigned int MinCompressedBufferSizeInKByte,unsigned int CompressedBufferSegmentSizeInkByteFinal,enum source_format_class SourcePixelFormat[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int RoundedUpMaxSwathSizeBytesY[],unsigned int RoundedUpMaxSwathSizeBytesC[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeInKByte[],unsigned int * CompressedBufferSizeInkByte)907 void dml32_CalculateDETBufferSize(
908 unsigned int DETSizeOverride[],
909 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
910 bool ForceSingleDPP,
911 unsigned int NumberOfActiveSurfaces,
912 bool UnboundedRequestEnabled,
913 unsigned int nomDETInKByte,
914 unsigned int MaxTotalDETInKByte,
915 unsigned int ConfigReturnBufferSizeInKByte,
916 unsigned int MinCompressedBufferSizeInKByte,
917 unsigned int CompressedBufferSegmentSizeInkByteFinal,
918 enum source_format_class SourcePixelFormat[],
919 double ReadBandwidthLuma[],
920 double ReadBandwidthChroma[],
921 unsigned int RoundedUpMaxSwathSizeBytesY[],
922 unsigned int RoundedUpMaxSwathSizeBytesC[],
923 unsigned int DPPPerSurface[],
924 /* Output */
925 unsigned int DETBufferSizeInKByte[],
926 unsigned int *CompressedBufferSizeInkByte)
927 {
928 unsigned int DETBufferSizePoolInKByte;
929 unsigned int NextDETBufferPieceInKByte;
930 bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
931 bool NextPotentialSurfaceToAssignDETPieceFound;
932 unsigned int NextSurfaceToAssignDETPiece;
933 double TotalBandwidth;
934 double BandwidthOfSurfacesNotAssignedDETPiece;
935 unsigned int max_minDET;
936 unsigned int minDET;
937 unsigned int minDET_pipe;
938 unsigned int j, k;
939
940 #ifdef __DML_VBA_DEBUG__
941 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
942 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
943 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
944 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
945 dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
946 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
947 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
948 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
949 CompressedBufferSegmentSizeInkByteFinal);
950 #endif
951
952 // Note: Will use default det size if that fits 2 swaths
953 if (UnboundedRequestEnabled) {
954 if (DETSizeOverride[0] > 0) {
955 DETBufferSizeInKByte[0] = DETSizeOverride[0];
956 } else {
957 DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
958 ((double) RoundedUpMaxSwathSizeBytesY[0] +
959 (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
960 }
961 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
962 } else {
963 DETBufferSizePoolInKByte = MaxTotalDETInKByte;
964 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
965 DETBufferSizeInKByte[k] = nomDETInKByte;
966 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
967 SourcePixelFormat[k] == dm_420_12) {
968 max_minDET = nomDETInKByte - 64;
969 } else {
970 max_minDET = nomDETInKByte;
971 }
972 minDET = 128;
973 minDET_pipe = 0;
974
975 // add DET resource until can hold 2 full swaths
976 while (minDET <= max_minDET && minDET_pipe == 0) {
977 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
978 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
979 minDET_pipe = minDET;
980 minDET = minDET + 64;
981 }
982
983 #ifdef __DML_VBA_DEBUG__
984 dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET);
985 dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET);
986 dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe);
987 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
988 RoundedUpMaxSwathSizeBytesY[k]);
989 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
990 RoundedUpMaxSwathSizeBytesC[k]);
991 #endif
992
993 if (minDET_pipe == 0) {
994 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
995 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
996 #ifdef __DML_VBA_DEBUG__
997 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
998 __func__, k, minDET_pipe);
999 #endif
1000 }
1001
1002 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1003 DETBufferSizeInKByte[k] = 0;
1004 } else if (DETSizeOverride[k] > 0) {
1005 DETBufferSizeInKByte[k] = DETSizeOverride[k];
1006 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1007 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1008 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1009 DETBufferSizeInKByte[k] = minDET_pipe;
1010 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1011 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1012 }
1013
1014 #ifdef __DML_VBA_DEBUG__
1015 dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1016 dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1017 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1018 dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
1019 #endif
1020 }
1021
1022 TotalBandwidth = 0;
1023 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1024 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1025 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1026 }
1027 #ifdef __DML_VBA_DEBUG__
1028 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1029 for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1030 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1031 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1032 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
1033 #endif
1034 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1035 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1036
1037 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1038 DETPieceAssignedToThisSurfaceAlready[k] = true;
1039 } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1040 (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1041 ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1042 DETPieceAssignedToThisSurfaceAlready[k] = true;
1043 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1044 ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1045 } else {
1046 DETPieceAssignedToThisSurfaceAlready[k] = false;
1047 }
1048 #ifdef __DML_VBA_DEBUG__
1049 dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1050 DETPieceAssignedToThisSurfaceAlready[k]);
1051 dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1052 BandwidthOfSurfacesNotAssignedDETPiece);
1053 #endif
1054 }
1055
1056 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1057 NextPotentialSurfaceToAssignDETPieceFound = false;
1058 NextSurfaceToAssignDETPiece = 0;
1059
1060 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1061 #ifdef __DML_VBA_DEBUG__
1062 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1063 ReadBandwidthLuma[k]);
1064 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1065 ReadBandwidthChroma[k]);
1066 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1067 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1068 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1069 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1070 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1071 NextSurfaceToAssignDETPiece);
1072 #endif
1073 if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1074 (!NextPotentialSurfaceToAssignDETPieceFound ||
1075 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1076 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1077 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1078 NextSurfaceToAssignDETPiece = k;
1079 NextPotentialSurfaceToAssignDETPieceFound = true;
1080 }
1081 #ifdef __DML_VBA_DEBUG__
1082 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1083 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1084 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1085 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1086 #endif
1087 }
1088
1089 if (NextPotentialSurfaceToAssignDETPieceFound) {
1090 // Note: To show the banker's rounding behavior in VBA and also the fact
1091 // that the DET buffer size varies due to precision issue
1092 //
1093 //double tmp1 = ((double) DETBufferSizePoolInKByte *
1094 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1095 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1096 // BandwidthOfSurfacesNotAssignedDETPiece /
1097 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1098 //double tmp2 = dml_round((double) DETBufferSizePoolInKByte *
1099 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1100 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1101 //BandwidthOfSurfacesNotAssignedDETPiece /
1102 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1103 //
1104 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1105 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1106
1107 NextDETBufferPieceInKByte = dml_min(
1108 dml_round((double) DETBufferSizePoolInKByte *
1109 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1110 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1111 BandwidthOfSurfacesNotAssignedDETPiece /
1112 ((ForceSingleDPP ? 1 :
1113 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1114 (ForceSingleDPP ? 1 :
1115 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1116 dml_floor((double) DETBufferSizePoolInKByte,
1117 (ForceSingleDPP ? 1 :
1118 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1119
1120 // Above calculation can assign the entire DET buffer allocation to a single pipe.
1121 // We should limit the per-pipe DET size to the nominal / max per pipe.
1122 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1123 if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1124 nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1125 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1126 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1127 } else {
1128 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1129 // already has the max per-pipe value
1130 NextDETBufferPieceInKByte = 0;
1131 }
1132 }
1133
1134 #ifdef __DML_VBA_DEBUG__
1135 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1136 DETBufferSizePoolInKByte);
1137 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1138 NextSurfaceToAssignDETPiece);
1139 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1140 NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1141 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1142 NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1143 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1144 __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1145 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1146 NextDETBufferPieceInKByte);
1147 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1148 __func__, j, NextSurfaceToAssignDETPiece,
1149 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1150 #endif
1151
1152 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1153 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1154 + NextDETBufferPieceInKByte
1155 / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1156 #ifdef __DML_VBA_DEBUG__
1157 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1158 #endif
1159
1160 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1161 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1162 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1163 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1164 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1165 }
1166 }
1167 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1168 }
1169 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1170
1171 #ifdef __DML_VBA_DEBUG__
1172 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1173 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1174 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1175 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1176 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1177 }
1178 #endif
1179 } // CalculateDETBufferSize
1180
dml32_CalculateODMMode(unsigned int MaximumPixelsPerLinePerDSCUnit,unsigned int HActive,enum output_format_class OutFormat,enum output_encoder_class Output,enum odm_combine_policy ODMUse,double StateDispclk,double MaxDispclk,bool DSCEnable,unsigned int TotalNumberOfActiveDPP,unsigned int MaxNumDPP,double PixelClock,double DISPCLKDPPCLKDSCCLKDownSpreading,double DISPCLKRampingMargin,double DISPCLKDPPCLKVCOSpeed,unsigned int NumberOfDSCSlices,bool * TotalAvailablePipesSupport,unsigned int * NumberOfDPP,enum odm_combine_mode * ODMMode,double * RequiredDISPCLKPerSurface)1181 void dml32_CalculateODMMode(
1182 unsigned int MaximumPixelsPerLinePerDSCUnit,
1183 unsigned int HActive,
1184 enum output_format_class OutFormat,
1185 enum output_encoder_class Output,
1186 enum odm_combine_policy ODMUse,
1187 double StateDispclk,
1188 double MaxDispclk,
1189 bool DSCEnable,
1190 unsigned int TotalNumberOfActiveDPP,
1191 unsigned int MaxNumDPP,
1192 double PixelClock,
1193 double DISPCLKDPPCLKDSCCLKDownSpreading,
1194 double DISPCLKRampingMargin,
1195 double DISPCLKDPPCLKVCOSpeed,
1196 unsigned int NumberOfDSCSlices,
1197
1198 /* Output */
1199 bool *TotalAvailablePipesSupport,
1200 unsigned int *NumberOfDPP,
1201 enum odm_combine_mode *ODMMode,
1202 double *RequiredDISPCLKPerSurface)
1203 {
1204
1205 double SurfaceRequiredDISPCLKWithoutODMCombine;
1206 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1207 double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1208
1209 SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1210 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1211 MaxDispclk);
1212 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1213 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1214 MaxDispclk);
1215 SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1216 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1217 MaxDispclk);
1218 *TotalAvailablePipesSupport = true;
1219 *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1220
1221 if (ODMUse == dm_odm_combine_policy_none)
1222 *ODMMode = dm_odm_combine_mode_disabled;
1223
1224 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1225 *NumberOfDPP = 0;
1226
1227 // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1228 // (ODMUse == "" || ODMUse == "CombineAsNeeded")
1229
1230 if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1231 ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1232 (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
1233 || NumberOfDSCSlices > 8)))) {
1234 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1235 *ODMMode = dm_odm_combine_mode_4to1;
1236 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1237 *NumberOfDPP = 4;
1238 } else {
1239 *TotalAvailablePipesSupport = false;
1240 }
1241 } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1242 (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1243 SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1244 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
1245 || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
1246 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1247 *ODMMode = dm_odm_combine_mode_2to1;
1248 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1249 *NumberOfDPP = 2;
1250 } else {
1251 *TotalAvailablePipesSupport = false;
1252 }
1253 } else {
1254 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1255 *NumberOfDPP = 1;
1256 else
1257 *TotalAvailablePipesSupport = false;
1258 }
1259 if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH &&
1260 ODMUse != dm_odm_combine_policy_4to1) {
1261 if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) {
1262 *ODMMode = dm_odm_combine_mode_disabled;
1263 *NumberOfDPP = 0;
1264 *TotalAvailablePipesSupport = false;
1265 } else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 ||
1266 *ODMMode == dm_odm_combine_mode_4to1) {
1267 *ODMMode = dm_odm_combine_mode_4to1;
1268 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1269 *NumberOfDPP = 4;
1270 } else {
1271 *ODMMode = dm_odm_combine_mode_2to1;
1272 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1273 *NumberOfDPP = 2;
1274 }
1275 }
1276 if (Output == dm_hdmi && OutFormat == dm_420 &&
1277 HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) {
1278 *ODMMode = dm_odm_combine_mode_disabled;
1279 *NumberOfDPP = 0;
1280 *TotalAvailablePipesSupport = false;
1281 }
1282 }
1283
dml32_CalculateRequiredDispclk(enum odm_combine_mode ODMMode,double PixelClock,double DISPCLKDPPCLKDSCCLKDownSpreading,double DISPCLKRampingMargin,double DISPCLKDPPCLKVCOSpeed,double MaxDispclk)1284 double dml32_CalculateRequiredDispclk(
1285 enum odm_combine_mode ODMMode,
1286 double PixelClock,
1287 double DISPCLKDPPCLKDSCCLKDownSpreading,
1288 double DISPCLKRampingMargin,
1289 double DISPCLKDPPCLKVCOSpeed,
1290 double MaxDispclk)
1291 {
1292 double RequiredDispclk = 0.;
1293 double PixelClockAfterODM;
1294 double DISPCLKWithRampingRoundedToDFSGranularity;
1295 double DISPCLKWithoutRampingRoundedToDFSGranularity;
1296 double MaxDispclkRoundedDownToDFSGranularity;
1297
1298 if (ODMMode == dm_odm_combine_mode_4to1)
1299 PixelClockAfterODM = PixelClock / 4;
1300 else if (ODMMode == dm_odm_combine_mode_2to1)
1301 PixelClockAfterODM = PixelClock / 2;
1302 else
1303 PixelClockAfterODM = PixelClock;
1304
1305
1306 DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1307 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1308 * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1309
1310 DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1311 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1312
1313 MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
1314
1315 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1316 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1317 else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1318 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1319 else
1320 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1321
1322 return RequiredDispclk;
1323 }
1324
/*
 * dml32_RoundToDFSGranularity - snap a clock to a value of the form
 * (4 * VCOSpeed) / N with N a whole number.
 *
 * round_up selects the next value at or above Clock (N is rounded down);
 * otherwise the next value at or below Clock is used (N is rounded up).
 * A Clock of zero or less yields 0.0.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double whole_divider;

	if (Clock <= 0.0)
		return 0.0;

	/* A smaller divider gives a higher clock, hence floor when rounding up. */
	if (round_up)
		whole_divider = dml_floor(VCOSpeed * 4.0 / Clock, 1.0);
	else
		whole_divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);

	return VCOSpeed * 4.0 / whole_divider;
}
1335
dml32_CalculateOutputLink(double PHYCLKPerState,double PHYCLKD18PerState,double PHYCLKD32PerState,double Downspreading,bool IsMainSurfaceUsingTheIndicatedTiming,enum output_encoder_class Output,enum output_format_class OutputFormat,unsigned int HTotal,unsigned int HActive,double PixelClockBackEnd,double ForcedOutputLinkBPP,unsigned int DSCInputBitPerComponent,unsigned int NumberOfDSCSlices,double AudioSampleRate,unsigned int AudioSampleLayout,enum odm_combine_mode ODMModeNoDSC,enum odm_combine_mode ODMModeDSC,bool DSCEnable,unsigned int OutputLinkDPLanes,enum dm_output_link_dp_rate OutputLinkDPRate,bool * RequiresDSC,double * RequiresFEC,double * OutBpp,enum dm_output_type * OutputType,enum dm_output_rate * OutputRate,unsigned int * RequiredSlots)1336 void dml32_CalculateOutputLink(
1337 double PHYCLKPerState,
1338 double PHYCLKD18PerState,
1339 double PHYCLKD32PerState,
1340 double Downspreading,
1341 bool IsMainSurfaceUsingTheIndicatedTiming,
1342 enum output_encoder_class Output,
1343 enum output_format_class OutputFormat,
1344 unsigned int HTotal,
1345 unsigned int HActive,
1346 double PixelClockBackEnd,
1347 double ForcedOutputLinkBPP,
1348 unsigned int DSCInputBitPerComponent,
1349 unsigned int NumberOfDSCSlices,
1350 double AudioSampleRate,
1351 unsigned int AudioSampleLayout,
1352 enum odm_combine_mode ODMModeNoDSC,
1353 enum odm_combine_mode ODMModeDSC,
1354 bool DSCEnable,
1355 unsigned int OutputLinkDPLanes,
1356 enum dm_output_link_dp_rate OutputLinkDPRate,
1357
1358 /* Output */
1359 bool *RequiresDSC,
1360 double *RequiresFEC,
1361 double *OutBpp,
1362 enum dm_output_type *OutputType,
1363 enum dm_output_rate *OutputRate,
1364 unsigned int *RequiredSlots)
1365 {
1366 bool LinkDSCEnable;
1367 unsigned int dummy;
1368 *RequiresDSC = false;
1369 *RequiresFEC = false;
1370 *OutBpp = 0;
1371 *OutputType = dm_output_type_unknown;
1372 *OutputRate = dm_output_rate_unknown;
1373
1374 if (IsMainSurfaceUsingTheIndicatedTiming) {
1375 if (Output == dm_hdmi) {
1376 *RequiresDSC = false;
1377 *RequiresFEC = false;
1378 *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1379 PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1380 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1381 ODMModeNoDSC, ODMModeDSC, &dummy);
1382 //OutputTypeAndRate = "HDMI";
1383 *OutputType = dm_output_type_hdmi;
1384
1385 } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
1386 if (DSCEnable == true) {
1387 *RequiresDSC = true;
1388 LinkDSCEnable = true;
1389 if (Output == dm_dp || Output == dm_dp2p0)
1390 *RequiresFEC = true;
1391 else
1392 *RequiresFEC = false;
1393 } else {
1394 *RequiresDSC = false;
1395 LinkDSCEnable = false;
1396 if (Output == dm_dp2p0)
1397 *RequiresFEC = true;
1398 else
1399 *RequiresFEC = false;
1400 }
1401 if (Output == dm_dp2p0) {
1402 *OutBpp = 0;
1403 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1404 PHYCLKD32PerState >= 10000 / 32) {
1405 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1406 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1407 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1408 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1409 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1410 if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
1411 ForcedOutputLinkBPP == 0) {
1412 *RequiresDSC = true;
1413 LinkDSCEnable = true;
1414 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1415 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1416 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1417 OutputFormat, DSCInputBitPerComponent,
1418 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1419 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1420 }
1421 //OutputTypeAndRate = Output & " UHBR10";
1422 *OutputType = dm_output_type_dp2p0;
1423 *OutputRate = dm_output_rate_dp_rate_uhbr10;
1424 }
1425 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1426 *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
1427 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1428 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1429 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1430 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1431 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1432
1433 if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1434 ForcedOutputLinkBPP == 0) {
1435 *RequiresDSC = true;
1436 LinkDSCEnable = true;
1437 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1438 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1439 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1440 OutputFormat, DSCInputBitPerComponent,
1441 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1442 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1443 }
1444 //OutputTypeAndRate = Output & " UHBR13p5";
1445 *OutputType = dm_output_type_dp2p0;
1446 *OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1447 }
1448 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1449 *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1450 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1451 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1452 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1453 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1454 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1455 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1456 *RequiresDSC = true;
1457 LinkDSCEnable = true;
1458 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1459 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1460 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1461 OutputFormat, DSCInputBitPerComponent,
1462 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1463 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1464 }
1465 //OutputTypeAndRate = Output & " UHBR20";
1466 *OutputType = dm_output_type_dp2p0;
1467 *OutputRate = dm_output_rate_dp_rate_uhbr20;
1468 }
1469 } else {
1470 *OutBpp = 0;
1471 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1472 PHYCLKPerState >= 270) {
1473 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1474 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1475 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1476 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1477 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1478 if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1479 ForcedOutputLinkBPP == 0) {
1480 *RequiresDSC = true;
1481 LinkDSCEnable = true;
1482 if (Output == dm_dp)
1483 *RequiresFEC = true;
1484 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1485 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1486 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1487 OutputFormat, DSCInputBitPerComponent,
1488 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1489 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1490 }
1491 //OutputTypeAndRate = Output & " HBR";
1492 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1493 *OutputRate = dm_output_rate_dp_rate_hbr;
1494 }
1495 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1496 *OutBpp == 0 && PHYCLKPerState >= 540) {
1497 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1498 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1499 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1500 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1501 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1502
1503 if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1504 ForcedOutputLinkBPP == 0) {
1505 *RequiresDSC = true;
1506 LinkDSCEnable = true;
1507 if (Output == dm_dp)
1508 *RequiresFEC = true;
1509
1510 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1511 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1512 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1513 OutputFormat, DSCInputBitPerComponent,
1514 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1515 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1516 }
1517 //OutputTypeAndRate = Output & " HBR2";
1518 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1519 *OutputRate = dm_output_rate_dp_rate_hbr2;
1520 }
1521 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1522 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1523 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1524 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1525 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1526 AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1527 RequiredSlots);
1528
1529 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1530 *RequiresDSC = true;
1531 LinkDSCEnable = true;
1532 if (Output == dm_dp)
1533 *RequiresFEC = true;
1534
1535 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1536 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1537 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1538 OutputFormat, DSCInputBitPerComponent,
1539 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1540 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1541 }
1542 //OutputTypeAndRate = Output & " HBR3";
1543 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1544 *OutputRate = dm_output_rate_dp_rate_hbr3;
1545 }
1546 }
1547 }
1548 }
1549 }
1550
/*
 * Compute the per-surface DPP clocks and the global DPP clock.
 *
 * Inputs:
 *   NumberOfActiveSurfaces           - number of valid entries in the arrays
 *   DISPCLKDPPCLKDSCCLKDownSpreading - down-spread, in percent (divided by 100 here)
 *   DISPCLKDPPCLKVCOSpeed            - forwarded to dml32_RoundToDFSGranularity()
 *   DPPCLKUsingSingleDPP[]           - per-surface clock if a single DPP were used
 *   DPPPerSurface[]                  - per-surface divisor applied to the above
 *
 * Outputs:
 *   *GlobalDPPCLK - largest per-surface clock after the down-spread margin,
 *                   passed through dml32_RoundToDFSGranularity()
 *   Dppclk[]      - per-surface clock, raised to the next multiple of
 *                   *GlobalDPPCLK / 255
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	/* The down-spread margin is identical for every surface. */
	const double spread_scale = 1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100;
	double highest = 0;
	double step;
	unsigned int i;

	/* First pass: raw per-surface clock and the running maximum. */
	for (i = 0; i < NumberOfActiveSurfaces; i++) {
		Dppclk[i] = DPPCLKUsingSingleDPP[i] / DPPPerSurface[i] * spread_scale;
		highest = dml_max(highest, Dppclk[i]);
	}

	highest = dml32_RoundToDFSGranularity(highest, 1, DISPCLKDPPCLKVCOSpeed);
	*GlobalDPPCLK = highest;

	/* Second pass: snap every surface clock up to a 1/255 step of the global clock. */
	step = highest / 255;
	for (i = 0; i < NumberOfActiveSurfaces; i++)
		Dppclk[i] = step * dml_ceil(Dppclk[i] * 255.0 / highest, 1.0);
}
1572
/*
 * Select the output bits-per-pixel for a link, or validate a forced value.
 *
 * A link capacity in bpp (MaxLinkBPP) is computed from LinkBitRate, Lanes and
 * PixelClock, scaled by fixed factors that depend on Output and, for dm_dp,
 * on DSCEnable (presumably channel-coding and FEC overheads - confirm against
 * the DP specification). It is then adjusted for the ODM mode that matches
 * DSCEnable: capped at 16 for 4to1, capped at 32 for 2to1, doubled for
 * split 1to2.
 *
 * DesiredBPP == 0 (automatic selection):
 *   - DSC enabled:  BPP_INVALID if MaxLinkBPP is below the format's minimum
 *                   DSC bpp; the format's maximum DSC bpp if MaxLinkBPP
 *                   reaches it; otherwise MaxLinkBPP truncated down to a
 *                   multiple of 1/16.
 *   - DSC disabled: the largest of the three uncompressed candidates for the
 *                   format/output that does not exceed MaxLinkBPP, or
 *                   BPP_INVALID if none fits.
 *
 * DesiredBPP != 0 (forced):
 *   DesiredBPP is returned unchanged when it is acceptable for the format
 *   (within [MinDSCBPP, MaxDSCBPP] with DSC; equal to the two upper
 *   uncompressed candidates or not above the lowest one without DSC),
 *   otherwise BPP_INVALID. This path does not compare against MaxLinkBPP.
 *
 * HTotal, HActive, DSCSlices, AudioRate and AudioLayout are accepted for
 * interface compatibility but are not read.
 *
 * NOTE(review): *RequiredSlots is never written. The previous body ended with
 * "*RequiredSlots = ceil(DesiredBPP / MaxLinkBPP * 64)" placed after an
 * if/else in which every path returned, so it could not execute. The dead
 * statement was dropped rather than made live, to keep caller-visible
 * behaviour unchanged; confirm whether a slot count is wanted here.
 */
double dml32_TruncToValidBPP(
		double LinkBitRate,
		unsigned int Lanes,
		unsigned int HTotal,
		unsigned int HActive,
		double PixelClock,
		double DesiredBPP,
		bool DSCEnable,
		enum output_encoder_class Output,
		enum output_format_class Format,
		unsigned int DSCInputBitPerComponent,
		unsigned int DSCSlices,
		unsigned int AudioRate,
		unsigned int AudioLayout,
		enum odm_combine_mode ODMModeNoDSC,
		enum odm_combine_mode ODMModeDSC,
		/* Output */
		unsigned int *RequiredSlots)
{
	double MaxLinkBPP;
	unsigned int MinDSCBPP;
	double MaxDSCBPP;
	unsigned int NonDSCBPP0;
	unsigned int NonDSCBPP1;
	unsigned int NonDSCBPP2;
	/* Only the ODM mode matching the DSC state is relevant. */
	enum odm_combine_mode ODMMode = DSCEnable ? ODMModeDSC : ODMModeNoDSC;

	if (Format == dm_420) {
		NonDSCBPP0 = 12;
		NonDSCBPP1 = 15;
		NonDSCBPP2 = 18;
		MinDSCBPP = 6;
		/*
		 * Must be 1.0 / 16: the integer expression 1 / 16 is 0 and
		 * would drop the 1/16 bpp margin the other formats apply.
		 */
		MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
	} else if (Format == dm_444) {
		NonDSCBPP0 = 24;
		NonDSCBPP1 = 30;
		NonDSCBPP2 = 36;
		MinDSCBPP = 8;
		MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
	} else {
		if (Output == dm_hdmi) {
			NonDSCBPP0 = 24;
			NonDSCBPP1 = 24;
			NonDSCBPP2 = 24;
		} else {
			NonDSCBPP0 = 16;
			NonDSCBPP1 = 20;
			NonDSCBPP2 = 24;
		}
		if (Format == dm_n422) {
			MinDSCBPP = 7;
			MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
		} else {
			MinDSCBPP = 8;
			MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
		}
	}

	if (Output == dm_dp2p0)
		MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
	else if (DSCEnable && Output == dm_dp)
		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
	else
		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;

	if (ODMMode == dm_odm_combine_mode_4to1)
		MaxLinkBPP = dml_min(MaxLinkBPP, 16);
	else if (ODMMode == dm_odm_combine_mode_2to1)
		MaxLinkBPP = dml_min(MaxLinkBPP, 32);
	else if (ODMMode == dm_odm_split_mode_1to2)
		MaxLinkBPP = 2 * MaxLinkBPP;

	if (DesiredBPP == 0) {
		/* Automatic selection. */
		if (DSCEnable) {
			if (MaxLinkBPP < MinDSCBPP)
				return BPP_INVALID;
			if (MaxLinkBPP >= MaxDSCBPP)
				return MaxDSCBPP;
			/* Truncate down to a multiple of 1/16 bpp. */
			return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
		}

		if (MaxLinkBPP >= NonDSCBPP2)
			return NonDSCBPP2;
		if (MaxLinkBPP >= NonDSCBPP1)
			return NonDSCBPP1;
		/*
		 * Return the candidate that was actually tested. A fixed 16.0
		 * here would exceed the link capacity for 4:2:0 (candidate 12)
		 * and is not one of the candidates for most formats.
		 */
		if (MaxLinkBPP >= NonDSCBPP0)
			return NonDSCBPP0;
		return BPP_INVALID;
	}

	/* Forced bpp: accept it only if it is valid for the format. */
	if (DSCEnable) {
		if (DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)
			return DesiredBPP;
	} else if (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
			DesiredBPP <= NonDSCBPP0) {
		return DesiredBPP;
	}

	return BPP_INVALID;
} // TruncToValidBPP
1684
/*
 * Compute the DTBCLK required for an output stream.
 *
 * Without DSC the result is PixelClock / 4 * OutputBpp / 24, with a floor
 * of 25.
 *
 * With DSC the result is the largest of the following, each divided by 4,
 * again with a floor of 25, and finally scaled by 1.002:
 *   - the pixel word rate (PixelClock, halved unless OutputFormat is dm_444)
 *   - the tribyte rate averaged over the whole line (active + blank words
 *     per HTotal)
 *   - the tribyte rate over the active region only
 *
 * The active word count is built from the per-slice width, i.e.
 * ceil(HActive / DSCSlices). The blank word count is 64 plus 32 per audio
 * packet term derived from AudioRate, AudioLayout, HTotal and PixelClock.
 *
 * DSCSlices must be non-zero when DSCEnable is true.
 */
double dml32_RequiredDTBCLK(
		bool DSCEnable,
		double PixelClock,
		enum output_format_class OutputFormat,
		double OutputBpp,
		unsigned int DSCSlices,
		unsigned int HTotal,
		unsigned int HActive,
		unsigned int AudioRate,
		unsigned int AudioLayout)
{
	double PixelWordRate;
	double SliceWidth;
	double HCActive;
	double HCBlank;
	double AverageTribyteRate;
	double HActiveTribyteRate;

	if (DSCEnable != true)
		return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);

	PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2);

	/*
	 * Divide in floating point before rounding up. HActive / DSCSlices on
	 * two unsigned ints truncates first, which made the ceil a no-op and
	 * under-sized the slice whenever HActive is not a multiple of DSCSlices.
	 */
	SliceWidth = dml_ceil((double)HActive / DSCSlices, 1);

	HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * SliceWidth / 8.0, 1) / 3.0, 1);
	HCBlank = 64 + 32 *
			dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
	AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
	HActiveTribyteRate = PixelWordRate * HCActive / HActive;

	return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
}
1714
/*
 * Compute the DSC delay requirement for one output.
 *
 * Returns 0 (before the workaround factor) when DSC is disabled or
 * OutputBpp is 0. Otherwise:
 *   1. The sum of dml32_dscceComputeDelay() and dml32_dscComputeDelay() is
 *      evaluated for a slice of width ceil(HActive / NumberOfDSCSlices).
 *      Under ODM combine 4to1 / 2to1 the slice count passed in is divided
 *      by 4 / 2 and the sum is multiplied by 4 / 2.
 *   2. (HTotal - HActive) is added once per (rounded-up) active line's
 *      worth of delay.
 *   3. The value is rescaled by PixelClock / PixelClockBackEnd.
 * The result is multiplied by dsc_delay_factor_wa and rounded up.
 *
 * NumberOfDSCSlices and HActive must be non-zero when DSC is enabled.
 */
unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
		enum odm_combine_mode ODMMode,
		unsigned int DSCInputBitPerComponent,
		double OutputBpp,
		unsigned int HActive,
		unsigned int HTotal,
		unsigned int NumberOfDSCSlices,
		enum output_format_class OutputFormat,
		enum output_encoder_class Output,
		double PixelClock,
		double PixelClockBackEnd,
		double dsc_delay_factor_wa)
{
	unsigned int DSCDelayRequirement_val;

	if (DSCEnabled == true && OutputBpp != 0) {
		unsigned int ODMFactor;
		/*
		 * Divide in floating point before rounding up. The integer
		 * quotient HActive / NumberOfDSCSlices truncates first, which
		 * made the ceil a no-op and under-sized the slice whenever
		 * HActive is not a multiple of the slice count.
		 */
		unsigned int SliceWidth = dml_ceil((double)HActive / NumberOfDSCSlices, 1);

		if (ODMMode == dm_odm_combine_mode_4to1)
			ODMFactor = 4;
		else if (ODMMode == dm_odm_combine_mode_2to1)
			ODMFactor = 2;
		else
			ODMFactor = 1;

		DSCDelayRequirement_val = ODMFactor * (dml32_dscceComputeDelay(DSCInputBitPerComponent,
				OutputBpp, SliceWidth, NumberOfDSCSlices / ODMFactor,
				OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));

		DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
				dml_ceil((double)DSCDelayRequirement_val / HActive, 1);

		DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;

	} else {
		DSCDelayRequirement_val = 0;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled);
	dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
	dml_print("DML::%s: HActive = %d\n", __func__, HActive);
	dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat);
	dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
	dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices);
	dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
#endif

	return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1);
}
1766
/*
 * One-dimensional span used for a stationary viewport: the block-aligned
 * range covering [ViewportStart, ViewportStart + ViewportSize), limited to
 * the surface size rounded up to a whole block.
 */
static double dml32_mall_stationary_span(unsigned int SurfaceSize, unsigned int ViewportStart,
		unsigned int ViewportSize, unsigned int BlockSize)
{
	return dml_min(dml_ceil(SurfaceSize, BlockSize),
			dml_floor(ViewportStart + ViewportSize + BlockSize - 1, BlockSize) -
			dml_floor(ViewportStart, BlockSize));
}

/*
 * One-dimensional span used for a non-stationary viewport: the viewport size
 * plus one block minus one, limited to the surface size, rounded up to a
 * whole block.
 */
static double dml32_mall_moving_span(unsigned int SurfaceSize, unsigned int ViewportSize,
		unsigned int BlockSize)
{
	return dml_ceil(dml_min(SurfaceSize, ViewportSize + BlockSize - 1), BlockSize);
}

/*
 * Compute SurfaceSizeInMALL[] for every active surface and report whether
 * the combined size exceeds the MALL allocation.
 *
 * Per surface the size is accumulated, in this order, from:
 *   - luma data:   width span * height span * BytesPerPixelY, in read blocks
 *   - chroma data: same with the C arrays, only if ReadBlockWidthC[k] > 0
 *   - with DCCEnable[k]: a luma term in units of 8 x the 256-byte block
 *     (using DCCMetaPitchY as the width limit), divided by 256, plus a fixed
 *     64 * 1024; and a matching chroma term if Read256BytesBlockWidthC[k] > 0
 * Each step is stored back into the unsigned SurfaceSizeInMALL[k], so any
 * fractional part is dropped after every step.
 *
 * Surfaces flagged dm_use_mall_pstate_change_phantom_pipe are summed into one
 * total; otherwise surfaces flagged dm_use_mall_static_screen_enable are
 * summed into a second total. *ExceededMALLSize is set when either total is
 * larger than MALLAllocatedForDCN * 1024 * 1024.
 */
void dml32_CalculateSurfaceSizeInMall(
		unsigned int NumberOfActiveSurfaces,
		unsigned int MALLAllocatedForDCN,
		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
		enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
		bool DCCEnable[],
		bool ViewportStationary[],
		unsigned int ViewportXStartY[],
		unsigned int ViewportYStartY[],
		unsigned int ViewportXStartC[],
		unsigned int ViewportYStartC[],
		unsigned int ViewportWidthY[],
		unsigned int ViewportHeightY[],
		unsigned int BytesPerPixelY[],
		unsigned int ViewportWidthC[],
		unsigned int ViewportHeightC[],
		unsigned int BytesPerPixelC[],
		unsigned int SurfaceWidthY[],
		unsigned int SurfaceWidthC[],
		unsigned int SurfaceHeightY[],
		unsigned int SurfaceHeightC[],
		unsigned int Read256BytesBlockWidthY[],
		unsigned int Read256BytesBlockWidthC[],
		unsigned int Read256BytesBlockHeightY[],
		unsigned int Read256BytesBlockHeightC[],
		unsigned int ReadBlockWidthY[],
		unsigned int ReadBlockWidthC[],
		unsigned int ReadBlockHeightY[],
		unsigned int ReadBlockHeightC[],
		unsigned int DCCMetaPitchY[],
		unsigned int DCCMetaPitchC[],

		/* Output */
		unsigned int SurfaceSizeInMALL[],
		bool *ExceededMALLSize)
{
	unsigned int k;
	unsigned int SizeForSS = 0;
	unsigned int SizeForSubVP = 0;
	unsigned int AllocatedBytes = MALLAllocatedForDCN * 1024 * 1024;

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (ViewportStationary[k]) {
			/* Luma data. */
			SurfaceSizeInMALL[k] =
				dml32_mall_stationary_span(SurfaceWidthY[k], ViewportXStartY[k],
						ViewportWidthY[k], ReadBlockWidthY[k]) *
				dml32_mall_stationary_span(SurfaceHeightY[k], ViewportYStartY[k],
						ViewportHeightY[k], ReadBlockHeightY[k]) *
				BytesPerPixelY[k];

			/* Chroma data. */
			if (ReadBlockWidthC[k] > 0) {
				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
					dml32_mall_stationary_span(SurfaceWidthC[k], ViewportXStartC[k],
							ViewportWidthC[k], ReadBlockWidthC[k]) *
					dml32_mall_stationary_span(SurfaceHeightC[k], ViewportYStartC[k],
							ViewportHeightC[k], ReadBlockHeightC[k]) *
					BytesPerPixelC[k];
			}

			if (DCCEnable[k] == true) {
				/* Luma DCC term plus the fixed 64 * 1024. */
				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
					(dml32_mall_stationary_span(DCCMetaPitchY[k], ViewportXStartY[k],
							ViewportWidthY[k], 8 * Read256BytesBlockWidthY[k]) *
					dml32_mall_stationary_span(SurfaceHeightY[k], ViewportYStartY[k],
							ViewportHeightY[k], 8 * Read256BytesBlockHeightY[k]) *
					BytesPerPixelY[k] / 256) + (64 * 1024);

				/* Chroma DCC term. */
				if (Read256BytesBlockWidthC[k] > 0) {
					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
						dml32_mall_stationary_span(DCCMetaPitchC[k],
								ViewportXStartC[k], ViewportWidthC[k],
								8 * Read256BytesBlockWidthC[k]) *
						dml32_mall_stationary_span(SurfaceHeightC[k],
								ViewportYStartC[k], ViewportHeightC[k],
								8 * Read256BytesBlockHeightC[k]) *
						BytesPerPixelC[k] / 256;
				}
			}
		} else {
			/* Luma data. */
			SurfaceSizeInMALL[k] =
				dml32_mall_moving_span(SurfaceWidthY[k], ViewportWidthY[k],
						ReadBlockWidthY[k]) *
				dml32_mall_moving_span(SurfaceHeightY[k], ViewportHeightY[k],
						ReadBlockHeightY[k]) *
				BytesPerPixelY[k];

			/* Chroma data. */
			if (ReadBlockWidthC[k] > 0) {
				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
					dml32_mall_moving_span(SurfaceWidthC[k], ViewportWidthC[k],
							ReadBlockWidthC[k]) *
					dml32_mall_moving_span(SurfaceHeightC[k], ViewportHeightC[k],
							ReadBlockHeightC[k]) *
					BytesPerPixelC[k];
			}

			if (DCCEnable[k] == true) {
				/* Luma DCC term plus the fixed 64 * 1024. */
				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
					(dml32_mall_moving_span(DCCMetaPitchY[k], ViewportWidthY[k],
							8 * Read256BytesBlockWidthY[k]) *
					dml32_mall_moving_span(SurfaceHeightY[k], ViewportHeightY[k],
							8 * Read256BytesBlockHeightY[k]) *
					BytesPerPixelY[k] / 256) + (64 * 1024);

				/* Chroma DCC term. */
				if (Read256BytesBlockWidthC[k] > 0) {
					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
						dml32_mall_moving_span(DCCMetaPitchC[k], ViewportWidthC[k],
								8 * Read256BytesBlockWidthC[k]) *
						dml32_mall_moving_span(SurfaceHeightC[k], ViewportHeightC[k],
								8 * Read256BytesBlockHeightC[k]) *
						BytesPerPixelC[k] / 256;
				}
			}
		}
	}

	/* SS and SubVP are totalled separately as they are never used at the same time. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe)
			SizeForSubVP += SurfaceSizeInMALL[k];
		else if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
			SizeForSS += SurfaceSizeInMALL[k];
	}

	*ExceededMALLSize = (SizeForSS > AllocatedBytes) || (SizeForSubVP > AllocatedBytes);
} // CalculateSurfaceSizeInMall
1907
dml32_CalculateVMRowAndSwath(unsigned int NumberOfActiveSurfaces,DmlPipe myPipe[],unsigned int SurfaceSizeInMALL[],unsigned int PTEBufferSizeInRequestsLuma,unsigned int PTEBufferSizeInRequestsChroma,unsigned int DCCMetaBufferSizeBytes,enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],unsigned int MALLAllocatedForDCN,double SwathWidthY[],double SwathWidthC[],bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMaxPageTableLevels,unsigned int GPUVMMinPageSizeKBytes[],unsigned int HostVMMinPageSize,bool PTEBufferSizeNotExceeded[],bool DCCMetaBufferSizeNotExceeded[],unsigned int dpte_row_width_luma_ub[],unsigned int dpte_row_width_chroma_ub[],unsigned int dpte_row_height_luma[],unsigned int dpte_row_height_chroma[],unsigned int dpte_row_height_linear_luma[],unsigned int dpte_row_height_linear_chroma[],unsigned int meta_req_width[],unsigned int meta_req_width_chroma[],unsigned int meta_req_height[],unsigned int meta_req_height_chroma[],unsigned int meta_row_width[],unsigned int meta_row_width_chroma[],unsigned int meta_row_height[],unsigned int meta_row_height_chroma[],unsigned int vm_group_bytes[],unsigned int dpte_group_bytes[],unsigned int PixelPTEReqWidthY[],unsigned int PixelPTEReqHeightY[],unsigned int PTERequestSizeY[],unsigned int PixelPTEReqWidthC[],unsigned int PixelPTEReqHeightC[],unsigned int PTERequestSizeC[],unsigned int dpde0_bytes_per_frame_ub_l[],unsigned int meta_pte_bytes_per_frame_ub_l[],unsigned int dpde0_bytes_per_frame_ub_c[],unsigned int meta_pte_bytes_per_frame_ub_c[],double PrefetchSourceLinesY[],double PrefetchSourceLinesC[],double VInitPreFillY[],double VInitPreFillC[],unsigned int MaxNumSwathY[],unsigned int MaxNumSwathC[],double meta_row_bw[],double dpte_row_bw[],double PixelPTEBytesPerRow[],double PDEAndMetaPTEBytesFrame[],double MetaRowByte[],bool use_one_row_for_frame[],bool use_one_row_for_frame_flip[],bool 
UsesMALLForStaticScreen[],bool PTE_BUFFER_MODE[],unsigned int BIGK_FRAGMENT_SIZE[])1908 void dml32_CalculateVMRowAndSwath(
1909 unsigned int NumberOfActiveSurfaces,
1910 DmlPipe myPipe[],
1911 unsigned int SurfaceSizeInMALL[],
1912 unsigned int PTEBufferSizeInRequestsLuma,
1913 unsigned int PTEBufferSizeInRequestsChroma,
1914 unsigned int DCCMetaBufferSizeBytes,
1915 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1916 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1917 unsigned int MALLAllocatedForDCN,
1918 double SwathWidthY[],
1919 double SwathWidthC[],
1920 bool GPUVMEnable,
1921 bool HostVMEnable,
1922 unsigned int HostVMMaxNonCachedPageTableLevels,
1923 unsigned int GPUVMMaxPageTableLevels,
1924 unsigned int GPUVMMinPageSizeKBytes[],
1925 unsigned int HostVMMinPageSize,
1926
1927 /* Output */
1928 bool PTEBufferSizeNotExceeded[],
1929 bool DCCMetaBufferSizeNotExceeded[],
1930 unsigned int dpte_row_width_luma_ub[],
1931 unsigned int dpte_row_width_chroma_ub[],
1932 unsigned int dpte_row_height_luma[],
1933 unsigned int dpte_row_height_chroma[],
1934 unsigned int dpte_row_height_linear_luma[], // VBA_DELTA
1935 unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA
1936 unsigned int meta_req_width[],
1937 unsigned int meta_req_width_chroma[],
1938 unsigned int meta_req_height[],
1939 unsigned int meta_req_height_chroma[],
1940 unsigned int meta_row_width[],
1941 unsigned int meta_row_width_chroma[],
1942 unsigned int meta_row_height[],
1943 unsigned int meta_row_height_chroma[],
1944 unsigned int vm_group_bytes[],
1945 unsigned int dpte_group_bytes[],
1946 unsigned int PixelPTEReqWidthY[],
1947 unsigned int PixelPTEReqHeightY[],
1948 unsigned int PTERequestSizeY[],
1949 unsigned int PixelPTEReqWidthC[],
1950 unsigned int PixelPTEReqHeightC[],
1951 unsigned int PTERequestSizeC[],
1952 unsigned int dpde0_bytes_per_frame_ub_l[],
1953 unsigned int meta_pte_bytes_per_frame_ub_l[],
1954 unsigned int dpde0_bytes_per_frame_ub_c[],
1955 unsigned int meta_pte_bytes_per_frame_ub_c[],
1956 double PrefetchSourceLinesY[],
1957 double PrefetchSourceLinesC[],
1958 double VInitPreFillY[],
1959 double VInitPreFillC[],
1960 unsigned int MaxNumSwathY[],
1961 unsigned int MaxNumSwathC[],
1962 double meta_row_bw[],
1963 double dpte_row_bw[],
1964 double PixelPTEBytesPerRow[],
1965 double PDEAndMetaPTEBytesFrame[],
1966 double MetaRowByte[],
1967 bool use_one_row_for_frame[],
1968 bool use_one_row_for_frame_flip[],
1969 bool UsesMALLForStaticScreen[],
1970 bool PTE_BUFFER_MODE[],
1971 unsigned int BIGK_FRAGMENT_SIZE[])
1972 {
1973 unsigned int k;
1974 unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
1975 unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
1976 unsigned int PDEAndMetaPTEBytesFrameY;
1977 unsigned int PDEAndMetaPTEBytesFrameC;
1978 unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
1979 unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
1980 unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
1981 unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
1982 unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
1983 unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
1984 unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1985 unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
1986 unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1987 unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
1988 bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
1989
1990 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1991 if (HostVMEnable == true) {
1992 vm_group_bytes[k] = 512;
1993 dpte_group_bytes[k] = 512;
1994 } else if (GPUVMEnable == true) {
1995 vm_group_bytes[k] = 2048;
1996 if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
1997 dpte_group_bytes[k] = 512;
1998 else
1999 dpte_group_bytes[k] = 2048;
2000 } else {
2001 vm_group_bytes[k] = 0;
2002 dpte_group_bytes[k] = 0;
2003 }
2004
2005 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
2006 myPipe[k].SourcePixelFormat == dm_420_12 ||
2007 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
2008 if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
2009 !IsVertical(myPipe[k].SourceRotation)) {
2010 PTEBufferSizeInRequestsForLuma[k] =
2011 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
2012 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
2013 } else {
2014 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
2015 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
2016 }
2017
2018 PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
2019 myPipe[k].ViewportStationary,
2020 myPipe[k].DCCEnable,
2021 myPipe[k].DPPPerSurface,
2022 myPipe[k].BlockHeight256BytesC,
2023 myPipe[k].BlockWidth256BytesC,
2024 myPipe[k].SourcePixelFormat,
2025 myPipe[k].SurfaceTiling,
2026 myPipe[k].BytePerPixelC,
2027 myPipe[k].SourceRotation,
2028 SwathWidthC[k],
2029 myPipe[k].ViewportHeightChroma,
2030 myPipe[k].ViewportXStartC,
2031 myPipe[k].ViewportYStartC,
2032 GPUVMEnable,
2033 HostVMEnable,
2034 HostVMMaxNonCachedPageTableLevels,
2035 GPUVMMaxPageTableLevels,
2036 GPUVMMinPageSizeKBytes[k],
2037 HostVMMinPageSize,
2038 PTEBufferSizeInRequestsForChroma[k],
2039 myPipe[k].PitchC,
2040 myPipe[k].DCCMetaPitchC,
2041 myPipe[k].BlockWidthC,
2042 myPipe[k].BlockHeightC,
2043
2044 /* Output */
2045 &MetaRowByteC[k],
2046 &PixelPTEBytesPerRowC[k],
2047 &dpte_row_width_chroma_ub[k],
2048 &dpte_row_height_chroma[k],
2049 &dpte_row_height_linear_chroma[k],
2050 &PixelPTEBytesPerRowC_one_row_per_frame[k],
2051 &dpte_row_width_chroma_ub_one_row_per_frame[k],
2052 &dpte_row_height_chroma_one_row_per_frame[k],
2053 &meta_req_width_chroma[k],
2054 &meta_req_height_chroma[k],
2055 &meta_row_width_chroma[k],
2056 &meta_row_height_chroma[k],
2057 &PixelPTEReqWidthC[k],
2058 &PixelPTEReqHeightC[k],
2059 &PTERequestSizeC[k],
2060 &dpde0_bytes_per_frame_ub_c[k],
2061 &meta_pte_bytes_per_frame_ub_c[k]);
2062
2063 PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
2064 myPipe[k].VRatioChroma,
2065 myPipe[k].VTapsChroma,
2066 myPipe[k].InterlaceEnable,
2067 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2068 myPipe[k].SwathHeightC,
2069 myPipe[k].SourceRotation,
2070 myPipe[k].ViewportStationary,
2071 SwathWidthC[k],
2072 myPipe[k].ViewportHeightChroma,
2073 myPipe[k].ViewportXStartC,
2074 myPipe[k].ViewportYStartC,
2075
2076 /* Output */
2077 &VInitPreFillC[k],
2078 &MaxNumSwathC[k]);
2079 } else {
2080 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2081 PTEBufferSizeInRequestsForChroma[k] = 0;
2082 PixelPTEBytesPerRowC[k] = 0;
2083 PDEAndMetaPTEBytesFrameC = 0;
2084 MetaRowByteC[k] = 0;
2085 MaxNumSwathC[k] = 0;
2086 PrefetchSourceLinesC[k] = 0;
2087 dpte_row_height_chroma_one_row_per_frame[k] = 0;
2088 dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2089 PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2090 }
2091
2092 PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2093 myPipe[k].ViewportStationary,
2094 myPipe[k].DCCEnable,
2095 myPipe[k].DPPPerSurface,
2096 myPipe[k].BlockHeight256BytesY,
2097 myPipe[k].BlockWidth256BytesY,
2098 myPipe[k].SourcePixelFormat,
2099 myPipe[k].SurfaceTiling,
2100 myPipe[k].BytePerPixelY,
2101 myPipe[k].SourceRotation,
2102 SwathWidthY[k],
2103 myPipe[k].ViewportHeight,
2104 myPipe[k].ViewportXStart,
2105 myPipe[k].ViewportYStart,
2106 GPUVMEnable,
2107 HostVMEnable,
2108 HostVMMaxNonCachedPageTableLevels,
2109 GPUVMMaxPageTableLevels,
2110 GPUVMMinPageSizeKBytes[k],
2111 HostVMMinPageSize,
2112 PTEBufferSizeInRequestsForLuma[k],
2113 myPipe[k].PitchY,
2114 myPipe[k].DCCMetaPitchY,
2115 myPipe[k].BlockWidthY,
2116 myPipe[k].BlockHeightY,
2117
2118 /* Output */
2119 &MetaRowByteY[k],
2120 &PixelPTEBytesPerRowY[k],
2121 &dpte_row_width_luma_ub[k],
2122 &dpte_row_height_luma[k],
2123 &dpte_row_height_linear_luma[k],
2124 &PixelPTEBytesPerRowY_one_row_per_frame[k],
2125 &dpte_row_width_luma_ub_one_row_per_frame[k],
2126 &dpte_row_height_luma_one_row_per_frame[k],
2127 &meta_req_width[k],
2128 &meta_req_height[k],
2129 &meta_row_width[k],
2130 &meta_row_height[k],
2131 &PixelPTEReqWidthY[k],
2132 &PixelPTEReqHeightY[k],
2133 &PTERequestSizeY[k],
2134 &dpde0_bytes_per_frame_ub_l[k],
2135 &meta_pte_bytes_per_frame_ub_l[k]);
2136
2137 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2138 myPipe[k].VRatio,
2139 myPipe[k].VTaps,
2140 myPipe[k].InterlaceEnable,
2141 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2142 myPipe[k].SwathHeightY,
2143 myPipe[k].SourceRotation,
2144 myPipe[k].ViewportStationary,
2145 SwathWidthY[k],
2146 myPipe[k].ViewportHeight,
2147 myPipe[k].ViewportXStart,
2148 myPipe[k].ViewportYStart,
2149
2150 /* Output */
2151 &VInitPreFillY[k],
2152 &MaxNumSwathY[k]);
2153
2154 PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2155 MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
2156
2157 if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
2158 PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
2159 PTEBufferSizeNotExceeded[k] = true;
2160 } else {
2161 PTEBufferSizeNotExceeded[k] = false;
2162 }
2163
2164 one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2165 PTEBufferSizeInRequestsForLuma[k] &&
2166 PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
2167 }
2168
2169 dml32_CalculateMALLUseForStaticScreen(
2170 NumberOfActiveSurfaces,
2171 MALLAllocatedForDCN,
2172 UseMALLForStaticScreen, // mode
2173 SurfaceSizeInMALL,
2174 one_row_per_frame_fits_in_buffer,
2175 /* Output */
2176 UsesMALLForStaticScreen); // boolen
2177
2178 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2179 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2180 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2181 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2182 (GPUVMMinPageSizeKBytes[k] > 64);
2183 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
2184 }
2185
2186 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2187 #ifdef __DML_VBA_DEBUG__
2188 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]);
2189 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
2190 #endif
2191 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2192 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2193 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2194 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
2195
2196 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2197 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
2198
2199 if (use_one_row_for_frame[k]) {
2200 dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
2201 dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
2202 PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
2203 dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
2204 dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
2205 PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
2206 PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
2207 }
2208
2209 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2210 DCCMetaBufferSizeNotExceeded[k] = true;
2211 else
2212 DCCMetaBufferSizeNotExceeded[k] = false;
2213
2214 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
2215 if (use_one_row_for_frame[k])
2216 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
2217
2218 dml32_CalculateRowBandwidth(
2219 GPUVMEnable,
2220 myPipe[k].SourcePixelFormat,
2221 myPipe[k].VRatio,
2222 myPipe[k].VRatioChroma,
2223 myPipe[k].DCCEnable,
2224 myPipe[k].HTotal / myPipe[k].PixelClock,
2225 MetaRowByteY[k], MetaRowByteC[k],
2226 meta_row_height[k],
2227 meta_row_height_chroma[k],
2228 PixelPTEBytesPerRowY[k],
2229 PixelPTEBytesPerRowC[k],
2230 dpte_row_height_luma[k],
2231 dpte_row_height_chroma[k],
2232
2233 /* Output */
2234 &meta_row_bw[k],
2235 &dpte_row_bw[k]);
2236 #ifdef __DML_VBA_DEBUG__
2237 dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]);
2238 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n",
2239 __func__, k, use_one_row_for_frame_flip[k]);
2240 dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n",
2241 __func__, k, UseMALLForPStateChange[k]);
2242 dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]);
2243 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n",
2244 __func__, k, dpte_row_width_luma_ub[k]);
2245 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]);
2246 dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n",
2247 __func__, k, dpte_row_height_chroma[k]);
2248 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n",
2249 __func__, k, dpte_row_width_chroma_ub[k]);
2250 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]);
2251 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]);
2252 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n",
2253 __func__, k, PTEBufferSizeNotExceeded[k]);
2254 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2255 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2256 #endif
2257 }
2258 } // CalculateVMRowAndSwath
2259
dml32_CalculateVMAndRowBytes(bool ViewportStationary,bool DCCEnable,unsigned int NumberOfDPPs,unsigned int BlockHeight256Bytes,unsigned int BlockWidth256Bytes,enum source_format_class SourcePixelFormat,unsigned int SurfaceTiling,unsigned int BytePerPixel,enum dm_rotation_angle SourceRotation,double SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMaxPageTableLevels,unsigned int GPUVMMinPageSizeKBytes,unsigned int HostVMMinPageSize,unsigned int PTEBufferSizeInRequests,unsigned int Pitch,unsigned int DCCMetaPitch,unsigned int MacroTileWidth,unsigned int MacroTileHeight,unsigned int * MetaRowByte,unsigned int * PixelPTEBytesPerRow,unsigned int * dpte_row_width_ub,unsigned int * dpte_row_height,unsigned int * dpte_row_height_linear,unsigned int * PixelPTEBytesPerRow_one_row_per_frame,unsigned int * dpte_row_width_ub_one_row_per_frame,unsigned int * dpte_row_height_one_row_per_frame,unsigned int * MetaRequestWidth,unsigned int * MetaRequestHeight,unsigned int * meta_row_width,unsigned int * meta_row_height,unsigned int * PixelPTEReqWidth,unsigned int * PixelPTEReqHeight,unsigned int * PTERequestSize,unsigned int * DPDE0BytesFrame,unsigned int * MetaPTEBytesFrame)2260 unsigned int dml32_CalculateVMAndRowBytes(
2261 bool ViewportStationary,
2262 bool DCCEnable,
2263 unsigned int NumberOfDPPs,
2264 unsigned int BlockHeight256Bytes,
2265 unsigned int BlockWidth256Bytes,
2266 enum source_format_class SourcePixelFormat,
2267 unsigned int SurfaceTiling,
2268 unsigned int BytePerPixel,
2269 enum dm_rotation_angle SourceRotation,
2270 double SwathWidth,
2271 unsigned int ViewportHeight,
2272 unsigned int ViewportXStart,
2273 unsigned int ViewportYStart,
2274 bool GPUVMEnable,
2275 bool HostVMEnable,
2276 unsigned int HostVMMaxNonCachedPageTableLevels,
2277 unsigned int GPUVMMaxPageTableLevels,
2278 unsigned int GPUVMMinPageSizeKBytes,
2279 unsigned int HostVMMinPageSize,
2280 unsigned int PTEBufferSizeInRequests,
2281 unsigned int Pitch,
2282 unsigned int DCCMetaPitch,
2283 unsigned int MacroTileWidth,
2284 unsigned int MacroTileHeight,
2285
2286 /* Output */
2287 unsigned int *MetaRowByte,
2288 unsigned int *PixelPTEBytesPerRow,
2289 unsigned int *dpte_row_width_ub,
2290 unsigned int *dpte_row_height,
2291 unsigned int *dpte_row_height_linear,
2292 unsigned int *PixelPTEBytesPerRow_one_row_per_frame,
2293 unsigned int *dpte_row_width_ub_one_row_per_frame,
2294 unsigned int *dpte_row_height_one_row_per_frame,
2295 unsigned int *MetaRequestWidth,
2296 unsigned int *MetaRequestHeight,
2297 unsigned int *meta_row_width,
2298 unsigned int *meta_row_height,
2299 unsigned int *PixelPTEReqWidth,
2300 unsigned int *PixelPTEReqHeight,
2301 unsigned int *PTERequestSize,
2302 unsigned int *DPDE0BytesFrame,
2303 unsigned int *MetaPTEBytesFrame)
2304 {
2305 unsigned int MPDEBytesFrame;
2306 unsigned int DCCMetaSurfaceBytes;
2307 unsigned int ExtraDPDEBytesFrame;
2308 unsigned int PDEAndMetaPTEBytesFrame;
2309 unsigned int HostVMDynamicLevels = 0;
2310 unsigned int MacroTileSizeBytes;
2311 unsigned int vp_height_meta_ub;
2312 unsigned int vp_height_dpte_ub;
2313 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2314
2315 if (GPUVMEnable == true && HostVMEnable == true) {
2316 if (HostVMMinPageSize < 2048)
2317 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2318 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2319 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2320 else
2321 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
2322 }
2323
2324 *MetaRequestHeight = 8 * BlockHeight256Bytes;
2325 *MetaRequestWidth = 8 * BlockWidth256Bytes;
2326 if (SurfaceTiling == dm_sw_linear) {
2327 *meta_row_height = 32;
2328 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2329 - dml_floor(ViewportXStart, *MetaRequestWidth);
2330 } else if (!IsVertical(SourceRotation)) {
2331 *meta_row_height = *MetaRequestHeight;
2332 if (ViewportStationary && NumberOfDPPs == 1) {
2333 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2334 *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2335 } else {
2336 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2337 }
2338 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2339 } else {
2340 *meta_row_height = *MetaRequestWidth;
2341 if (ViewportStationary && NumberOfDPPs == 1) {
2342 *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2343 *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2344 } else {
2345 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2346 }
2347 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2348 }
2349
2350 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2351 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2352 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2353 } else if (!IsVertical(SourceRotation)) {
2354 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2355 } else {
2356 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2357 }
2358
2359 DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2360
2361 if (GPUVMEnable == true) {
2362 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2363 (8 * 4.0 * 1024), 1) + 1) * 64;
2364 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2365 } else {
2366 *MetaPTEBytesFrame = 0;
2367 MPDEBytesFrame = 0;
2368 }
2369
2370 if (DCCEnable != true) {
2371 *MetaPTEBytesFrame = 0;
2372 MPDEBytesFrame = 0;
2373 *MetaRowByte = 0;
2374 }
2375
2376 MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2377
2378 if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2379 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2380 vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2381 MacroTileHeight - 1, MacroTileHeight) -
2382 dml_floor(ViewportYStart, MacroTileHeight);
2383 } else if (!IsVertical(SourceRotation)) {
2384 vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2385 } else {
2386 vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2387 }
2388 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2389 (8 * 2097152), 1) + 1);
2390 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2391 } else {
2392 *DPDE0BytesFrame = 0;
2393 ExtraDPDEBytesFrame = 0;
2394 vp_height_dpte_ub = 0;
2395 }
2396
2397 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2398
2399 #ifdef __DML_VBA_DEBUG__
2400 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2401 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2402 dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2403 dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2404 dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2405 dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2406 dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2407 dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2408 dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2409 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2410 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2411 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2412 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2413 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2414 dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2415 dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2416 dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
2417 #endif
2418
2419 if (HostVMEnable == true)
2420 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2421
2422 if (SurfaceTiling == dm_sw_linear) {
2423 *PixelPTEReqHeight = 1;
2424 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2425 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2426 *PTERequestSize = 64;
2427 } else if (GPUVMMinPageSizeKBytes == 4) {
2428 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2429 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2430 *PTERequestSize = 128;
2431 } else {
2432 *PixelPTEReqHeight = MacroTileHeight;
2433 *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2434 *PTERequestSize = 64;
2435 }
2436 #ifdef __DML_VBA_DEBUG__
2437 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2438 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2439 dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2440 dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2441 dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2442 dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2443 dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
2444 #endif
2445
2446 *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2447 *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2448 (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2449 (double) *PixelPTEReqWidth;
2450 *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2451 *PTERequestSize;
2452
2453 if (SurfaceTiling == dm_sw_linear) {
2454 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2455 *PixelPTEReqWidth / Pitch), 1));
2456 #ifdef __DML_VBA_DEBUG__
2457 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2458 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2459 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2460 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2461 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2462 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2463 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2464 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2465 *PixelPTEReqWidth / Pitch), 1));
2466 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2467 #endif
2468 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2469 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2470 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2471
2472 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2473 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2474 PixelPTEReqWidth_linear / Pitch), 1);
2475 if (*dpte_row_height_linear > 128)
2476 *dpte_row_height_linear = 128;
2477
2478 } else if (!IsVertical(SourceRotation)) {
2479 *dpte_row_height = *PixelPTEReqHeight;
2480
2481 if (GPUVMMinPageSizeKBytes > 64) {
2482 *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2483 *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2484 } else if (ViewportStationary && (NumberOfDPPs == 1)) {
2485 *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2486 *PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2487 dml_floor(ViewportXStart, *PixelPTEReqWidth);
2488 } else {
2489 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2490 *PixelPTEReqWidth;
2491 }
2492
2493 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2494 } else {
2495 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2496
2497 if (ViewportStationary && (NumberOfDPPs == 1)) {
2498 *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2499 *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2500 } else {
2501 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2502 * *PixelPTEReqHeight;
2503 }
2504
2505 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2506 }
2507
2508 if (GPUVMEnable != true)
2509 *PixelPTEBytesPerRow = 0;
2510 if (HostVMEnable == true)
2511 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2512
2513 #ifdef __DML_VBA_DEBUG__
2514 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2515 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2516 dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2517 dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2518 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2519 dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2520 dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2521 dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2522 __func__, *dpte_row_width_ub_one_row_per_frame);
2523 dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2524 __func__, *PixelPTEBytesPerRow_one_row_per_frame);
2525 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2526 *MetaPTEBytesFrame);
2527 #endif
2528
2529 return PDEAndMetaPTEBytesFrame;
2530 } // CalculateVMAndRowBytes
2531
dml32_CalculatePrefetchSourceLines(double VRatio,unsigned int VTaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,enum dm_rotation_angle SourceRotation,bool ViewportStationary,double SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,double * VInitPreFill,unsigned int * MaxNumSwath)2532 double dml32_CalculatePrefetchSourceLines(
2533 double VRatio,
2534 unsigned int VTaps,
2535 bool Interlace,
2536 bool ProgressiveToInterlaceUnitInOPP,
2537 unsigned int SwathHeight,
2538 enum dm_rotation_angle SourceRotation,
2539 bool ViewportStationary,
2540 double SwathWidth,
2541 unsigned int ViewportHeight,
2542 unsigned int ViewportXStart,
2543 unsigned int ViewportYStart,
2544
2545 /* Output */
2546 double *VInitPreFill,
2547 unsigned int *MaxNumSwath)
2548 {
2549
2550 unsigned int vp_start_rot;
2551 unsigned int sw0_tmp;
2552 unsigned int MaxPartialSwath;
2553 double numLines;
2554
2555 #ifdef __DML_VBA_DEBUG__
2556 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2557 dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2558 dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2559 dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2560 dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2561 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
2562 #endif
2563 if (ProgressiveToInterlaceUnitInOPP)
2564 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2565 else
2566 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2567
2568 if (ViewportStationary) {
2569 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2570 vp_start_rot = SwathHeight -
2571 (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2572 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2573 vp_start_rot = ViewportXStart;
2574 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2575 vp_start_rot = SwathHeight -
2576 (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2577 } else {
2578 vp_start_rot = ViewportYStart;
2579 }
2580 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2581 if (sw0_tmp < *VInitPreFill)
2582 *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2583 else
2584 *MaxNumSwath = 1;
2585 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2586 } else {
2587 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2588 if (*VInitPreFill > 1)
2589 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2590 else
2591 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2592 }
2593 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2594
2595 #ifdef __DML_VBA_DEBUG__
2596 dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
2597 dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2598 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2599 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2600 dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2601 #endif
2602 return numLines;
2603
2604 } // CalculatePrefetchSourceLines
2605
dml32_CalculateMALLUseForStaticScreen(unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCNFinal,enum dm_use_mall_for_static_screen_mode * UseMALLForStaticScreen,unsigned int SurfaceSizeInMALL[],bool one_row_per_frame_fits_in_buffer[],bool UsesMALLForStaticScreen[])2606 void dml32_CalculateMALLUseForStaticScreen(
2607 unsigned int NumberOfActiveSurfaces,
2608 unsigned int MALLAllocatedForDCNFinal,
2609 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2610 unsigned int SurfaceSizeInMALL[],
2611 bool one_row_per_frame_fits_in_buffer[],
2612
2613 /* output */
2614 bool UsesMALLForStaticScreen[])
2615 {
2616 unsigned int k;
2617 unsigned int SurfaceToAddToMALL;
2618 bool CanAddAnotherSurfaceToMALL;
2619 unsigned int TotalSurfaceSizeInMALL;
2620
2621 TotalSurfaceSizeInMALL = 0;
2622 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2623 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2624 if (UsesMALLForStaticScreen[k])
2625 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2626 #ifdef __DML_VBA_DEBUG__
2627 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
2628 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL);
2629 #endif
2630 }
2631
2632 SurfaceToAddToMALL = 0;
2633 CanAddAnotherSurfaceToMALL = true;
2634 while (CanAddAnotherSurfaceToMALL) {
2635 CanAddAnotherSurfaceToMALL = false;
2636 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2637 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2638 !UsesMALLForStaticScreen[k] &&
2639 UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2640 one_row_per_frame_fits_in_buffer[k] &&
2641 (!CanAddAnotherSurfaceToMALL ||
2642 SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2643 CanAddAnotherSurfaceToMALL = true;
2644 SurfaceToAddToMALL = k;
2645 #ifdef __DML_VBA_DEBUG__
2646 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2647 __func__, k, UseMALLForStaticScreen[k]);
2648 #endif
2649 }
2650 }
2651 if (CanAddAnotherSurfaceToMALL) {
2652 UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2653 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2654
2655 #ifdef __DML_VBA_DEBUG__
2656 dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL);
2657 dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL);
2658 #endif
2659
2660 }
2661 }
2662 }
2663
dml32_CalculateRowBandwidth(bool GPUVMEnable,enum source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int MetaRowByteLuma,unsigned int MetaRowByteChroma,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,double * meta_row_bw,double * dpte_row_bw)2664 void dml32_CalculateRowBandwidth(
2665 bool GPUVMEnable,
2666 enum source_format_class SourcePixelFormat,
2667 double VRatio,
2668 double VRatioChroma,
2669 bool DCCEnable,
2670 double LineTime,
2671 unsigned int MetaRowByteLuma,
2672 unsigned int MetaRowByteChroma,
2673 unsigned int meta_row_height_luma,
2674 unsigned int meta_row_height_chroma,
2675 unsigned int PixelPTEBytesPerRowLuma,
2676 unsigned int PixelPTEBytesPerRowChroma,
2677 unsigned int dpte_row_height_luma,
2678 unsigned int dpte_row_height_chroma,
2679 /* Output */
2680 double *meta_row_bw,
2681 double *dpte_row_bw)
2682 {
2683 if (DCCEnable != true) {
2684 *meta_row_bw = 0;
2685 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2686 SourcePixelFormat == dm_rgbe_alpha) {
2687 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2688 MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2689 } else {
2690 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2691 }
2692
2693 if (GPUVMEnable != true) {
2694 *dpte_row_bw = 0;
2695 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2696 SourcePixelFormat == dm_rgbe_alpha) {
2697 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2698 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2699 } else {
2700 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2701 }
2702 }
2703
/*
 * dml32_CalculateUrgentLatency - worst-case urgent latency.
 *
 * Returns the largest of the three latency inputs. When
 * DoUrgentLatencyAdjustment is set, the result is additionally offset by
 *
 *     UrgentLatencyAdjustmentFabricClockComponent *
 *         (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1)
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool   DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double WorstLatency = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return WorstLatency;

	return WorstLatency + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
2722
dml32_CalculateUrgentBurstFactor(enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,unsigned int swath_width_luma_ub,unsigned int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double LineTime,double UrgentLatency,double CursorBufferSize,unsigned int CursorWidth,unsigned int CursorBPP,double VRatio,double VRatioC,double BytePerPixelInDETY,double BytePerPixelInDETC,unsigned int DETBufferSizeY,unsigned int DETBufferSizeC,double * UrgentBurstFactorCursor,double * UrgentBurstFactorLuma,double * UrgentBurstFactorChroma,bool * NotEnoughUrgentLatencyHiding)2723 void dml32_CalculateUrgentBurstFactor(
2724 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2725 unsigned int swath_width_luma_ub,
2726 unsigned int swath_width_chroma_ub,
2727 unsigned int SwathHeightY,
2728 unsigned int SwathHeightC,
2729 double LineTime,
2730 double UrgentLatency,
2731 double CursorBufferSize,
2732 unsigned int CursorWidth,
2733 unsigned int CursorBPP,
2734 double VRatio,
2735 double VRatioC,
2736 double BytePerPixelInDETY,
2737 double BytePerPixelInDETC,
2738 unsigned int DETBufferSizeY,
2739 unsigned int DETBufferSizeC,
2740 /* Output */
2741 double *UrgentBurstFactorCursor,
2742 double *UrgentBurstFactorLuma,
2743 double *UrgentBurstFactorChroma,
2744 bool *NotEnoughUrgentLatencyHiding)
2745 {
2746 double LinesInDETLuma;
2747 double LinesInDETChroma;
2748 unsigned int LinesInCursorBuffer;
2749 double CursorBufferSizeInTime;
2750 double DETBufferSizeInTimeLuma;
2751 double DETBufferSizeInTimeChroma;
2752
2753 *NotEnoughUrgentLatencyHiding = 0;
2754
2755 if (CursorWidth > 0) {
2756 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2757 (CursorWidth * CursorBPP / 8.0)), 1.0);
2758 if (VRatio > 0) {
2759 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2760 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2761 *NotEnoughUrgentLatencyHiding = 1;
2762 *UrgentBurstFactorCursor = 0;
2763 } else {
2764 *UrgentBurstFactorCursor = CursorBufferSizeInTime /
2765 (CursorBufferSizeInTime - UrgentLatency);
2766 }
2767 } else {
2768 *UrgentBurstFactorCursor = 1;
2769 }
2770 }
2771
2772 LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2773 DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2774
2775 if (VRatio > 0) {
2776 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2777 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2778 *NotEnoughUrgentLatencyHiding = 1;
2779 *UrgentBurstFactorLuma = 0;
2780 } else {
2781 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2782 }
2783 } else {
2784 *UrgentBurstFactorLuma = 1;
2785 }
2786
2787 if (BytePerPixelInDETC > 0) {
2788 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2789 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2790 / swath_width_chroma_ub;
2791
2792 if (VRatio > 0) {
2793 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2794 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2795 *NotEnoughUrgentLatencyHiding = 1;
2796 *UrgentBurstFactorChroma = 0;
2797 } else {
2798 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2799 / (DETBufferSizeInTimeChroma - UrgentLatency);
2800 }
2801 } else {
2802 *UrgentBurstFactorChroma = 1;
2803 }
2804 }
2805 } // CalculateUrgentBurstFactor
2806
dml32_CalculateDCFCLKDeepSleep(unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],double VRatio[],double VRatioChroma[],double SwathWidthY[],double SwathWidthC[],unsigned int DPPPerSurface[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int ReturnBusWidth,double * DCFClkDeepSleep)2807 void dml32_CalculateDCFCLKDeepSleep(
2808 unsigned int NumberOfActiveSurfaces,
2809 unsigned int BytePerPixelY[],
2810 unsigned int BytePerPixelC[],
2811 double VRatio[],
2812 double VRatioChroma[],
2813 double SwathWidthY[],
2814 double SwathWidthC[],
2815 unsigned int DPPPerSurface[],
2816 double HRatio[],
2817 double HRatioChroma[],
2818 double PixelClock[],
2819 double PSCL_THROUGHPUT[],
2820 double PSCL_THROUGHPUT_CHROMA[],
2821 double Dppclk[],
2822 double ReadBandwidthLuma[],
2823 double ReadBandwidthChroma[],
2824 unsigned int ReturnBusWidth,
2825
2826 /* Output */
2827 double *DCFClkDeepSleep)
2828 {
2829 unsigned int k;
2830 double DisplayPipeLineDeliveryTimeLuma;
2831 double DisplayPipeLineDeliveryTimeChroma;
2832 double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2833 double ReadBandwidth = 0.0;
2834
2835 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2836
2837 if (VRatio[k] <= 1) {
2838 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2839 / PixelClock[k];
2840 } else {
2841 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2842 }
2843 if (BytePerPixelC[k] == 0) {
2844 DisplayPipeLineDeliveryTimeChroma = 0;
2845 } else {
2846 if (VRatioChroma[k] <= 1) {
2847 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2848 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2849 } else {
2850 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2851 / Dppclk[k];
2852 }
2853 }
2854
2855 if (BytePerPixelC[k] > 0) {
2856 DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2857 BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2858 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2859 32.0 / DisplayPipeLineDeliveryTimeChroma);
2860 } else {
2861 DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2862 64.0 / DisplayPipeLineDeliveryTimeLuma;
2863 }
2864 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2865
2866 #ifdef __DML_VBA_DEBUG__
2867 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2868 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2869 #endif
2870 }
2871
2872 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2873 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2874
2875 *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2876
2877 #ifdef __DML_VBA_DEBUG__
2878 dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2879 dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2880 dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2881 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2882 #endif
2883
2884 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2885 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2886 #ifdef __DML_VBA_DEBUG__
2887 dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2888 #endif
2889 } // CalculateDCFCLKDeepSleep
2890
/*
 * Compute the writeback delay.
 *
 * Returns 0 when the unclamped count of last output lines is negative;
 * otherwise returns that count times the line length, plus
 * (HTotal - WritebackDestinationWidth), plus 80.
 *
 * WritebackPixelFormat and WritebackHRatio are accepted but not used here.
 */
double dml32_CalculateWriteBackDelay(
		enum source_format_class WritebackPixelFormat,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackVTaps,
		unsigned int WritebackDestinationWidth,
		unsigned int WritebackDestinationHeight,
		unsigned int WritebackSourceHeight,
		unsigned int HTotal)
{
	const double VInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
	const double LineLength = dml_max((double) WritebackDestinationWidth,
			dml_ceil((double) WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
	/* "WritebackDestinationHeight - 1" is evaluated in unsigned arithmetic. */
	const double LastOutputLines = WritebackDestinationHeight - 1 -
			dml_ceil(((double) WritebackSourceHeight - VInit) / WritebackVRatio, 1.0);

	if (LastOutputLines < 0)
		return 0;

	/* "HTotal - WritebackDestinationWidth" is evaluated in unsigned arithmetic. */
	return LastOutputLines * LineLength +
			(HTotal - WritebackDestinationWidth) + 80;
}
2920
dml32_UseMinimumDCFCLK(enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool DRRDisplay[],bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,unsigned int MaxInterDCNTileRepeaters,unsigned int MaxPrefetchMode,double DRAMClockChangeLatencyFinal,double FCLKChangeLatency,double SREnterPlusExitTime,unsigned int ReturnBusWidth,unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,unsigned int PixelChunkSizeInKByte,unsigned int MetaChunkSize,bool GPUVMEnable,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int NumberOfActiveSurfaces,double HostVMMinPageSize,unsigned int HostVMMaxNonCachedPageTableLevels,bool DynamicMetadataVMEnabled,bool ImmediateFlipRequirement,bool ProgressiveToInterlaceUnitInOPP,double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,unsigned int VTotal[],unsigned int VActive[],unsigned int DynamicMetadataTransmittedBytes[],unsigned int DynamicMetadataLinesBeforeActiveRequired[],bool Interlace[],double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],double RequiredDISPCLK[][2],double UrgLatency[],unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],double ProjectedDCFClkDeepSleep[][2],double MaximumVStartup[][2][DC__NUM_DPP__MAX],unsigned int TotalNumberOfActiveDPP[][2],unsigned int TotalNumberOfDCCActiveDPP[][2],unsigned int dpte_group_bytes[],double PrefetchLinesY[][2][DC__NUM_DPP__MAX],double PrefetchLinesC[][2][DC__NUM_DPP__MAX],unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],unsigned int HTotal[],double PixelClock[],double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],double MetaRowBytes[][2][DC__NUM_DPP__MAX],bool DynamicMetadataEnable[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double DCFCLKPerState[],double 
DCFCLKState[][2])2921 void dml32_UseMinimumDCFCLK(
2922 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2923 bool DRRDisplay[],
2924 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2925 unsigned int MaxInterDCNTileRepeaters,
2926 unsigned int MaxPrefetchMode,
2927 double DRAMClockChangeLatencyFinal,
2928 double FCLKChangeLatency,
2929 double SREnterPlusExitTime,
2930 unsigned int ReturnBusWidth,
2931 unsigned int RoundTripPingLatencyCycles,
2932 unsigned int ReorderingBytes,
2933 unsigned int PixelChunkSizeInKByte,
2934 unsigned int MetaChunkSize,
2935 bool GPUVMEnable,
2936 unsigned int GPUVMMaxPageTableLevels,
2937 bool HostVMEnable,
2938 unsigned int NumberOfActiveSurfaces,
2939 double HostVMMinPageSize,
2940 unsigned int HostVMMaxNonCachedPageTableLevels,
2941 bool DynamicMetadataVMEnabled,
2942 bool ImmediateFlipRequirement,
2943 bool ProgressiveToInterlaceUnitInOPP,
2944 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2945 double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2946 unsigned int VTotal[],
2947 unsigned int VActive[],
2948 unsigned int DynamicMetadataTransmittedBytes[],
2949 unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2950 bool Interlace[],
2951 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2952 double RequiredDISPCLK[][2],
2953 double UrgLatency[],
2954 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2955 double ProjectedDCFClkDeepSleep[][2],
2956 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2957 unsigned int TotalNumberOfActiveDPP[][2],
2958 unsigned int TotalNumberOfDCCActiveDPP[][2],
2959 unsigned int dpte_group_bytes[],
2960 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2961 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2962 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2963 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2964 unsigned int BytePerPixelY[],
2965 unsigned int BytePerPixelC[],
2966 unsigned int HTotal[],
2967 double PixelClock[],
2968 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2969 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2970 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2971 bool DynamicMetadataEnable[],
2972 double ReadBandwidthLuma[],
2973 double ReadBandwidthChroma[],
2974 double DCFCLKPerState[],
2975 /* Output */
2976 double DCFCLKState[][2])
2977 {
2978 unsigned int i, j, k;
2979 unsigned int dummy1;
2980 double dummy2, dummy3;
2981 double NormalEfficiency;
2982 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2983
2984 NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2985 for (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2986 for (j = 0; j <= 1; ++j) {
2987 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2988 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2989 double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2990 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2991 double MinimumTWait = 0.0;
2992 double DPTEBandwidth;
2993 double DCFCLKRequiredForAverageBandwidth;
2994 unsigned int ExtraLatencyBytes;
2995 double ExtraLatencyCycles;
2996 double DCFCLKRequiredForPeakBandwidth;
2997 unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2998 double MinimumTvmPlus2Tr0;
2999
3000 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
3001 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3002 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
3003 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
3004 / (15.75 * HTotal[k] / PixelClock[k]);
3005 }
3006
3007 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
3008 NoOfDPPState[k] = NoOfDPP[i][j][k];
3009
3010 DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
3011 DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
3012
3013 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
3014 TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
3015 TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
3016 NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
3017 HostVMMaxNonCachedPageTableLevels);
3018 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
3019 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
3020 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3021 double DCFCLKCyclesRequiredInPrefetch;
3022 double PrefetchTime;
3023
3024 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
3025 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
3026 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
3027 * BytePerPixelC[k]) / NormalEfficiency
3028 / ReturnBusWidth;
3029 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
3030 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
3031 / NormalEfficiency / ReturnBusWidth
3032 * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
3033 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
3034 / ReturnBusWidth
3035 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
3036 + PixelDCFCLKCyclesRequiredInPrefetch[k];
3037 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
3038 * HTotal[k] / PixelClock[k];
3039 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
3040 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
3041 UrgLatency[i] * GPUVMMaxPageTableLevels *
3042 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
3043
3044 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
3045 UseMALLForPStateChange[k],
3046 SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3047 DRRDisplay[k],
3048 DRAMClockChangeLatencyFinal,
3049 FCLKChangeLatency,
3050 UrgLatency[i],
3051 SREnterPlusExitTime);
3052
3053 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3054 MinimumTWait - UrgLatency[i] *
3055 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3056 GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ?
3057 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3058 DynamicMetadataVMExtraLatency[k];
3059
3060 if (PrefetchTime > 0) {
3061 double ExpectedVRatioPrefetch;
3062
3063 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3064 PixelDCFCLKCyclesRequiredInPrefetch[k] /
3065 DCFCLKCyclesRequiredInPrefetch);
3066 DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3067 PixelDCFCLKCyclesRequiredInPrefetch[k] /
3068 PrefetchPixelLinesTime[k] *
3069 dml_max(1.0, ExpectedVRatioPrefetch) *
3070 dml_max(1.0, ExpectedVRatioPrefetch / 4);
3071 if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3072 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3073 DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3074 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3075 NormalEfficiency / ReturnBusWidth;
3076 }
3077 } else {
3078 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3079 }
3080 if (DynamicMetadataEnable[k] == true) {
3081 double TSetupPipe;
3082 double TdmbfPipe;
3083 double TdmsksPipe;
3084 double TdmecPipe;
3085 double AllowedTimeForUrgentExtraLatency;
3086
3087 dml32_CalculateVUpdateAndDynamicMetadataParameters(
3088 MaxInterDCNTileRepeaters,
3089 RequiredDPPCLKPerSurface[i][j][k],
3090 RequiredDISPCLK[i][j],
3091 ProjectedDCFClkDeepSleep[i][j],
3092 PixelClock[k],
3093 HTotal[k],
3094 VTotal[k] - VActive[k],
3095 DynamicMetadataTransmittedBytes[k],
3096 DynamicMetadataLinesBeforeActiveRequired[k],
3097 Interlace[k],
3098 ProgressiveToInterlaceUnitInOPP,
3099
3100 /* output */
3101 &TSetupPipe,
3102 &TdmbfPipe,
3103 &TdmecPipe,
3104 &TdmsksPipe,
3105 &dummy1,
3106 &dummy2,
3107 &dummy3);
3108 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3109 PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3110 TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3111 if (AllowedTimeForUrgentExtraLatency > 0)
3112 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3113 dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3114 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3115 else
3116 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3117 }
3118 }
3119 DCFCLKRequiredForPeakBandwidth = 0;
3120 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3121 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3122 DCFCLKRequiredForPeakBandwidthPerSurface[k];
3123 }
3124 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3125 (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3126 (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3127 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3128 double MaximumTvmPlus2Tr0PlusTsw;
3129
3130 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3131 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3132 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3133 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3134 } else {
3135 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3136 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3137 MinimumTvmPlus2Tr0 -
3138 PrefetchPixelLinesTime[k] / 4),
3139 (2 * ExtraLatencyCycles +
3140 PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3141 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3142 }
3143 }
3144 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3145 dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
3146 }
3147 }
3148 }
3149
/*
 * Compute the number of extra latency bytes.
 *
 * The base amount is ReorderingBytes plus 1024 bytes per KByte of pixel and
 * meta chunk for the active (DCC) DPPs. When GPUVMEnable is set, each surface
 * adds NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) scaled by
 * HostVMInefficiencyFactor, where "levels" is non-zero only when both
 * GPUVMEnable and HostVMEnable are set and depends on HostVMMinPageSize:
 *   < 2048      -> HostVMMaxNonCachedPageTableLevels
 *   < 1048576   -> max(0, HostVMMaxNonCachedPageTableLevels - 1)
 *   otherwise   -> max(0, HostVMMaxNonCachedPageTableLevels - 2)
 *
 * The sum is accumulated as a double and converted to unsigned int on return.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int DynamicLevels = 0;
	unsigned int BytesPerGroupMultiplier;
	unsigned int surface;
	double TotalBytes;

	if (GPUVMEnable && HostVMEnable) {
		if (HostVMMinPageSize < 2048)
			DynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	TotalBytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (!GPUVMEnable)
		return TotalBytes;

	BytesPerGroupMultiplier = 1 + 8 * DynamicLevels;
	for (surface = 0; surface < NumberOfActiveSurfaces; ++surface) {
		/* The integer product is formed first, then scaled by the factor. */
		TotalBytes += NumberOfDPP[surface] * dpte_group_bytes[surface] *
				BytesPerGroupMultiplier * HostVMInefficiencyFactor;
	}

	return TotalBytes;
}
3190
/*
 * Compute the VUpdate/VReady pixel offsets and the dynamic metadata timing
 * terms.
 *
 * Outputs:
 *   *VUpdateWidthPix  - ceil((14 / DCFClkDeepSleep + 12 / Dppclk +
 *                       repeater delay) * PixelClock)
 *   *VReadyOffsetPix  - ceil(max(150 / Dppclk, repeater delay +
 *                       20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *   *VUpdateOffsetPix - ceil(HTotal / 4)
 *   *TSetup           - sum of the three values above divided by PixelClock
 *   *Tdmbf            - DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   *Tdmec            - HTotal / PixelClock
 *   *Tdmsks           - half of VBlank lines when
 *                       DynamicMetadataLinesBeforeActiveRequired is 0,
 *                       otherwise that many lines; halved again when
 *                       InterlaceEnable is 1 and
 *                       ProgressiveToInterlaceUnitInOPP is false
 *
 * where "repeater delay" is
 * MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles, so they need %f. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3245
dml32_CalculateTWait(unsigned int PrefetchMode,enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,bool DRRDisplay,double DRAMClockChangeLatency,double FCLKChangeLatency,double UrgentLatency,double SREnterPlusExitTime)3246 double dml32_CalculateTWait(
3247 unsigned int PrefetchMode,
3248 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3249 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3250 bool DRRDisplay,
3251 double DRAMClockChangeLatency,
3252 double FCLKChangeLatency,
3253 double UrgentLatency,
3254 double SREnterPlusExitTime)
3255 {
3256 double TWait = 0.0;
3257
3258 if (PrefetchMode == 0 &&
3259 !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3260 !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3261 !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3262 !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3263 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3264 } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3265 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3266 } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3267 TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3268 } else {
3269 TWait = UrgentLatency;
3270 }
3271
3272 #ifdef __DML_VBA_DEBUG__
3273 dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3274 dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3275 #endif
3276 return TWait;
3277 } // CalculateTWait
3278
3279 // Function: get_return_bw_mbps
3280 // Megabyte per second
dml32_get_return_bw_mbps(const soc_bounding_box_st * soc,const int VoltageLevel,const bool HostVMEnable,const double DCFCLK,const double FabricClock,const double DRAMSpeed)3281 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3282 const int VoltageLevel,
3283 const bool HostVMEnable,
3284 const double DCFCLK,
3285 const double FabricClock,
3286 const double DRAMSpeed)
3287 {
3288 double ReturnBW = 0.;
3289 double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3290 double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3291 double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3292 double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3293 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3294 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3295 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3296 double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3297 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3298 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3299 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3300
3301 if (HostVMEnable != true)
3302 ReturnBW = PixelDataOnlyReturnBW;
3303 else
3304 ReturnBW = PixelMixedWithVMDataReturnBW;
3305
3306 #ifdef __DML_VBA_DEBUG__
3307 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3308 dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3309 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
3310 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3311 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
3312 dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth);
3313 dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth);
3314 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
3315 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
3316 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3317 dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW);
3318 #endif
3319 return ReturnBW;
3320 }
3321
3322 // Function: get_return_bw_mbps_vm_only
3323 // Megabyte per second
dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st * soc,const int VoltageLevel,const double DCFCLK,const double FabricClock,const double DRAMSpeed)3324 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3325 const int VoltageLevel,
3326 const double DCFCLK,
3327 const double FabricClock,
3328 const double DRAMSpeed)
3329 {
3330 double VMDataOnlyReturnBW = dml_min3(
3331 soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3332 FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3333 * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3334 DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3335 * (VoltageLevel < 2 ?
3336 soc->pct_ideal_dram_bw_after_urgent_strobe :
3337 soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3338 #ifdef __DML_VBA_DEBUG__
3339 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3340 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
3341 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3342 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
3343 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3344 #endif
3345 return VMDataOnlyReturnBW;
3346 }
3347
dml32_CalculateExtraLatency(unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,double DCFCLK,unsigned int TotalNumberOfActiveDPP,unsigned int PixelChunkSizeInKByte,unsigned int TotalNumberOfDCCActiveDPP,unsigned int MetaChunkSize,double ReturnBW,bool GPUVMEnable,bool HostVMEnable,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],double HostVMInefficiencyFactor,double HostVMMinPageSize,unsigned int HostVMMaxNonCachedPageTableLevels)3348 double dml32_CalculateExtraLatency(
3349 unsigned int RoundTripPingLatencyCycles,
3350 unsigned int ReorderingBytes,
3351 double DCFCLK,
3352 unsigned int TotalNumberOfActiveDPP,
3353 unsigned int PixelChunkSizeInKByte,
3354 unsigned int TotalNumberOfDCCActiveDPP,
3355 unsigned int MetaChunkSize,
3356 double ReturnBW,
3357 bool GPUVMEnable,
3358 bool HostVMEnable,
3359 unsigned int NumberOfActiveSurfaces,
3360 unsigned int NumberOfDPP[],
3361 unsigned int dpte_group_bytes[],
3362 double HostVMInefficiencyFactor,
3363 double HostVMMinPageSize,
3364 unsigned int HostVMMaxNonCachedPageTableLevels)
3365 {
3366 double ExtraLatencyBytes;
3367 double ExtraLatency;
3368
3369 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3370 ReorderingBytes,
3371 TotalNumberOfActiveDPP,
3372 PixelChunkSizeInKByte,
3373 TotalNumberOfDCCActiveDPP,
3374 MetaChunkSize,
3375 GPUVMEnable,
3376 HostVMEnable,
3377 NumberOfActiveSurfaces,
3378 NumberOfDPP,
3379 dpte_group_bytes,
3380 HostVMInefficiencyFactor,
3381 HostVMMinPageSize,
3382 HostVMMaxNonCachedPageTableLevels);
3383
3384 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3385
3386 #ifdef __DML_VBA_DEBUG__
3387 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3388 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3389 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3390 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3391 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3392 #endif
3393
3394 return ExtraLatency;
3395 } // CalculateExtraLatency
3396
/*
 * dml32_CalculatePrefetchSchedule - derive the prefetch schedule of pipe k.
 *
 * From the VStartup position, the latency figures and the byte counts that
 * have to be fetched before active (page tables, meta/PTE rows and the first
 * swaths), this computes the number of destination lines available for
 * prefetch, the bandwidth needed inside that window and the resulting
 * prefetch vertical ratios.
 *
 * Time values look like microseconds judging by the "us" suffix in the debug
 * strings - TODO confirm against the callers.
 *
 * Inputs:
 *   v                        mode library state. Read here: GPUVM/HostVM
 *                            enables and page table level counts, the
 *                            DPPCLK/DISPCLK delay constants, MaxVRatioPre,
 *                            MaxInterDCNTileRepeaters, OutputFormat[k] and
 *                            the dynamic metadata settings of pipe k.
 *   k                        index into the per-pipe arrays of v.
 *   HostVMInefficiencyFactor multiplier applied to the PDE/PTE byte counts.
 *   myPipe                   per-pipe clocks, timing and surface format.
 *   DSCDelay                 added as-is to the after-scaler pixel delay.
 *   DPP_RECOUT_WIDTH         scaled by (DPPPerSurface - 1) and added to the
 *                            after-scaler pixel delay.
 *   VStartup, MaxVStartup    VStartup is used in lines (multiplied by the
 *                            line time); the min_Lsw based bandwidth
 *                            adjustment only applies when both are equal.
 *   UrgentLatency            used as the cost of one trip to memory.
 *   UrgentExtraLatency       additional latency term of the VM/row trips.
 *   TCalc, TWait             subtracted from the time available to prefetch.
 *   PDEAndMetaPTEBytesFrame, MetaRowByte, PixelPTEBytesPerRow
 *                            byte counts fetched ahead of the pixel data.
 *   PrefetchSourceLinesY/C, swath_width_luma_ub/chroma_ub
 *                            together with BytePerPixelY/C give the number
 *                            of swath bytes to prefetch.
 *   SwathWidthY, SwathWidthC not referenced by this function.
 *   VInitPreFillY/C, MaxNumSwathY/C, SwathHeightY/C
 *                            used for the swath based bound on the prefetch
 *                            vertical ratios.
 *   TPreReq                  minimum prefetch time that must be available.
 *
 * Outputs: every pointer parameter from DSTXAfterScaler onwards. TSetup,
 * VUpdateOffsetPix, VUpdateWidthPix and VReadyOffsetPix are handed to
 * dml32_CalculateVUpdateAndDynamicMetadataParameters(); *TSetup is read back
 * afterwards.
 *
 * Return: false when a schedule was found. true on failure, in which case
 * *PrefetchBandwidth, the DestinationLines* outputs, the VRatioPrefetch*
 * outputs and the RequiredPrefetchPixDataBW* outputs are forced to 0.
 * true is also returned early, before most outputs are written, when Dppclk
 * or Dispclk is 0.
 */
bool dml32_CalculatePrefetchSchedule(
		struct vba_vars_st *v,
		unsigned int k,
		double HostVMInefficiencyFactor,
		DmlPipe *myPipe,
		unsigned int DSCDelay,
		unsigned int DPP_RECOUT_WIDTH,
		unsigned int VStartup,
		unsigned int MaxVStartup,
		double UrgentLatency,
		double UrgentExtraLatency,
		double TCalc,
		unsigned int PDEAndMetaPTEBytesFrame,
		unsigned int MetaRowByte,
		unsigned int PixelPTEBytesPerRow,
		double PrefetchSourceLinesY,
		unsigned int SwathWidthY,
		unsigned int VInitPreFillY,
		unsigned int MaxNumSwathY,
		double PrefetchSourceLinesC,
		unsigned int SwathWidthC,
		unsigned int VInitPreFillC,
		unsigned int MaxNumSwathC,
		unsigned int swath_width_luma_ub,
		unsigned int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double TWait,
		double TPreReq,
		/* Output */
		double *DSTXAfterScaler,
		double *DSTYAfterScaler,
		double *DestinationLinesForPrefetch,
		double *PrefetchBandwidth,
		double *DestinationLinesToRequestVMInVBlank,
		double *DestinationLinesToRequestRowInVBlank,
		double *VRatioPrefetchY,
		double *VRatioPrefetchC,
		double *RequiredPrefetchPixDataBWLuma,
		double *RequiredPrefetchPixDataBWChroma,
		bool *NotEnoughTimeForDynamicMetadata,
		double *Tno_bw,
		double *prefetch_vmrow_bw,
		double *Tdmdl_vm,
		double *Tdmdl,
		double *TSetup,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
	bool MyError = false;
	unsigned int DPPCycles, DISPCLKCycles;
	double DSTTotalPixelsAfterScaler;
	double LineTime;
	double dst_y_prefetch_equ;
	double prefetch_bw_oto;
	double Tvm_oto;
	double Tr0_oto;
	double Tvm_oto_lines;
	double Tr0_oto_lines;
	double dst_y_prefetch_oto;
	double TimeForFetchingMetaPTE = 0;
	double TimeForFetchingRowInVBlank = 0;
	double LinesToRequestPrefetchPixelData = 0;
	double LinesForPrefetchBandwidth = 0;
	unsigned int HostVMDynamicLevelsTrips;
	double trip_to_mem;
	double Tvm_trips;
	double Tr0_trips;
	double Tvm_trips_rounded;
	double Tr0_trips_rounded;
	double Lsw_oto;
	double Tpre_rounded;
	double prefetch_bw_equ;
	double Tvm_equ;
	double Tr0_equ;
	double Tdmbf;
	double Tdmec;
	double Tdmsks;
	double prefetch_sw_bytes;
	double bytes_pp;
	double dep_bytes;
	unsigned int max_vratio_pre = v->MaxVRatioPre;
	double min_Lsw;
	double Tsw_est1 = 0;
	double Tsw_est3 = 0;

	/* Host VM page walks only add trips when both GPUVM and HostVM are on. */
	if (v->GPUVMEnable == true && v->HostVMEnable == true)
		HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
	else
		HostVMDynamicLevelsTrips = 0;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable);
	dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels);
	dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
	dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
			__func__, v->HostVMEnable, HostVMInefficiencyFactor);
#endif
	dml32_CalculateVUpdateAndDynamicMetadataParameters(
			v->MaxInterDCNTileRepeaters,
			myPipe->Dppclk,
			myPipe->Dispclk,
			myPipe->DCFClkDeepSleep,
			myPipe->PixelClock,
			myPipe->HTotal,
			myPipe->VBlank,
			v->DynamicMetadataTransmittedBytes[k],
			v->DynamicMetadataLinesBeforeActiveRequired[k],
			myPipe->InterlaceEnable,
			myPipe->ProgressiveToInterlaceUnitInOPP,
			TSetup,

			/* output */
			&Tdmbf,
			&Tdmec,
			&Tdmsks,
			VUpdateOffsetPix,
			VUpdateWidthPix,
			VReadyOffsetPix);

	LineTime = myPipe->HTotal / myPipe->PixelClock;
	trip_to_mem = UrgentLatency;
	Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);

	if (v->DynamicMetadataVMEnabled == true)
		*Tdmdl = TWait + Tvm_trips + trip_to_mem;
	else
		*Tdmdl = TWait + UrgentExtraLatency;

#ifdef __DML_VBA_ALLOW_DELTA__
	if (v->DynamicMetadataEnable[k] == false)
		*Tdmdl = 0.0;
#endif

	/*
	 * Dynamic metadata only fits when the time between VStartup and active
	 * covers setup plus all metadata transfer stages.
	 */
	*NotEnoughTimeForDynamicMetadata = false;
	if (v->DynamicMetadataEnable[k] == true &&
			VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
		*NotEnoughTimeForDynamicMetadata = true;
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
		dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
				__func__, Tdmbf);
		dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
		dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
				__func__, Tdmsks);
		dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
				__func__, *Tdmdl);
#endif
	}

	*Tdmdl_vm = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true &&
			v->GPUVMEnable == true ? TWait + Tvm_trips : 0);

	/* Pipeline delay after the scaler, first in DPPCLK / DISPCLK cycles. */
	if (myPipe->ScalerEnabled)
		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
	else
		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;

	DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;

	DISPCLKCycles = v->DISPCLKDelaySubtotal;

	/*
	 * Both clocks are used as divisors below. Note that on this early exit
	 * none of the outputs assigned further down have been written yet.
	 */
	if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
		return true;

	/* Convert the cycle counts to pixel clocks and add the DSC delay. */
	*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
			myPipe->PixelClock / myPipe->Dispclk + DSCDelay;

	*DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
			+ (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
			+ ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
					myPipe->HActive / 2 : 0)
			+ ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
	dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
	dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
	dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
	dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk);
	dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
	dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode);
	dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
	/* *DSTXAfterScaler is a double, so it has to be printed with %f. */
	dml_print("DML::%s: DSTXAfterScaler: %f\n", __func__, *DSTXAfterScaler);
#endif

	if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
		*DSTYAfterScaler = 1;
	else
		*DSTYAfterScaler = 0;

	/* Re-split the total pixel delay into whole lines (Y) and a remainder (X). */
	DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
	*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
	*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DSTXAfterScaler: %f (final)\n", __func__, *DSTXAfterScaler);
	dml_print("DML::%s: DSTYAfterScaler: %f (final)\n", __func__, *DSTYAfterScaler);
#endif

	Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);

	/* Round the trip times up to quarter lines and derive Tno_bw. */
	if (v->GPUVMEnable == true) {
		Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
		if (v->GPUVMMaxPageTableLevels >= 3) {
			*Tno_bw = UrgentExtraLatency + trip_to_mem *
					(double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
		} else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) {
			Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
					4.0 * LineTime; // VBA_ERROR
			*Tno_bw = UrgentExtraLatency;
		} else {
			*Tno_bw = 0;
		}
	} else if (myPipe->DCCEnable == true) {
		Tvm_trips_rounded = LineTime / 4.0;
		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
		*Tno_bw = 0;
	} else {
		Tvm_trips_rounded = LineTime / 4.0;
		Tr0_trips_rounded = LineTime / 2.0;
		*Tno_bw = 0;
	}
	Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
	Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);

	/*
	 * NOTE(review): if BytePerPixelC is an integer type the division below
	 * truncates (e.g. 2 / 4 == 0) - confirm this matches the intended formula.
	 */
	if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
			|| myPipe->SourcePixelFormat == dm_420_12) {
		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
	} else {
		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
	}

	/* "oto" schedule: prefetch at (at least) the one-to-one pixel rate. */
	prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
			+ PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
	prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
			prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));

	min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
	min_Lsw = dml_max(min_Lsw, 1.0);
	Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;

	if (v->GPUVMEnable == true) {
		Tvm_oto = dml_max3(
				Tvm_trips,
				*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
				LineTime / 4.0);
	} else {
		Tvm_oto = LineTime / 4.0;
	}

	if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
		Tr0_oto = dml_max4(
				Tr0_trips,
				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
				(LineTime - Tvm_oto)/2.0,
				LineTime / 4.0);
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
		dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
		/* Print the term exactly as it enters dml_max4() above. */
		dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, (LineTime - Tvm_oto) / 2.0);
		dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
#endif
	} else {
		Tr0_oto = (LineTime - Tvm_oto) / 2.0;
	}

	Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
	Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
	dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;

	/* "equ" schedule: use every line left between VStartup and active. */
	dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
			(*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);

	dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__);
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
	dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
	dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
	dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
	dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
	dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
	dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
	dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
	dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
	dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
	dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
	dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
	dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
	dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
	dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
	dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
	dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
	dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
	dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
	dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
	dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
	dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
	dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
	dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
#endif

	/* Round to a quarter line; the 0.125 bias turns the floor into round-to-nearest. */
	dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
	Tpre_rounded = dst_y_prefetch_equ * LineTime;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
	dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
	dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
	dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
			__func__, VStartup * LineTime);
	dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
	dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
	dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
	dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
	dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
	dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
	dml_print("DML::%s: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n",
			__func__, *DSTYAfterScaler);
#endif
	dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
			MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);

	/* Never let the swath byte count drop below the dependent fetches. */
	if (prefetch_sw_bytes < dep_bytes)
		prefetch_sw_bytes = 2 * dep_bytes;

	/* Defaults for the case that no schedule can be computed. */
	*PrefetchBandwidth = 0;
	*DestinationLinesToRequestVMInVBlank = 0;
	*DestinationLinesToRequestRowInVBlank = 0;
	*VRatioPrefetchY = 0;
	*VRatioPrefetchC = 0;
	*RequiredPrefetchPixDataBWLuma = 0;
	*RequiredPrefetchPixDataBWChroma = 0;

	if (dst_y_prefetch_equ > 1 &&
			(Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) {
		double PrefetchBandwidth1;
		double PrefetchBandwidth2;
		double PrefetchBandwidth3;
		double PrefetchBandwidth4;

		/*
		 * Four candidate bandwidths, differing in whether the VM fetch
		 * and/or the row fetch are bound by bandwidth (bytes stay in the
		 * numerator) or by trip latency (rounded trip time leaves the
		 * denominator instead).
		 */
		if (Tpre_rounded - *Tno_bw > 0) {
			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
					+ prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
			Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
		} else {
			PrefetchBandwidth1 = 0;
		}

		if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
				&& Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
		}

		if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
			PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
					(Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
		else
			PrefetchBandwidth2 = 0;

		if (Tpre_rounded - Tvm_trips_rounded > 0) {
			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
					+ prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
			Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
		} else {
			PrefetchBandwidth3 = 0;
		}

		if (VStartup == MaxVStartup &&
				(Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
				LineTime - Tvm_trips_rounded > 0) {
			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
		}

		if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
			PrefetchBandwidth4 = prefetch_sw_bytes /
					(Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
		} else {
			PrefetchBandwidth4 = 0;
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
		dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
		dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
		dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
		dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
		dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
		dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
		dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
		dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
#endif
		{
			/*
			 * A candidate is valid when the assumption it was built
			 * on holds: ">=" means the stage is bandwidth bound,
			 * "<" means it is bound by the rounded trip time.
			 */
			bool Case1OK = PrefetchBandwidth1 > 0
					&& *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
							>= Tvm_trips_rounded
					&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
							/ PrefetchBandwidth1 >= Tr0_trips_rounded;
			bool Case2OK = PrefetchBandwidth2 > 0
					&& *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
							>= Tvm_trips_rounded
					&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
							/ PrefetchBandwidth2 < Tr0_trips_rounded;
			bool Case3OK = PrefetchBandwidth3 > 0
					&& *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3
							< Tvm_trips_rounded
					&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
							/ PrefetchBandwidth3 >= Tr0_trips_rounded;

			if (Case1OK)
				prefetch_bw_equ = PrefetchBandwidth1;
			else if (Case2OK)
				prefetch_bw_equ = PrefetchBandwidth2;
			else if (Case3OK)
				prefetch_bw_equ = PrefetchBandwidth3;
			else
				prefetch_bw_equ = PrefetchBandwidth4;

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
			dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
			dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
			dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
#endif

			if (prefetch_bw_equ > 0) {
				if (v->GPUVMEnable == true) {
					Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
							HostVMInefficiencyFactor / prefetch_bw_equ,
							Tvm_trips, LineTime / 4);
				} else {
					Tvm_equ = LineTime / 4;
				}

				if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
					Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
							HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
							(LineTime - Tvm_equ) / 2, LineTime / 4);
				} else {
					Tr0_equ = (LineTime - Tvm_equ) / 2;
				}
			} else {
				Tvm_equ = 0;
				Tr0_equ = 0;
#ifdef __DML_VBA_DEBUG__
				dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
#endif
			}
		}

		/* Prefer the shorter one-to-one schedule when it fits. */
		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
			if (dst_y_prefetch_oto * LineTime < TPreReq) {
				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
			} else {
				*DestinationLinesForPrefetch = dst_y_prefetch_oto;
			}
			TimeForFetchingMetaPTE = Tvm_oto;
			TimeForFetchingRowInVBlank = Tr0_oto;
			*PrefetchBandwidth = prefetch_bw_oto;
			/* Clamp to oto for bandwidth calculation */
			LinesForPrefetchBandwidth = dst_y_prefetch_oto;
		} else {
			*DestinationLinesForPrefetch = dst_y_prefetch_equ;
			TimeForFetchingMetaPTE = Tvm_equ;
			TimeForFetchingRowInVBlank = Tr0_equ;
			*PrefetchBandwidth = prefetch_bw_equ;
			/* Clamp to equ for bandwidth calculation */
			LinesForPrefetchBandwidth = dst_y_prefetch_equ;
		}

		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;

		*DestinationLinesToRequestRowInVBlank =
				dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;

		/* Whatever is left after the VM and two row fetches is for pixel data. */
		LinesToRequestPrefetchPixelData = LinesForPrefetchBandwidth -
				*DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
		dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
				__func__, *DestinationLinesToRequestVMInVBlank);
		dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
		dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
		dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
				__func__, *DestinationLinesToRequestRowInVBlank);
		dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
		dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
#endif

		if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
			*VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
			*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
			dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
			dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
#endif
			if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
				if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
					*VRatioPrefetchY =
							dml_max((double) PrefetchSourceLinesY /
									LinesToRequestPrefetchPixelData,
									(double) MaxNumSwathY * SwathHeightY /
									(LinesToRequestPrefetchPixelData -
									(VInitPreFillY - 3.0) / 2.0));
					*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
				} else {
					MyError = true;
					*VRatioPrefetchY = 0;
				}
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
				dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
				dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
#endif
			}

			*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
			*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
			dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
			dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
#endif
			/*
			 * NOTE(review): unlike the luma path there is no
			 * "VInitPreFillC > 3" guard here - confirm the
			 * asymmetry is intentional.
			 */
			if ((SwathHeightC > 4)) {
				if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
					*VRatioPrefetchC =
							dml_max(*VRatioPrefetchC,
									(double) MaxNumSwathC * SwathHeightC /
									(LinesToRequestPrefetchPixelData -
									(VInitPreFillC - 3.0) / 2.0));
					*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
				} else {
					MyError = true;
					*VRatioPrefetchC = 0;
				}
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
				dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
				dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
#endif
			}

			*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
					/ LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
					/ LineTime;

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
			dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
			dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
					__func__, *RequiredPrefetchPixDataBWLuma);
#endif
			*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
					LinesToRequestPrefetchPixelData
					* myPipe->BytePerPixelC
					* swath_width_chroma_ub / LineTime;
		} else {
			MyError = true;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
					__func__, LinesToRequestPrefetchPixelData);
#endif
			*VRatioPrefetchY = 0;
			*VRatioPrefetchC = 0;
			*RequiredPrefetchPixDataBWLuma = 0;
			*RequiredPrefetchPixDataBWChroma = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
				(double)LinesToRequestPrefetchPixelData * LineTime +
				2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
		dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
		dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
				(*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
		dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
		dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
				TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
				((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
				PixelPTEBytesPerRow);
#endif
	} else {
		MyError = true;
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
				__func__, dst_y_prefetch_equ);
#endif
	}

	{
		double prefetch_vm_bw;
		double prefetch_row_bw;

		/* Bandwidth of the VM fetch over the lines reserved for it. */
		if (PDEAndMetaPTEBytesFrame == 0) {
			prefetch_vm_bw = 0;
		} else if (*DestinationLinesToRequestVMInVBlank > 0) {
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
			dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
			dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
					__func__, *DestinationLinesToRequestVMInVBlank);
			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
#endif
			prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
					(*DestinationLinesToRequestVMInVBlank * LineTime);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
#endif
		} else {
			prefetch_vm_bw = 0;
			MyError = true;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
					__func__, *DestinationLinesToRequestVMInVBlank);
#endif
		}

		/* Bandwidth of the row fetch over the lines reserved for it. */
		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
			prefetch_row_bw = 0;
		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
					(*DestinationLinesToRequestRowInVBlank * LineTime);

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
			dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
			dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
					__func__, *DestinationLinesToRequestRowInVBlank);
			dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
#endif
		} else {
			prefetch_row_bw = 0;
			MyError = true;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
					__func__, *DestinationLinesToRequestRowInVBlank);
#endif
		}

		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
	}

	/* On failure hand back a fully zeroed schedule. */
	if (MyError) {
		*PrefetchBandwidth = 0;
		*DestinationLinesToRequestVMInVBlank = 0;
		*DestinationLinesToRequestRowInVBlank = 0;
		*DestinationLinesForPrefetch = 0;
		*VRatioPrefetchY = 0;
		*VRatioPrefetchC = 0;
		*RequiredPrefetchPixDataBWLuma = 0;
		*RequiredPrefetchPixDataBWChroma = 0;
	}

	return MyError;
} // CalculatePrefetchSchedule
4095
dml32_CalculateFlipSchedule(double HostVMInefficiencyFactor,double UrgentExtraLatency,double UrgentLatency,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,bool GPUVMEnable,double HostVMMinPageSize,double PDEAndMetaPTEBytesPerFrame,double MetaRowBytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw,bool DCCEnable,unsigned int dpte_row_height,unsigned int meta_row_height,unsigned int dpte_row_height_chroma,unsigned int meta_row_height_chroma,bool use_one_row_for_frame_flip,double * DestinationLinesToRequestVMInImmediateFlip,double * DestinationLinesToRequestRowInImmediateFlip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)4096 void dml32_CalculateFlipSchedule(
4097 double HostVMInefficiencyFactor,
4098 double UrgentExtraLatency,
4099 double UrgentLatency,
4100 unsigned int GPUVMMaxPageTableLevels,
4101 bool HostVMEnable,
4102 unsigned int HostVMMaxNonCachedPageTableLevels,
4103 bool GPUVMEnable,
4104 double HostVMMinPageSize,
4105 double PDEAndMetaPTEBytesPerFrame,
4106 double MetaRowBytes,
4107 double DPTEBytesPerRow,
4108 double BandwidthAvailableForImmediateFlip,
4109 unsigned int TotImmediateFlipBytes,
4110 enum source_format_class SourcePixelFormat,
4111 double LineTime,
4112 double VRatio,
4113 double VRatioChroma,
4114 double Tno_bw,
4115 bool DCCEnable,
4116 unsigned int dpte_row_height,
4117 unsigned int meta_row_height,
4118 unsigned int dpte_row_height_chroma,
4119 unsigned int meta_row_height_chroma,
4120 bool use_one_row_for_frame_flip,
4121
4122 /* Output */
4123 double *DestinationLinesToRequestVMInImmediateFlip,
4124 double *DestinationLinesToRequestRowInImmediateFlip,
4125 double *final_flip_bw,
4126 bool *ImmediateFlipSupportedForPipe)
4127 {
4128 double min_row_time = 0.0;
4129 unsigned int HostVMDynamicLevelsTrips;
4130 double TimeForFetchingMetaPTEImmediateFlip;
4131 double TimeForFetchingRowInVBlankImmediateFlip;
4132 double ImmediateFlipBW = 1.0;
4133
4134 if (GPUVMEnable == true && HostVMEnable == true)
4135 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4136 else
4137 HostVMDynamicLevelsTrips = 0;
4138
4139 #ifdef __DML_VBA_DEBUG__
4140 dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4141 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
4142 #endif
4143
4144 if (TotImmediateFlipBytes > 0) {
4145 if (use_one_row_for_frame_flip) {
4146 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4147 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4148 } else {
4149 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4150 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4151 }
4152 if (GPUVMEnable == true) {
4153 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4154 HostVMInefficiencyFactor / ImmediateFlipBW,
4155 UrgentExtraLatency + UrgentLatency *
4156 (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4157 LineTime / 4.0);
4158 } else {
4159 TimeForFetchingMetaPTEImmediateFlip = 0;
4160 }
4161 if ((GPUVMEnable == true || DCCEnable == true)) {
4162 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4163 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4164 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4165 } else {
4166 TimeForFetchingRowInVBlankImmediateFlip = 0;
4167 }
4168
4169 *DestinationLinesToRequestVMInImmediateFlip =
4170 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4171 *DestinationLinesToRequestRowInImmediateFlip =
4172 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
4173
4174 if (GPUVMEnable == true) {
4175 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4176 (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4177 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4178 (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4179 } else if ((GPUVMEnable == true || DCCEnable == true)) {
4180 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4181 (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4182 } else {
4183 *final_flip_bw = 0;
4184 }
4185 } else {
4186 TimeForFetchingMetaPTEImmediateFlip = 0;
4187 TimeForFetchingRowInVBlankImmediateFlip = 0;
4188 *DestinationLinesToRequestVMInImmediateFlip = 0;
4189 *DestinationLinesToRequestRowInImmediateFlip = 0;
4190 *final_flip_bw = 0;
4191 }
4192
4193 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4194 if (GPUVMEnable == true && DCCEnable != true) {
4195 min_row_time = dml_min(dpte_row_height *
4196 LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4197 } else if (GPUVMEnable != true && DCCEnable == true) {
4198 min_row_time = dml_min(meta_row_height *
4199 LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4200 } else {
4201 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4202 LineTime / VRatio, dpte_row_height_chroma * LineTime /
4203 VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4204 }
4205 } else {
4206 if (GPUVMEnable == true && DCCEnable != true) {
4207 min_row_time = dpte_row_height * LineTime / VRatio;
4208 } else if (GPUVMEnable != true && DCCEnable == true) {
4209 min_row_time = meta_row_height * LineTime / VRatio;
4210 } else {
4211 min_row_time =
4212 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
4213 }
4214 }
4215
4216 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4217 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4218 > min_row_time) {
4219 *ImmediateFlipSupportedForPipe = false;
4220 } else {
4221 *ImmediateFlipSupportedForPipe = true;
4222 }
4223
4224 #ifdef __DML_VBA_DEBUG__
4225 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4226 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4227 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4228 __func__, *DestinationLinesToRequestVMInImmediateFlip);
4229 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4230 __func__, *DestinationLinesToRequestRowInImmediateFlip);
4231 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4232 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4233 __func__, TimeForFetchingRowInVBlankImmediateFlip);
4234 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4235 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4236 #endif
4237 } // CalculateFlipSchedule
4238
/*
 * Compute the watermarks stored in v->Watermark and, from them, decide which
 * clock-change mechanisms the current configuration can support.
 *
 * Steps, in order:
 *   1. Base watermarks (urgent, USR retraining, DRAM clock change, FCLK
 *      change, stutter and Z8 stutter) from mmSOCParameters.
 *   2. Writeback watermarks; each one gets USRRetrainingLatency added when
 *      v->USRRetrainingRequiredFinal is set.
 *   3. Per surface: the latency that the line buffer plus DET buffer can hide,
 *      and the resulting DRAM clock change / FCLK change / USR retraining
 *      margins (clamped by the writeback margins when writeback is enabled).
 *   4. SynchronizedSurfaces[i][j]: whether two surfaces are treated as sharing
 *      timing.
 *   5. *FCLKChangeSupport, *USRRetrainingSupport and *DRAMClockChangeSupport.
 *   6. SubViewportLinesNeededInMALL[k] for every surface.
 *
 * DCFCLK and ReturnBW are accepted for interface compatibility but are not
 * read by this function.
 *
 * Outputs: DRAMClockChangeSupport, MaxActiveDRAMClockChangeLatencySupported[],
 * SubViewportLinesNeededInMALL[], FCLKChangeSupport,
 * MinActiveFCLKChangeLatencySupported, USRRetrainingSupport,
 * ActiveDRAMClockChangeLatencyMargin[], plus the fields of v->Watermark.
 */
void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
		struct vba_vars_st *v,
		unsigned int PrefetchMode,
		double DCFCLK,
		double ReturnBW,
		SOCParametersList mmSOCParameters,
		double SOCCLK,
		double DCFClkDeepSleep,
		unsigned int DETBufferSizeY[],
		unsigned int DETBufferSizeC[],
		unsigned int SwathHeightY[],
		unsigned int SwathHeightC[],
		double SwathWidthY[],
		double SwathWidthC[],
		unsigned int DPPPerSurface[],
		double BytePerPixelDETY[],
		double BytePerPixelDETC[],
		double DSTXAfterScaler[],
		double DSTYAfterScaler[],
		bool UnboundedRequestEnabled,
		unsigned int CompressedBufferSizeInkByte,

		/* Output */
		enum clock_change_support *DRAMClockChangeSupport,
		double MaxActiveDRAMClockChangeLatencySupported[],
		unsigned int SubViewportLinesNeededInMALL[],
		enum dm_fclock_change_support *FCLKChangeSupport,
		double *MinActiveFCLKChangeLatencySupported,
		bool *USRRetrainingSupport,
		double ActiveDRAMClockChangeLatencyMargin[])
{
	unsigned int i, j, k;
	unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
	unsigned int DRAMClockChangeSupportNumber = 0;
	/* Only read after being assigned below; initialised to keep it well-defined. */
	unsigned int LastSurfaceWithoutMargin = 0;
	unsigned int DRAMClockChangeMethod = 0;
	bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
	double MinActiveFCLKChangeMargin = 0.;
	double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
	double ActiveClockChangeLatencyHidingY;
	double ActiveClockChangeLatencyHidingC;
	double ActiveClockChangeLatencyHiding;
	double EffectiveDETBufferSizeY;
	double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
	double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
	double TotalPixelBW = 0.0;
	bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
	double EffectiveLBLatencyHidingY;
	double EffectiveLBLatencyHidingC;
	double LinesInDETY[DC__NUM_DPP__MAX];
	double LinesInDETC[DC__NUM_DPP__MAX];
	unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
	unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
	double FullDETBufferingTimeY;
	double FullDETBufferingTimeC;
	double WritebackDRAMClockChangeLatencyMargin;
	double WritebackFCLKChangeLatencyMargin;
	double WritebackLatencyHiding;
	bool SameTimingForFCLKChange;

	unsigned int TotalActiveWriteback = 0;
	unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
	unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];

	/* Step 1: base watermarks. */
	v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
	v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
			+ mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
	v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark;
	v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark;
	v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
			+ 10 / DCFClkDeepSleep;
	v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
			+ 10 / DCFClkDeepSleep;
	v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
			+ 10 / DCFClkDeepSleep;
	v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
			+ mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
	dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
	dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
	dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark);
	dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark);
	dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark);
	dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark);
	dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark);
	dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark);
	dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark);
	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
			__func__, v->Watermark.Z8StutterEnterPlusExitWatermark);
#endif

	/* Step 2: writeback watermarks. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (v->WritebackEnable[k] == true)
			TotalActiveWriteback = TotalActiveWriteback + 1;
	}

	/* With more than one active writeback, one chunk transfer time is added. */
	if (TotalActiveWriteback <= 1) {
		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
	} else {
		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
				+ v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
	}
	/*
	 * Add the retraining latency to the urgent writeback watermark. The DRAM
	 * clock change and FCLK change writeback watermarks receive the same adder
	 * after they are computed below. (This statement used to increment
	 * WritebackDRAMClockChangeWatermark, which is unconditionally overwritten
	 * right after, so the adder was lost.)
	 */
	if (v->USRRetrainingRequiredFinal)
		v->Watermark.WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark
				+ mmSOCParameters.USRRetrainingLatency;

	if (TotalActiveWriteback <= 1) {
		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
				+ mmSOCParameters.WritebackLatency;
		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
				+ mmSOCParameters.WritebackLatency;
	} else {
		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK;
	}

	if (v->USRRetrainingRequiredFinal)
		v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
				+ mmSOCParameters.USRRetrainingLatency;

	if (v->USRRetrainingRequiredFinal)
		v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark
				+ mmSOCParameters.USRRetrainingLatency;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
			__func__, v->Watermark.WritebackDRAMClockChangeWatermark);
	dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark);
	dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark);
	dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal);
	dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
#endif

	/* Sum of the per-surface pixel bandwidths; used to split the compressed buffer below. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] +
				SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]);
	}

	/* Step 3: per-surface latency hiding and margins. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {

		LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
		LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);


#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines);
		dml_print("DML::%s: k=%d, v->LineBufferSizeFinal = %d\n", __func__, k, v->LineBufferSizeFinal);
		dml_print("DML::%s: k=%d, v->LBBitPerPixel = %d\n", __func__, k, v->LBBitPerPixel[k]);
		dml_print("DML::%s: k=%d, v->HRatio = %f\n", __func__, k, v->HRatio[k]);
		dml_print("DML::%s: k=%d, v->vtaps = %d\n", __func__, k, v->vtaps[k]);
#endif

		EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
		EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
		EffectiveDETBufferSizeY = DETBufferSizeY[k];

		/* Give the luma DET a bandwidth-proportional share of the compressed buffer. */
		if (UnboundedRequestEnabled) {
			EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
					+ CompressedBufferSizeInkByte * 1024
							* (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k])
							/ (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
		}

		LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
		LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
		FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];

		ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
				- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k];

		if (v->NumberOfActiveSurfaces > 1) {
			ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
					- (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
							/ v->PixelClock[k] / v->VRatio[k];
		}

		if (BytePerPixelDETC[k] > 0) {
			LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
			LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
			FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k])
					/ v->VRatioChroma[k];
			ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
					- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k]
							/ v->PixelClock[k];
			if (v->NumberOfActiveSurfaces > 1) {
				/*
				 * Use floating-point constants, as the luma path above does:
				 * with integer operands "1 / NumberOfActiveSurfaces" would
				 * truncate to 0 for any count above one.
				 */
				ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
						- (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k]
								/ v->PixelClock[k] / v->VRatioChroma[k];
			}
			ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
					ActiveClockChangeLatencyHidingC);
		} else {
			ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
		}

		ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
				- v->Watermark.DRAMClockChangeWatermark;
		ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
				- v->Watermark.FCLKChangeWatermark;
		USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark;

		if (v->WritebackEnable[k]) {
			WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024
					/ (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
							/ (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
			if (v->WritebackPixelFormat[k] == dm_444_64)
				WritebackLatencyHiding = WritebackLatencyHiding / 2;

			WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
					- v->Watermark.WritebackDRAMClockChangeWatermark;

			WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
					- v->Watermark.WritebackFCLKChangeWatermark;

			/*
			 * Clamp each margin by the writeback margin of the same clock
			 * domain (the two writeback margins used to be crossed here).
			 */
			ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
					WritebackDRAMClockChangeLatencyMargin);
			ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
					WritebackFCLKChangeLatencyMargin);
		}
		/* Phantom pipes report 0; other surfaces report margin plus the SOC latency. */
		MaxActiveDRAMClockChangeLatencySupported[k] =
				(v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
						0 :
						(ActiveDRAMClockChangeLatencyMargin[k]
								+ mmSOCParameters.DRAMClockChangeLatency);
	}

	/* Step 4: which pairs of surfaces are treated as synchronized. */
	for (i = 0; i < v->NumberOfActiveSurfaces; ++i) {
		for (j = 0; j < v->NumberOfActiveSurfaces; ++j) {
			if (i == j ||
					(v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) ||
					(v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) ||
					(v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) ||
					(v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] &&
					v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] &&
					v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
					(v->DRRDisplay[i] || v->DRRDisplay[j]))) {
				SynchronizedSurfaces[i][j] = true;
			} else {
				SynchronizedSurfaces[i][j] = false;
			}
		}
	}

	/* Step 5a: FCLK change support. Find the non-phantom surface with the smallest margin. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
				(!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
				ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
			FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
			MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
			SurfaceWithMinActiveFCLKChangeMargin = k;
		}
	}

	*MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;

	/* Smallest margin among surfaces not synchronized with the minimum-margin surface. */
	SameTimingForFCLKChange = true;
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
			if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
					(SameTimingForFCLKChange ||
					ActiveFCLKChangeLatencyMargin[k] <
					SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
				SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
			}
			SameTimingForFCLKChange = false;
		}
	}

	if (MinActiveFCLKChangeMargin > 0) {
		*FCLKChangeSupport = dm_fclock_change_vactive;
	} else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
			(PrefetchMode <= 1)) {
		*FCLKChangeSupport = dm_fclock_change_vblank;
	} else {
		*FCLKChangeSupport = dm_fclock_change_unsupported;
	}

	/* Step 5b: USR retraining is supported unless a non-phantom surface has a negative margin. */
	*USRRetrainingSupport = true;
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
				(USRRetrainingLatencyMargin[k] < 0)) {
			*USRRetrainingSupport = false;
		}
	}

	/*
	 * Step 5c: DRAM clock change support.
	 * DRAMClockChangeSupportNumber: 0 = no non-MALL surface lacks margin,
	 * 1 = the surfaces lacking margin are all synchronized with the first one
	 * found (and PrefetchMode is 0), 2 = anything else.
	 */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
				ActiveDRAMClockChangeLatencyMargin[k] < 0) {
			if (PrefetchMode > 0) {
				DRAMClockChangeSupportNumber = 2;
			} else if (DRAMClockChangeSupportNumber == 0) {
				DRAMClockChangeSupportNumber = 1;
				LastSurfaceWithoutMargin = k;
			} else if (DRAMClockChangeSupportNumber == 1 &&
					!SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
				DRAMClockChangeSupportNumber = 2;
			}
		}
	}

	/* Method: 0 = no MALL use, 1 = full frame, 2 = sub-viewport; the last matching surface wins. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
			DRAMClockChangeMethod = 1;
		else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
			DRAMClockChangeMethod = 2;
	}

	if (DRAMClockChangeMethod == 0) {
		if (DRAMClockChangeSupportNumber == 0)
			*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
		else if (DRAMClockChangeSupportNumber == 1)
			*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
		else
			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
	} else if (DRAMClockChangeMethod == 1) {
		if (DRAMClockChangeSupportNumber == 0)
			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
		else if (DRAMClockChangeSupportNumber == 1)
			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
		else
			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
	} else {
		if (DRAMClockChangeSupportNumber == 0)
			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
		else if (DRAMClockChangeSupportNumber == 1)
			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
		else
			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
	}

	/* Step 6: source lines needed in MALL per surface (max of luma and chroma). */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		unsigned int dst_y_pstate;
		unsigned int src_y_pstate_l;
		unsigned int src_y_pstate_c;
		unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;

		dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1);
		src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]);
		src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
		sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k];

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
		dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
		/* SwathWidthY[] holds doubles, so it must be printed with %f. */
		dml_print("DML::%s: k=%d, SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
		dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
		dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
		dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate);
		dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l);
		dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l);
		dml_print("DML::%s: k=%d, v->meta_row_height = %d\n", __func__, k, v->meta_row_height[k]);
		dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l);
#endif
		SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;

		if (BytePerPixelDETC[k] > 0) {
			src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]);
			src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
			sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k];
			SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c);
			dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c);
			dml_print("DML::%s: k=%d, v->meta_row_height_chroma = %d\n", __func__, k, v->meta_row_height_chroma[k]);
			dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c);
#endif
		}
	}
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
	dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
	dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
			__func__, *MinActiveFCLKChangeLatencySupported);
	dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
#endif
} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4624
/*
 * Return the DISPCLK required by a writeback pipe.
 *
 * Three independent lower bounds are evaluated (horizontal taps, vertical
 * taps and line-buffer limited); the largest is passed through
 * dml32_RoundToDFSGranularity() against DISPCLKDPPCLKVCOSpeed and returned.
 * WritebackPixelFormat is accepted but not used in the calculation.
 */
double dml32_CalculateWriteBackDISPCLK(
		enum source_format_class WritebackPixelFormat,
		double PixelClock,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackHTaps,
		unsigned int WritebackVTaps,
		unsigned int WritebackSourceWidth,
		unsigned int WritebackDestinationWidth,
		unsigned int HTotal,
		unsigned int WritebackLineBufferSize,
		double DISPCLKDPPCLKVCOSpeed)
{
	double horizontal_bound;
	double vertical_bound;
	double line_buffer_bound;
	double line_buffer_pixels;

	horizontal_bound = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;

	vertical_bound = PixelClock *
			(WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;

	/* Pixels per output line that the line buffer cannot cover. */
	line_buffer_pixels = WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0;
	line_buffer_bound = PixelClock * WritebackVTaps * line_buffer_pixels / 6.0 / WritebackSourceWidth;

	return dml32_RoundToDFSGranularity(
			dml_max3(horizontal_bound, vertical_bound, line_buffer_bound),
			1, DISPCLKDPPCLKVCOSpeed);
}
4646
dml32_CalculateMinAndMaxPrefetchMode(enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,unsigned int * MinPrefetchMode,unsigned int * MaxPrefetchMode)4647 void dml32_CalculateMinAndMaxPrefetchMode(
4648 enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,
4649 unsigned int *MinPrefetchMode,
4650 unsigned int *MaxPrefetchMode)
4651 {
4652 if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4653 *MinPrefetchMode = 3;
4654 *MaxPrefetchMode = 3;
4655 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4656 *MinPrefetchMode = 2;
4657 *MaxPrefetchMode = 2;
4658 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4659 *MinPrefetchMode = 1;
4660 *MaxPrefetchMode = 1;
4661 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4662 *MinPrefetchMode = 0;
4663 *MaxPrefetchMode = 0;
4664 } else if (AllowForPStateChangeOrStutterInVBlankFinal ==
4665 dm_prefetch_support_uclk_fclk_and_stutter_if_possible) {
4666 *MinPrefetchMode = 0;
4667 *MaxPrefetchMode = 3;
4668 } else {
4669 *MinPrefetchMode = 0;
4670 *MaxPrefetchMode = 3;
4671 }
4672 } // CalculateMinAndMaxPrefetchMode
4673
dml32_CalculatePixelDeliveryTimes(unsigned int NumberOfActiveSurfaces,double VRatio[],double VRatioChroma[],double VRatioPrefetchY[],double VRatioPrefetchC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],unsigned int DPPPerSurface[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],unsigned int BytePerPixelC[],enum dm_rotation_angle SourceRotation[],unsigned int NumberOfCursors[],unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesC[],unsigned int BlockHeight256BytesC[],double DisplayPipeLineDeliveryTimeLuma[],double DisplayPipeLineDeliveryTimeChroma[],double DisplayPipeLineDeliveryTimeLumaPrefetch[],double DisplayPipeLineDeliveryTimeChromaPrefetch[],double DisplayPipeRequestDeliveryTimeLuma[],double DisplayPipeRequestDeliveryTimeChroma[],double DisplayPipeRequestDeliveryTimeLumaPrefetch[],double DisplayPipeRequestDeliveryTimeChromaPrefetch[],double CursorRequestDeliveryTime[],double CursorRequestDeliveryTimePrefetch[])4674 void dml32_CalculatePixelDeliveryTimes(
4675 unsigned int NumberOfActiveSurfaces,
4676 double VRatio[],
4677 double VRatioChroma[],
4678 double VRatioPrefetchY[],
4679 double VRatioPrefetchC[],
4680 unsigned int swath_width_luma_ub[],
4681 unsigned int swath_width_chroma_ub[],
4682 unsigned int DPPPerSurface[],
4683 double HRatio[],
4684 double HRatioChroma[],
4685 double PixelClock[],
4686 double PSCL_THROUGHPUT[],
4687 double PSCL_THROUGHPUT_CHROMA[],
4688 double Dppclk[],
4689 unsigned int BytePerPixelC[],
4690 enum dm_rotation_angle SourceRotation[],
4691 unsigned int NumberOfCursors[],
4692 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
4693 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
4694 unsigned int BlockWidth256BytesY[],
4695 unsigned int BlockHeight256BytesY[],
4696 unsigned int BlockWidth256BytesC[],
4697 unsigned int BlockHeight256BytesC[],
4698
4699 /* Output */
4700 double DisplayPipeLineDeliveryTimeLuma[],
4701 double DisplayPipeLineDeliveryTimeChroma[],
4702 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
4703 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
4704 double DisplayPipeRequestDeliveryTimeLuma[],
4705 double DisplayPipeRequestDeliveryTimeChroma[],
4706 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
4707 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
4708 double CursorRequestDeliveryTime[],
4709 double CursorRequestDeliveryTimePrefetch[])
4710 {
4711 double req_per_swath_ub;
4712 unsigned int k;
4713
4714 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4715
4716 #ifdef __DML_VBA_DEBUG__
4717 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
4718 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
4719 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
4720 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
4721 dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
4722 dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
4723 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
4724 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
4725 dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
4726 dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
4727 dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
4728 #endif
4729
4730 if (VRatio[k] <= 1) {
4731 DisplayPipeLineDeliveryTimeLuma[k] =
4732 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4733 } else {
4734 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4735 }
4736
4737 if (BytePerPixelC[k] == 0) {
4738 DisplayPipeLineDeliveryTimeChroma[k] = 0;
4739 } else {
4740 if (VRatioChroma[k] <= 1) {
4741 DisplayPipeLineDeliveryTimeChroma[k] =
4742 swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4743 } else {
4744 DisplayPipeLineDeliveryTimeChroma[k] =
4745 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4746 }
4747 }
4748
4749 if (VRatioPrefetchY[k] <= 1) {
4750 DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4751 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4752 } else {
4753 DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4754 swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4755 }
4756
4757 if (BytePerPixelC[k] == 0) {
4758 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
4759 } else {
4760 if (VRatioPrefetchC[k] <= 1) {
4761 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
4762 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4763 } else {
4764 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
4765 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4766 }
4767 }
4768 #ifdef __DML_VBA_DEBUG__
4769 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
4770 __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
4771 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
4772 __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
4773 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
4774 __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
4775 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
4776 __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
4777 #endif
4778 }
4779
4780 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4781 if (!IsVertical(SourceRotation[k]))
4782 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
4783 else
4784 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
4785 #ifdef __DML_VBA_DEBUG__
4786 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
4787 #endif
4788
4789 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
4790 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
4791 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
4792 if (BytePerPixelC[k] == 0) {
4793 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
4794 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
4795 } else {
4796 if (!IsVertical(SourceRotation[k]))
4797 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
4798 else
4799 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
4800 #ifdef __DML_VBA_DEBUG__
4801 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
4802 #endif
4803 DisplayPipeRequestDeliveryTimeChroma[k] =
4804 DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
4805 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
4806 DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
4807 }
4808 #ifdef __DML_VBA_DEBUG__
4809 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
4810 __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
4811 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
4812 __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
4813 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
4814 __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
4815 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
4816 __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
4817 #endif
4818 }
4819
4820 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4821 unsigned int cursor_req_per_width;
4822
4823 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
4824 256.0 / 8.0, 1.0);
4825 if (NumberOfCursors[k] > 0) {
4826 if (VRatio[k] <= 1) {
4827 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4828 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4829 } else {
4830 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4831 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4832 }
4833 if (VRatioPrefetchY[k] <= 1) {
4834 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4835 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4836 } else {
4837 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4838 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4839 }
4840 } else {
4841 CursorRequestDeliveryTime[k] = 0;
4842 CursorRequestDeliveryTimePrefetch[k] = 0;
4843 }
4844 #ifdef __DML_VBA_DEBUG__
4845 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
4846 __func__, k, NumberOfCursors[k]);
4847 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
4848 __func__, k, CursorRequestDeliveryTime[k]);
4849 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
4850 __func__, k, CursorRequestDeliveryTimePrefetch[k]);
4851 #endif
4852 }
4853 } // CalculatePixelDeliveryTimes
4854
/*
 * dml32_CalculateMetaAndPTETimes - per-surface DCC meta-chunk and PTE-group
 * timing values.
 *
 * For every surface index k below NumberOfActiveSurfaces this fills in:
 *   - DST_Y_PER_{PTE,META}_ROW_NOM_{L,C}: the row height divided by the
 *     matching vertical ratio (chroma entries are 0 when BytePerPixelC[k]
 *     is 0);
 *   - TimePer[Chroma]MetaChunk{Nominal,VBlank,Flip}: a number of lines times
 *     HTotal / PixelClock, divided by an upper bound on the meta chunks per
 *     row (all 0 when DCCEnable[k] is false, chroma 0 when BytePerPixelC[k]
 *     is 0);
 *   - time_per_pte_group_{nom,vblank,flip}_{luma,chroma}: the same kind of
 *     quantity divided by an upper bound on the PTE groups per row (all 0
 *     when GPUVMEnable is false, chroma 0 when BytePerPixelC[k] is 0).
 *
 * All array arguments are indexed by surface. The arrays following the
 * "Output" marker are written; DST_Y_PER_PTE_ROW_NOM_{L,C} are also read back
 * in the PTE-group pass. Everything else is only read.
 */
void dml32_CalculateMetaAndPTETimes(
		bool use_one_row_for_frame[],
		unsigned int NumberOfActiveSurfaces,
		bool GPUVMEnable,
		unsigned int MetaChunkSize,
		unsigned int MinMetaChunkSizeBytes,
		unsigned int HTotal[],
		double VRatio[],
		double VRatioChroma[],
		double DestinationLinesToRequestRowInVBlank[],
		double DestinationLinesToRequestRowInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		unsigned int BytePerPixelY[],
		unsigned int BytePerPixelC[],
		enum dm_rotation_angle SourceRotation[],
		unsigned int dpte_row_height[],
		unsigned int dpte_row_height_chroma[],
		unsigned int meta_row_width[],
		unsigned int meta_row_width_chroma[],
		unsigned int meta_row_height[],
		unsigned int meta_row_height_chroma[],
		unsigned int meta_req_width[],
		unsigned int meta_req_width_chroma[],
		unsigned int meta_req_height[],
		unsigned int meta_req_height_chroma[],
		unsigned int dpte_group_bytes[],
		unsigned int PTERequestSizeY[],
		unsigned int PTERequestSizeC[],
		unsigned int PixelPTEReqWidthY[],
		unsigned int PixelPTEReqHeightY[],
		unsigned int PixelPTEReqWidthC[],
		unsigned int PixelPTEReqHeightC[],
		unsigned int dpte_row_width_luma_ub[],
		unsigned int dpte_row_width_chroma_ub[],

		/* Output */
		double DST_Y_PER_PTE_ROW_NOM_L[],
		double DST_Y_PER_PTE_ROW_NOM_C[],
		double DST_Y_PER_META_ROW_NOM_L[],
		double DST_Y_PER_META_ROW_NOM_C[],
		double TimePerMetaChunkNominal[],
		double TimePerChromaMetaChunkNominal[],
		double TimePerMetaChunkVBlank[],
		double TimePerChromaMetaChunkVBlank[],
		double TimePerMetaChunkFlip[],
		double TimePerChromaMetaChunkFlip[],
		double time_per_pte_group_nom_luma[],
		double time_per_pte_group_vblank_luma[],
		double time_per_pte_group_flip_luma[],
		double time_per_pte_group_nom_chroma[],
		double time_per_pte_group_vblank_chroma[],
		double time_per_pte_group_flip_chroma[])
{
	unsigned int k;

	/* Pass 1: row heights scaled by the vertical ratios. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
		if (BytePerPixelC[k] != 0)
			DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
		else
			DST_Y_PER_PTE_ROW_NOM_C[k] = 0;

		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
		if (BytePerPixelC[k] != 0)
			DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
		else
			DST_Y_PER_META_ROW_NOM_C[k] = 0;
	}

	/* Pass 2: time available per DCC meta chunk. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		/* Scratch values, used first for luma and then reused for chroma. */
		unsigned int chunk_width;
		unsigned int min_chunk_width;
		unsigned int whole_chunks;
		unsigned int leftover;
		unsigned int threshold;
		unsigned int chunks_ub;

		if (!DCCEnable[k]) {
			/* Without DCC there are no meta chunks to time. */
			TimePerMetaChunkNominal[k] = 0;
			TimePerMetaChunkVBlank[k] = 0;
			TimePerMetaChunkFlip[k] = 0;
			TimePerChromaMetaChunkNominal[k] = 0;
			TimePerChromaMetaChunkVBlank[k] = 0;
			TimePerChromaMetaChunkFlip[k] = 0;
			continue;
		}

		/* Luma: integer (truncating) arithmetic throughout. */
		chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
		min_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
		whole_chunks = meta_row_width[k] / chunk_width;
		leftover = meta_row_width[k] % chunk_width;
		/* The request dimension subtracted depends on the scan direction. */
		threshold = 2 * min_chunk_width -
				(IsVertical(SourceRotation[k]) ? meta_req_height[k] : meta_req_width[k]);
		/* One extra chunk if the remainder fits the threshold, two otherwise. */
		chunks_ub = whole_chunks + (leftover <= threshold ? 1 : 2);

		TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
				HTotal[k] / PixelClock[k] / chunks_ub;
		TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
				HTotal[k] / PixelClock[k] / chunks_ub;
		TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
				HTotal[k] / PixelClock[k] / chunks_ub;

		if (BytePerPixelC[k] == 0) {
			TimePerChromaMetaChunkNominal[k] = 0;
			TimePerChromaMetaChunkVBlank[k] = 0;
			TimePerChromaMetaChunkFlip[k] = 0;
			continue;
		}

		/* Chroma: same recipe with the chroma plane's dimensions. */
		chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
				meta_row_height_chroma[k];
		min_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
				meta_row_height_chroma[k];
		/* Quotient is formed in double and truncated on assignment. */
		whole_chunks = (double) meta_row_width_chroma[k] / chunk_width;
		leftover = meta_row_width_chroma[k] % chunk_width;
		threshold = 2 * min_chunk_width -
				(IsVertical(SourceRotation[k]) ?
				 meta_req_height_chroma[k] : meta_req_width_chroma[k]);
		chunks_ub = whole_chunks + (leftover <= threshold ? 1 : 2);

		TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
				HTotal[k] / PixelClock[k] / chunks_ub;
		TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
				HTotal[k] / PixelClock[k] / chunks_ub;
		TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
				HTotal[k] / PixelClock[k] / chunks_ub;
	}

	/* Pass 3: time available per PTE group. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		unsigned int group_width_l;
		unsigned int groups_ub_l;
		unsigned int group_width_c;
		unsigned int groups_ub_c;

		if (!GPUVMEnable) {
			time_per_pte_group_nom_luma[k] = 0;
			time_per_pte_group_vblank_luma[k] = 0;
			time_per_pte_group_flip_luma[k] = 0;
			time_per_pte_group_nom_chroma[k] = 0;
			time_per_pte_group_vblank_chroma[k] = 0;
			time_per_pte_group_flip_chroma[k] = 0;
		} else {
			/* Requests per group times the request extent along the scan. */
			group_width_l = (double) dpte_group_bytes[k] / (double) PTERequestSizeY[k] *
					(IsVertical(SourceRotation[k]) ?
					 PixelPTEReqHeightY[k] : PixelPTEReqWidthY[k]);

			/* use_one_row_for_frame halves the ratio before rounding up. */
			groups_ub_l = use_one_row_for_frame[k] ?
					dml_ceil((double) dpte_row_width_luma_ub[k] /
						(double) group_width_l / 2.0, 1.0) :
					dml_ceil((double) dpte_row_width_luma_ub[k] /
						(double) group_width_l, 1.0);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n",
					__func__, k, use_one_row_for_frame[k]);
			dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n",
					__func__, k, dpte_group_bytes[k]);
			dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n",
					__func__, k, PTERequestSizeY[k]);
			dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n",
					__func__, k, PixelPTEReqWidthY[k]);
			dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n",
					__func__, k, PixelPTEReqHeightY[k]);
			dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n",
					__func__, k, dpte_row_width_luma_ub[k]);
			dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n",
					__func__, k, group_width_l);
			dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n",
					__func__, k, groups_ub_l);
#endif

			time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
					HTotal[k] / PixelClock[k] / groups_ub_l;
			time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
					HTotal[k] / PixelClock[k] / groups_ub_l;
			time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / groups_ub_l;

			if (BytePerPixelC[k] != 0) {
				group_width_c = (double) dpte_group_bytes[k] /
						(double) PTERequestSizeC[k] *
						(IsVertical(SourceRotation[k]) ?
						 PixelPTEReqHeightC[k] : PixelPTEReqWidthC[k]);

				groups_ub_c = use_one_row_for_frame[k] ?
						dml_ceil((double) dpte_row_width_chroma_ub[k] /
							(double) group_width_c / 2.0, 1.0) :
						dml_ceil((double) dpte_row_width_chroma_ub[k] /
							(double) group_width_c, 1.0);
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n",
						__func__, k, dpte_row_width_chroma_ub[k]);
				dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n",
						__func__, k, group_width_c);
				dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n",
						__func__, k, groups_ub_c);
#endif
				time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
						HTotal[k] / PixelClock[k] / groups_ub_c;
				time_per_pte_group_vblank_chroma[k] =
						DestinationLinesToRequestRowInVBlank[k] *
						HTotal[k] / PixelClock[k] / groups_ub_c;
				time_per_pte_group_flip_chroma[k] =
						DestinationLinesToRequestRowInImmediateFlip[k] *
						HTotal[k] / PixelClock[k] / groups_ub_c;
			} else {
				time_per_pte_group_nom_chroma[k] = 0;
				time_per_pte_group_vblank_chroma[k] = 0;
				time_per_pte_group_flip_chroma[k] = 0;
			}
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n",
				__func__, k, DestinationLinesToRequestRowInVBlank[k]);
		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n",
				__func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n",
				__func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n",
				__func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n",
				__func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n",
				__func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n",
				__func__, k, TimePerMetaChunkNominal[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n",
				__func__, k, TimePerMetaChunkVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n",
				__func__, k, TimePerMetaChunkFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n",
				__func__, k, TimePerChromaMetaChunkNominal[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n",
				__func__, k, TimePerChromaMetaChunkVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n",
				__func__, k, TimePerChromaMetaChunkFlip[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n",
				__func__, k, time_per_pte_group_nom_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n",
				__func__, k, time_per_pte_group_vblank_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n",
				__func__, k, time_per_pte_group_flip_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n",
				__func__, k, time_per_pte_group_nom_chroma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
				__func__, k, time_per_pte_group_vblank_chroma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n",
				__func__, k, time_per_pte_group_flip_chroma[k]);
#endif
	}
} // CalculateMetaAndPTETimes
5128
/*
 * dml32_CalculateVMGroupAndRequestTimes - per-surface time per VM group and
 * per VM request, for the VBlank and immediate-flip line budgets.
 *
 * For each surface index k below NumberOfActiveSurfaces:
 *   - if GPUVMEnable is false, or DCCEnable[k] is false while
 *     GPUVMMaxPageTableLevels is at most 1, all four outputs are set to 0;
 *   - otherwise a group count (byte counts divided by vm_group_bytes[k] and
 *     rounded up) and a request count (byte counts divided by 64, truncated)
 *     are formed from the dpde0 and/or meta-PTE byte counts, and each output
 *     is lines * HTotal / PixelClock divided by the respective count. The
 *     results are halved when GPUVMMaxPageTableLevels exceeds 2.
 *
 * Chroma byte counts only contribute when BytePerPixelC[k] is non-zero.
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are accepted but not
 * referenced by this function.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActiveSurfaces,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		unsigned int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		unsigned int dpte_row_width_luma_ub[],
		unsigned int dpte_row_width_chroma_ub[],
		unsigned int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		unsigned int meta_pte_bytes_per_frame_ub_l[],
		unsigned int meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	unsigned int k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		unsigned int group_count;
		unsigned int request_count;

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			/* Nothing to fetch through the VM path for this surface. */
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		} else {
			const bool has_chroma = BytePerPixelC[k] > 0;
			/* Group total is accumulated in double, then truncated once. */
			double groups;

			if (!DCCEnable[k]) {
				/* No DCC: only the dpde0 bytes count. */
				groups = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1.0);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					groups += dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					request_count += dpde0_bytes_per_frame_ub_c[k] / 64;
				}
			} else if (GPUVMMaxPageTableLevels == 1) {
				/* DCC with a single page-table level: only the meta PTE bytes count. */
				groups = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1.0);
				request_count = meta_pte_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					groups += dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					request_count += meta_pte_bytes_per_frame_ub_c[k] / 64;
				}
			} else {
				/*
				 * DCC with other level counts: dpde0 and meta PTE bytes both
				 * count, plus a fixed 2 groups with chroma or 1 without.
				 */
				groups = (has_chroma ? 2 : 1) +
						dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					groups += dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1);
					request_count += dpde0_bytes_per_frame_ub_c[k] / 64;
				}
				groups += dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1);
				request_count += meta_pte_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					groups += dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1);
					request_count += meta_pte_bytes_per_frame_ub_c[k] / 64;
				}
			}
			group_count = (unsigned int) groups;

			TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / group_count;
			TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / group_count;
			TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / request_count;
			TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / request_count;

			/* More than two page-table levels: halve every result. */
			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] /= 2;
				TimePerVMGroupFlip[k] /= 2;
				TimePerVMRequestVBlank[k] /= 2;
				TimePerVMRequestFlip[k] /= 2;
			}
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes
5282
dml32_CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,enum source_format_class SourcePixelFormat,unsigned int SurfaceWidthLuma,unsigned int SurfaceWidthChroma,unsigned int SurfaceHeightLuma,unsigned int SurfaceHeightChroma,unsigned int nomDETInKByte,unsigned int RequestHeight256ByteLuma,unsigned int RequestHeight256ByteChroma,enum dm_swizzle_mode TilingFormat,unsigned int BytePerPixelY,unsigned int BytePerPixelC,double BytePerPixelDETY,double BytePerPixelDETC,enum dm_rotation_angle SourceRotation,unsigned int * MaxUncompressedBlockLuma,unsigned int * MaxUncompressedBlockChroma,unsigned int * MaxCompressedBlockLuma,unsigned int * MaxCompressedBlockChroma,unsigned int * IndependentBlockLuma,unsigned int * IndependentBlockChroma)5283 void dml32_CalculateDCCConfiguration(
5284 bool DCCEnabled,
5285 bool DCCProgrammingAssumesScanDirectionUnknown,
5286 enum source_format_class SourcePixelFormat,
5287 unsigned int SurfaceWidthLuma,
5288 unsigned int SurfaceWidthChroma,
5289 unsigned int SurfaceHeightLuma,
5290 unsigned int SurfaceHeightChroma,
5291 unsigned int nomDETInKByte,
5292 unsigned int RequestHeight256ByteLuma,
5293 unsigned int RequestHeight256ByteChroma,
5294 enum dm_swizzle_mode TilingFormat,
5295 unsigned int BytePerPixelY,
5296 unsigned int BytePerPixelC,
5297 double BytePerPixelDETY,
5298 double BytePerPixelDETC,
5299 enum dm_rotation_angle SourceRotation,
5300 /* Output */
5301 unsigned int *MaxUncompressedBlockLuma,
5302 unsigned int *MaxUncompressedBlockChroma,
5303 unsigned int *MaxCompressedBlockLuma,
5304 unsigned int *MaxCompressedBlockChroma,
5305 unsigned int *IndependentBlockLuma,
5306 unsigned int *IndependentBlockChroma)
5307 {
5308 typedef enum {
5309 REQ_256Bytes,
5310 REQ_128BytesNonContiguous,
5311 REQ_128BytesContiguous,
5312 REQ_NA
5313 } RequestType;
5314
5315 RequestType RequestLuma;
5316 RequestType RequestChroma;
5317
5318 unsigned int segment_order_horz_contiguous_luma;
5319 unsigned int segment_order_horz_contiguous_chroma;
5320 unsigned int segment_order_vert_contiguous_luma;
5321 unsigned int segment_order_vert_contiguous_chroma;
5322 unsigned int req128_horz_wc_l;
5323 unsigned int req128_horz_wc_c;
5324 unsigned int req128_vert_wc_l;
5325 unsigned int req128_vert_wc_c;
5326 unsigned int MAS_vp_horz_limit;
5327 unsigned int MAS_vp_vert_limit;
5328 unsigned int max_vp_horz_width;
5329 unsigned int max_vp_vert_height;
5330 unsigned int eff_surf_width_l;
5331 unsigned int eff_surf_width_c;
5332 unsigned int eff_surf_height_l;
5333 unsigned int eff_surf_height_c;
5334 unsigned int full_swath_bytes_horz_wc_l;
5335 unsigned int full_swath_bytes_horz_wc_c;
5336 unsigned int full_swath_bytes_vert_wc_l;
5337 unsigned int full_swath_bytes_vert_wc_c;
5338 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5339
5340 unsigned int yuv420;
5341 unsigned int horz_div_l;
5342 unsigned int horz_div_c;
5343 unsigned int vert_div_l;
5344 unsigned int vert_div_c;
5345
5346 unsigned int swath_buf_size;
5347 double detile_buf_vp_horz_limit;
5348 double detile_buf_vp_vert_limit;
5349
5350 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5351 SourcePixelFormat == dm_420_12) ? 1 : 0);
5352 horz_div_l = 1;
5353 horz_div_c = 1;
5354 vert_div_l = 1;
5355 vert_div_c = 1;
5356
5357 if (BytePerPixelY == 1)
5358 vert_div_l = 0;
5359 if (BytePerPixelC == 1)
5360 vert_div_c = 0;
5361
5362 if (BytePerPixelC == 0) {
5363 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5364 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5365 BytePerPixelY / (1 + horz_div_l));
5366 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5367 (1 + vert_div_l));
5368 } else {
5369 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5370 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5371 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5372 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5373 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5374 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5375 (1 + vert_div_c) / (1 + yuv420));
5376 }
5377
5378 if (SourcePixelFormat == dm_420_10) {
5379 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5380 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5381 }
5382
5383 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5384 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5385
5386 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5387 MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5388 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5389 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5390 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5391 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
5392 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5393 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
5394
5395 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5396 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5397 if (BytePerPixelC > 0) {
5398 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5399 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5400 } else {
5401 full_swath_bytes_horz_wc_c = 0;
5402 full_swath_bytes_vert_wc_c = 0;
5403 }
5404
5405 if (SourcePixelFormat == dm_420_10) {
5406 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5407 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5408 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5409 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5410 }
5411
5412 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5413 req128_horz_wc_l = 0;
5414 req128_horz_wc_c = 0;
5415 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5416 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5417 req128_horz_wc_l = 0;
5418 req128_horz_wc_c = 1;
5419 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5420 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5421 req128_horz_wc_l = 1;
5422 req128_horz_wc_c = 0;
5423 } else {
5424 req128_horz_wc_l = 1;
5425 req128_horz_wc_c = 1;
5426 }
5427
5428 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5429 req128_vert_wc_l = 0;
5430 req128_vert_wc_c = 0;
5431 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5432 full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5433 req128_vert_wc_l = 0;
5434 req128_vert_wc_c = 1;
5435 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5436 full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5437 req128_vert_wc_l = 1;
5438 req128_vert_wc_c = 0;
5439 } else {
5440 req128_vert_wc_l = 1;
5441 req128_vert_wc_c = 1;
5442 }
5443
5444 if (BytePerPixelY == 2) {
5445 segment_order_horz_contiguous_luma = 0;
5446 segment_order_vert_contiguous_luma = 1;
5447 } else {
5448 segment_order_horz_contiguous_luma = 1;
5449 segment_order_vert_contiguous_luma = 0;
5450 }
5451
5452 if (BytePerPixelC == 2) {
5453 segment_order_horz_contiguous_chroma = 0;
5454 segment_order_vert_contiguous_chroma = 1;
5455 } else {
5456 segment_order_horz_contiguous_chroma = 1;
5457 segment_order_vert_contiguous_chroma = 0;
5458 }
5459 #ifdef __DML_VBA_DEBUG__
5460 dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5461 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5462 dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5463 dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5464 dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5465 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5466 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5467 dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5468 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5469 __func__, segment_order_horz_contiguous_chroma);
5470 #endif
5471
5472 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5473 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5474 RequestLuma = REQ_256Bytes;
5475 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5476 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5477 RequestLuma = REQ_128BytesNonContiguous;
5478 else
5479 RequestLuma = REQ_128BytesContiguous;
5480
5481 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5482 RequestChroma = REQ_256Bytes;
5483 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5484 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5485 RequestChroma = REQ_128BytesNonContiguous;
5486 else
5487 RequestChroma = REQ_128BytesContiguous;
5488
5489 } else if (!IsVertical(SourceRotation)) {
5490 if (req128_horz_wc_l == 0)
5491 RequestLuma = REQ_256Bytes;
5492 else if (segment_order_horz_contiguous_luma == 0)
5493 RequestLuma = REQ_128BytesNonContiguous;
5494 else
5495 RequestLuma = REQ_128BytesContiguous;
5496
5497 if (req128_horz_wc_c == 0)
5498 RequestChroma = REQ_256Bytes;
5499 else if (segment_order_horz_contiguous_chroma == 0)
5500 RequestChroma = REQ_128BytesNonContiguous;
5501 else
5502 RequestChroma = REQ_128BytesContiguous;
5503
5504 } else {
5505 if (req128_vert_wc_l == 0)
5506 RequestLuma = REQ_256Bytes;
5507 else if (segment_order_vert_contiguous_luma == 0)
5508 RequestLuma = REQ_128BytesNonContiguous;
5509 else
5510 RequestLuma = REQ_128BytesContiguous;
5511
5512 if (req128_vert_wc_c == 0)
5513 RequestChroma = REQ_256Bytes;
5514 else if (segment_order_vert_contiguous_chroma == 0)
5515 RequestChroma = REQ_128BytesNonContiguous;
5516 else
5517 RequestChroma = REQ_128BytesContiguous;
5518 }
5519
5520 if (RequestLuma == REQ_256Bytes) {
5521 *MaxUncompressedBlockLuma = 256;
5522 *MaxCompressedBlockLuma = 256;
5523 *IndependentBlockLuma = 0;
5524 } else if (RequestLuma == REQ_128BytesContiguous) {
5525 *MaxUncompressedBlockLuma = 256;
5526 *MaxCompressedBlockLuma = 128;
5527 *IndependentBlockLuma = 128;
5528 } else {
5529 *MaxUncompressedBlockLuma = 256;
5530 *MaxCompressedBlockLuma = 64;
5531 *IndependentBlockLuma = 64;
5532 }
5533
5534 if (RequestChroma == REQ_256Bytes) {
5535 *MaxUncompressedBlockChroma = 256;
5536 *MaxCompressedBlockChroma = 256;
5537 *IndependentBlockChroma = 0;
5538 } else if (RequestChroma == REQ_128BytesContiguous) {
5539 *MaxUncompressedBlockChroma = 256;
5540 *MaxCompressedBlockChroma = 128;
5541 *IndependentBlockChroma = 128;
5542 } else {
5543 *MaxUncompressedBlockChroma = 256;
5544 *MaxCompressedBlockChroma = 64;
5545 *IndependentBlockChroma = 64;
5546 }
5547
5548 if (DCCEnabled != true || BytePerPixelC == 0) {
5549 *MaxUncompressedBlockChroma = 0;
5550 *MaxCompressedBlockChroma = 0;
5551 *IndependentBlockChroma = 0;
5552 }
5553
5554 if (DCCEnabled != true) {
5555 *MaxUncompressedBlockLuma = 0;
5556 *MaxCompressedBlockLuma = 0;
5557 *IndependentBlockLuma = 0;
5558 }
5559
5560 #ifdef __DML_VBA_DEBUG__
5561 dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5562 dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5563 dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5564 dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5565 dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5566 dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5567 #endif
5568
5569 } // CalculateDCCConfiguration
5570
dml32_CalculateStutterEfficiency(unsigned int CompressedBufferSizeInkByte,enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool UnboundedRequestEnabled,unsigned int MetaFIFOSizeInKEntries,unsigned int ZeroSizeBufferEntries,unsigned int PixelChunkSizeInKByte,unsigned int NumberOfActiveSurfaces,unsigned int ROBBufferSizeInKByte,double TotalDataReadBandwidth,double DCFCLK,double ReturnBW,unsigned int CompbufReservedSpace64B,unsigned int CompbufReservedSpaceZs,double SRExitTime,double SRExitZ8Time,bool SynchronizeTimingsFinal,unsigned int BlendingAndTiming[],double StutterEnterPlusExitWatermark,double Z8StutterEnterPlusExitWatermark,bool ProgressiveToInterlaceUnitInOPP,bool Interlace[],double MinTTUVBlank[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeY[],unsigned int BytePerPixelY[],double BytePerPixelDETY[],double SwathWidthY[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],double NetDCCRateLuma[],double NetDCCRateChroma[],double DCCFractionOfZeroSizeRequestsLuma[],double DCCFractionOfZeroSizeRequestsChroma[],unsigned int HTotal[],unsigned int VTotal[],double PixelClock[],double VRatio[],enum dm_rotation_angle SourceRotation[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesC[],unsigned int BlockWidth256BytesC[],unsigned int DCCYMaxUncompressedBlock[],unsigned int DCCCMaxUncompressedBlock[],unsigned int VActive[],bool DCCEnable[],bool WritebackEnable[],double ReadBandwidthSurfaceLuma[],double ReadBandwidthSurfaceChroma[],double meta_row_bw[],double dpte_row_bw[],double * StutterEfficiencyNotIncludingVBlank,double * StutterEfficiency,unsigned int * NumberOfStutterBurstsPerFrame,double * Z8StutterEfficiencyNotIncludingVBlank,double * Z8StutterEfficiency,unsigned int * Z8NumberOfStutterBurstsPerFrame,double * StutterPeriod,bool * DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)5571 void dml32_CalculateStutterEfficiency(
5572 unsigned int CompressedBufferSizeInkByte,
5573 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5574 bool UnboundedRequestEnabled,
5575 unsigned int MetaFIFOSizeInKEntries,
5576 unsigned int ZeroSizeBufferEntries,
5577 unsigned int PixelChunkSizeInKByte,
5578 unsigned int NumberOfActiveSurfaces,
5579 unsigned int ROBBufferSizeInKByte,
5580 double TotalDataReadBandwidth,
5581 double DCFCLK,
5582 double ReturnBW,
5583 unsigned int CompbufReservedSpace64B,
5584 unsigned int CompbufReservedSpaceZs,
5585 double SRExitTime,
5586 double SRExitZ8Time,
5587 bool SynchronizeTimingsFinal,
5588 unsigned int BlendingAndTiming[],
5589 double StutterEnterPlusExitWatermark,
5590 double Z8StutterEnterPlusExitWatermark,
5591 bool ProgressiveToInterlaceUnitInOPP,
5592 bool Interlace[],
5593 double MinTTUVBlank[],
5594 unsigned int DPPPerSurface[],
5595 unsigned int DETBufferSizeY[],
5596 unsigned int BytePerPixelY[],
5597 double BytePerPixelDETY[],
5598 double SwathWidthY[],
5599 unsigned int SwathHeightY[],
5600 unsigned int SwathHeightC[],
5601 double NetDCCRateLuma[],
5602 double NetDCCRateChroma[],
5603 double DCCFractionOfZeroSizeRequestsLuma[],
5604 double DCCFractionOfZeroSizeRequestsChroma[],
5605 unsigned int HTotal[],
5606 unsigned int VTotal[],
5607 double PixelClock[],
5608 double VRatio[],
5609 enum dm_rotation_angle SourceRotation[],
5610 unsigned int BlockHeight256BytesY[],
5611 unsigned int BlockWidth256BytesY[],
5612 unsigned int BlockHeight256BytesC[],
5613 unsigned int BlockWidth256BytesC[],
5614 unsigned int DCCYMaxUncompressedBlock[],
5615 unsigned int DCCCMaxUncompressedBlock[],
5616 unsigned int VActive[],
5617 bool DCCEnable[],
5618 bool WritebackEnable[],
5619 double ReadBandwidthSurfaceLuma[],
5620 double ReadBandwidthSurfaceChroma[],
5621 double meta_row_bw[],
5622 double dpte_row_bw[],
5623
5624 /* Output */
5625 double *StutterEfficiencyNotIncludingVBlank,
5626 double *StutterEfficiency,
5627 unsigned int *NumberOfStutterBurstsPerFrame,
5628 double *Z8StutterEfficiencyNotIncludingVBlank,
5629 double *Z8StutterEfficiency,
5630 unsigned int *Z8NumberOfStutterBurstsPerFrame,
5631 double *StutterPeriod,
5632 bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5633 {
5634
5635 bool FoundCriticalSurface = false;
5636 unsigned int SwathSizeCriticalSurface = 0;
5637 unsigned int LastChunkOfSwathSize;
5638 unsigned int MissingPartOfLastSwathOfDETSize;
5639 double LastZ8StutterPeriod = 0.0;
5640 double LastStutterPeriod = 0.0;
5641 unsigned int TotalNumberOfActiveOTG = 0;
5642 double doublePixelClock;
5643 unsigned int doubleHTotal;
5644 unsigned int doubleVTotal;
5645 bool SameTiming = true;
5646 double DETBufferingTimeY;
5647 double SwathWidthYCriticalSurface = 0.0;
5648 double SwathHeightYCriticalSurface = 0.0;
5649 double VActiveTimeCriticalSurface = 0.0;
5650 double FrameTimeCriticalSurface = 0.0;
5651 unsigned int BytePerPixelYCriticalSurface = 0;
5652 double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5653 unsigned int DETBufferSizeYCriticalSurface = 0;
5654 double MinTTUVBlankCriticalSurface = 0.0;
5655 unsigned int BlockWidth256BytesYCriticalSurface = 0;
5656 bool doublePlaneCriticalSurface = 0;
5657 bool doublePipeCriticalSurface = 0;
5658 double TotalCompressedReadBandwidth;
5659 double TotalRowReadBandwidth;
5660 double AverageDCCCompressionRate;
5661 double EffectiveCompressedBufferSize;
5662 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5663 double StutterBurstTime;
5664 unsigned int TotalActiveWriteback;
5665 double LinesInDETY;
5666 double LinesInDETYRoundedDownToSwath;
5667 double MaximumEffectiveCompressionLuma;
5668 double MaximumEffectiveCompressionChroma;
5669 double TotalZeroSizeRequestReadBandwidth;
5670 double TotalZeroSizeCompressedReadBandwidth;
5671 double AverageDCCZeroSizeFraction;
5672 double AverageZeroSizeCompressionRate;
5673 unsigned int k;
5674
5675 TotalZeroSizeRequestReadBandwidth = 0;
5676 TotalZeroSizeCompressedReadBandwidth = 0;
5677 TotalRowReadBandwidth = 0;
5678 TotalCompressedReadBandwidth = 0;
5679
5680 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5681 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5682 if (DCCEnable[k] == true) {
5683 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5684 || (!IsVertical(SourceRotation[k])
5685 && BlockHeight256BytesY[k] > SwathHeightY[k])
5686 || DCCYMaxUncompressedBlock[k] < 256) {
5687 MaximumEffectiveCompressionLuma = 2;
5688 } else {
5689 MaximumEffectiveCompressionLuma = 4;
5690 }
5691 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5692 + ReadBandwidthSurfaceLuma[k]
5693 / dml_min(NetDCCRateLuma[k],
5694 MaximumEffectiveCompressionLuma);
5695 #ifdef __DML_VBA_DEBUG__
5696 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5697 __func__, k, ReadBandwidthSurfaceLuma[k]);
5698 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5699 __func__, k, NetDCCRateLuma[k]);
5700 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5701 __func__, k, MaximumEffectiveCompressionLuma);
5702 #endif
5703 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5704 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5705 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5706 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5707 / MaximumEffectiveCompressionLuma;
5708
5709 if (ReadBandwidthSurfaceChroma[k] > 0) {
5710 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5711 || (!IsVertical(SourceRotation[k])
5712 && BlockHeight256BytesC[k] > SwathHeightC[k])
5713 || DCCCMaxUncompressedBlock[k] < 256) {
5714 MaximumEffectiveCompressionChroma = 2;
5715 } else {
5716 MaximumEffectiveCompressionChroma = 4;
5717 }
5718 TotalCompressedReadBandwidth =
5719 TotalCompressedReadBandwidth
5720 + ReadBandwidthSurfaceChroma[k]
5721 / dml_min(NetDCCRateChroma[k],
5722 MaximumEffectiveCompressionChroma);
5723 #ifdef __DML_VBA_DEBUG__
5724 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5725 __func__, k, ReadBandwidthSurfaceChroma[k]);
5726 dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5727 __func__, k, NetDCCRateChroma[k]);
5728 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5729 __func__, k, MaximumEffectiveCompressionChroma);
5730 #endif
5731 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5732 + ReadBandwidthSurfaceChroma[k]
5733 * DCCFractionOfZeroSizeRequestsChroma[k];
5734 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5735 + ReadBandwidthSurfaceChroma[k]
5736 * DCCFractionOfZeroSizeRequestsChroma[k]
5737 / MaximumEffectiveCompressionChroma;
5738 }
5739 } else {
5740 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5741 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5742 }
5743 TotalRowReadBandwidth = TotalRowReadBandwidth
5744 + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5745 }
5746 }
5747
5748 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5749 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5750
5751 #ifdef __DML_VBA_DEBUG__
5752 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5753 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5754 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5755 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5756 __func__, TotalZeroSizeCompressedReadBandwidth);
5757 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5758 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5759 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5760 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5761 dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5762 dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5763 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5764 #endif
5765 if (AverageDCCZeroSizeFraction == 1) {
5766 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5767 / TotalZeroSizeCompressedReadBandwidth;
5768 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5769 * AverageZeroSizeCompressionRate
5770 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5771 * AverageZeroSizeCompressionRate;
5772 } else if (AverageDCCZeroSizeFraction > 0) {
5773 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5774 / TotalZeroSizeCompressedReadBandwidth;
5775 EffectiveCompressedBufferSize = dml_min(
5776 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5777 (double) MetaFIFOSizeInKEntries * 1024 * 64
5778 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5779 + 1 / AverageDCCCompressionRate))
5780 + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5781 * AverageDCCCompressionRate,
5782 ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5783 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5784
5785 #ifdef __DML_VBA_DEBUG__
5786 dml_print("DML::%s: min 1 = %f\n", __func__,
5787 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5788 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5789 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5790 AverageDCCCompressionRate));
5791 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5792 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5793 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5794 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5795 #endif
5796 } else {
5797 EffectiveCompressedBufferSize = dml_min(
5798 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5799 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5800 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5801 * AverageDCCCompressionRate;
5802
5803 #ifdef __DML_VBA_DEBUG__
5804 dml_print("DML::%s: min 1 = %f\n", __func__,
5805 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5806 dml_print("DML::%s: min 2 = %f\n", __func__,
5807 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5808 #endif
5809 }
5810
5811 #ifdef __DML_VBA_DEBUG__
5812 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5813 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5814 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5815 #endif
5816
5817 *StutterPeriod = 0;
5818
5819 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5820 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5821 LinesInDETY = ((double) DETBufferSizeY[k]
5822 + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5823 * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5824 / BytePerPixelDETY[k] / SwathWidthY[k];
5825 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5826 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5827 / VRatio[k];
5828 #ifdef __DML_VBA_DEBUG__
5829 dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5830 dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5831 dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5832 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5833 __func__, k, ReadBandwidthSurfaceLuma[k]);
5834 dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5835 dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5836 dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5837 __func__, k, LinesInDETYRoundedDownToSwath);
5838 dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5839 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5840 dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5841 dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5842 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5843 #endif
5844
5845 if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5846 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5847
5848 FoundCriticalSurface = true;
5849 *StutterPeriod = DETBufferingTimeY;
5850 FrameTimeCriticalSurface = (
5851 isInterlaceTiming ?
5852 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5853 * (double) HTotal[k] / PixelClock[k];
5854 VActiveTimeCriticalSurface = (
5855 isInterlaceTiming ?
5856 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5857 * (double) HTotal[k] / PixelClock[k];
5858 BytePerPixelYCriticalSurface = BytePerPixelY[k];
5859 SwathWidthYCriticalSurface = SwathWidthY[k];
5860 SwathHeightYCriticalSurface = SwathHeightY[k];
5861 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5862 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5863 - (LinesInDETY - LinesInDETYRoundedDownToSwath);
5864 DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5865 MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5866 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5867 doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5868
5869 #ifdef __DML_VBA_DEBUG__
5870 dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n",
5871 __func__, k, FoundCriticalSurface);
5872 dml_print("DML::%s: k=%0d, StutterPeriod = %f\n",
5873 __func__, k, *StutterPeriod);
5874 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n",
5875 __func__, k, MinTTUVBlankCriticalSurface);
5876 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n",
5877 __func__, k, FrameTimeCriticalSurface);
5878 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n",
5879 __func__, k, VActiveTimeCriticalSurface);
5880 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n",
5881 __func__, k, BytePerPixelYCriticalSurface);
5882 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n",
5883 __func__, k, SwathWidthYCriticalSurface);
5884 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n",
5885 __func__, k, SwathHeightYCriticalSurface);
5886 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n",
5887 __func__, k, BlockWidth256BytesYCriticalSurface);
5888 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n",
5889 __func__, k, doublePlaneCriticalSurface);
5890 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n",
5891 __func__, k, doublePipeCriticalSurface);
5892 dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5893 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5894 #endif
5895 }
5896 }
5897 }
5898
5899 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5900 EffectiveCompressedBufferSize);
5901 #ifdef __DML_VBA_DEBUG__
5902 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5903 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5904 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5905 __func__, *StutterPeriod * TotalDataReadBandwidth);
5906 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5907 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5908 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5909 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5910 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5911 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5912 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5913 #endif
5914
5915 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5916 / ReturnBW
5917 + (*StutterPeriod * TotalDataReadBandwidth
5918 - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5919 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5920 #ifdef __DML_VBA_DEBUG__
5921 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5922 AverageDCCCompressionRate / ReturnBW);
5923 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5924 __func__, (*StutterPeriod * TotalDataReadBandwidth));
5925 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5926 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5927 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5928 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5929 #endif
5930 StutterBurstTime = dml_max(StutterBurstTime,
5931 LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5932 * SwathWidthYCriticalSurface / ReturnBW);
5933
5934 #ifdef __DML_VBA_DEBUG__
5935 dml_print("DML::%s: Time to finish residue swath=%f\n",
5936 __func__,
5937 LinesToFinishSwathTransferStutterCriticalSurface *
5938 BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5939 #endif
5940
5941 TotalActiveWriteback = 0;
5942 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5943 if (WritebackEnable[k])
5944 TotalActiveWriteback = TotalActiveWriteback + 1;
5945 }
5946
5947 if (TotalActiveWriteback == 0) {
5948 #ifdef __DML_VBA_DEBUG__
5949 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5950 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5951 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5952 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5953 #endif
5954 *StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5955 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5956 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5957 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5958 *NumberOfStutterBurstsPerFrame = (
5959 *StutterEfficiencyNotIncludingVBlank > 0 ?
5960 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5961 *Z8NumberOfStutterBurstsPerFrame = (
5962 *Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5963 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5964 } else {
5965 *StutterEfficiencyNotIncludingVBlank = 0.;
5966 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
5967 *NumberOfStutterBurstsPerFrame = 0;
5968 *Z8NumberOfStutterBurstsPerFrame = 0;
5969 }
5970 #ifdef __DML_VBA_DEBUG__
5971 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5972 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5973 __func__, *StutterEfficiencyNotIncludingVBlank);
5974 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5975 __func__, *Z8StutterEfficiencyNotIncludingVBlank);
5976 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5977 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5978 #endif
5979
5980 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5981 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5982 if (BlendingAndTiming[k] == k) {
5983 if (TotalNumberOfActiveOTG == 0) {
5984 doublePixelClock = PixelClock[k];
5985 doubleHTotal = HTotal[k];
5986 doubleVTotal = VTotal[k];
5987 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
5988 || doubleVTotal != VTotal[k]) {
5989 SameTiming = false;
5990 }
5991 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
5992 }
5993 }
5994 }
5995
5996 if (*StutterEfficiencyNotIncludingVBlank > 0) {
5997 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5998
5999 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
6000 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
6001 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
6002 + StutterBurstTime * VActiveTimeCriticalSurface
6003 / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6004 } else {
6005 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6006 }
6007 } else {
6008 *StutterEfficiency = 0;
6009 }
6010
6011 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6012 LastZ8StutterPeriod = VActiveTimeCriticalSurface
6013 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6014 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
6015 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
6016 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
6017 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6018 } else {
6019 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6020 }
6021 } else {
6022 *Z8StutterEfficiency = 0.;
6023 }
6024
6025 #ifdef __DML_VBA_DEBUG__
6026 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6027 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6028 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6029 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6030 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6031 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6032 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
6033 __func__, *StutterEfficiencyNotIncludingVBlank);
6034 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6035 #endif
6036
6037 SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
6038 * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
6039 LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
6040 MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
6041 - DETBufferSizeYCriticalSurface;
6042
6043 *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6044 && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6045 && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6046 && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6047
6048 #ifdef __DML_VBA_DEBUG__
6049 dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6050 dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6051 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6052 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6053 #endif
6054 } // CalculateStutterEfficiency
6055
/*
 * Derive the DET size budget and the minimum compressed buffer size from the
 * return buffer configuration.
 *
 * Outputs:
 *   MaxTotalDETInKByte             - 4/5 of (ConfigReturnBufferSizeInKByte +
 *                                    ROBBufferSizeInKByte), passed through
 *                                    dml_ceil(..., 64)
 *   nomDETInKByte                  - MaxTotalDETInKByte / MaxNumDPP passed
 *                                    through dml_floor(..., 64), or
 *                                    nomDETInKByteOverrideValue when
 *                                    nomDETInKByteOverrideEnable is set
 *   MinCompressedBufferSizeInKByte - ConfigReturnBufferSizeInKByte minus
 *                                    MaxTotalDETInKByte (unsigned arithmetic)
 *
 * NOTE(review): dml_ceil/dml_floor presumably round to a multiple of their
 * second argument - confirm against dml_inline_defs.h.
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int ConfigReturnBufferSizeInKByte,
		unsigned int ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
		unsigned int nomDETInKByteOverrideValue, // VBA_DELTA

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	const double return_plus_rob_kbyte = (double) ConfigReturnBufferSizeInKByte
			+ (double) ROBBufferSizeInKByte;

	*MaxTotalDETInKByte = dml_ceil(return_plus_rob_kbyte * 4.0 / 5.0, 64);
	*nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* Nothing more to do unless the nominal DET size is being overridden. */
	if (!nomDETInKByteOverrideEnable)
		return;

	*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
} // CalculateMaxDETAndMinCompressedBufferSize
6092
/*
 * Check whether the bandwidth needed during active display fits within
 * ReturnBW.
 *
 * For each of the first NumberOfActiveSurfaces surfaces the following terms
 * are summed:
 *   ReadBandwidthLuma * UrgentBurstFactorLuma
 *   ReadBandwidthChroma * UrgentBurstFactorChroma
 *   cursor_bw * UrgentBurstFactorCursor
 *   NumberOfDPP * meta_row_bandwidth
 *   NumberOfDPP * dpte_row_bandwidth
 *
 * Returns true only when that sum is <= ReturnBW and no surface has its
 * NotUrgentLatencyHiding flag set.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int surf;
	bool latency_hiding_missing = false;
	bool bandwidth_ok;
	double active_bw_total = 0;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		if (NotUrgentLatencyHiding[surf])
			latency_hiding_missing = true;

		/* Accumulate one term at a time, in the listed order. */
		active_bw_total += ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf];
		active_bw_total += ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf];
		active_bw_total += cursor_bw[surf] * UrgentBurstFactorCursor[surf];
		active_bw_total += NumberOfDPP[surf] * meta_row_bandwidth[surf];
		active_bw_total += NumberOfDPP[surf] * dpte_row_bandwidth[surf];
	}

	bandwidth_ok = !latency_hiding_missing && (active_bw_total <= ReturnBW);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, latency_hiding_missing);
	dml_print("DML::%s: VActiveBandwith = %f\n", __func__, active_bw_total);
	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, bandwidth_ok);
#endif
	return bandwidth_ok;
}
6131
dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,double ReturnBW,bool NotUrgentLatencyHiding[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double cursor_bw[],double meta_row_bandwidth[],double dpte_row_bandwidth[],double cursor_bw_pre[],double prefetch_vmrow_bw[],unsigned int NumberOfDPP[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double PrefetchBW[],double VRatio[],double MaxVRatioPre,double * MaxPrefetchBandwidth,double * FractionOfUrgentBandwidth,bool * PrefetchBandwidthSupport)6132 void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
6133 double ReturnBW,
6134 bool NotUrgentLatencyHiding[],
6135 double ReadBandwidthLuma[],
6136 double ReadBandwidthChroma[],
6137 double PrefetchBandwidthLuma[],
6138 double PrefetchBandwidthChroma[],
6139 double cursor_bw[],
6140 double meta_row_bandwidth[],
6141 double dpte_row_bandwidth[],
6142 double cursor_bw_pre[],
6143 double prefetch_vmrow_bw[],
6144 unsigned int NumberOfDPP[],
6145 double UrgentBurstFactorLuma[],
6146 double UrgentBurstFactorChroma[],
6147 double UrgentBurstFactorCursor[],
6148 double UrgentBurstFactorLumaPre[],
6149 double UrgentBurstFactorChromaPre[],
6150 double UrgentBurstFactorCursorPre[],
6151 double PrefetchBW[],
6152 double VRatio[],
6153 double MaxVRatioPre,
6154
6155 /* output */
6156 double *MaxPrefetchBandwidth,
6157 double *FractionOfUrgentBandwidth,
6158 bool *PrefetchBandwidthSupport)
6159 {
6160 unsigned int k;
6161 double ActiveBandwidthPerSurface;
6162 bool NotEnoughUrgentLatencyHiding = false;
6163 double TotalActiveBandwidth = 0;
6164 double TotalPrefetchBandwidth = 0;
6165
6166 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6167 if (NotUrgentLatencyHiding[k]) {
6168 NotEnoughUrgentLatencyHiding = true;
6169 }
6170 }
6171
6172 *MaxPrefetchBandwidth = 0;
6173 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6174 ActiveBandwidthPerSurface = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]);
6175
6176 TotalActiveBandwidth += ActiveBandwidthPerSurface;
6177
6178 TotalPrefetchBandwidth = TotalPrefetchBandwidth + PrefetchBW[k] * VRatio[k];
6179
6180 *MaxPrefetchBandwidth = *MaxPrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6181 ActiveBandwidthPerSurface,
6182 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6183 }
6184
6185 if (MaxVRatioPre == __DML_MAX_VRATIO_PRE__)
6186 *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && (TotalPrefetchBandwidth <= TotalActiveBandwidth * __DML_MAX_BW_RATIO_PRE__) && !NotEnoughUrgentLatencyHiding;
6187 else
6188 *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
6189
6190 *FractionOfUrgentBandwidth = *MaxPrefetchBandwidth / ReturnBW;
6191 }
6192
/*
 * Compute the bandwidth left over from ReturnBW for immediate flip.
 *
 * Starting from ReturnBW, each active surface subtracts the larger of:
 *   - its active demand: luma, chroma and cursor read bandwidth, each scaled
 *     by the matching urgent burst factor
 *   - its prefetch demand: NumberOfDPP * (prefetch luma and chroma bandwidth
 *     scaled by the "Pre" burst factors) plus cursor_bw_pre scaled by its
 *     "Pre" burst factor
 *
 * Returns the remainder; nothing in this function clamps it, so it can be
 * negative.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	unsigned int surf;
	double remaining_bw = ReturnBW;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		const double surf_active_bw = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf];
		const double surf_prefetch_bw = NumberOfDPP[surf]
				* (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
				+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
				+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		remaining_bw -= dml_max(surf_active_bw, surf_prefetch_bw);
	}

	return remaining_bw;
}
6219
dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,double ReturnBW,enum immediate_flip_requirement ImmediateFlipRequirement[],double final_flip_bw[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double cursor_bw[],double meta_row_bandwidth[],double dpte_row_bandwidth[],double cursor_bw_pre[],double prefetch_vmrow_bw[],unsigned int NumberOfDPP[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double * TotalBandwidth,double * FractionOfUrgentBandwidth,bool * ImmediateFlipBandwidthSupport)6220 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6221 double ReturnBW,
6222 enum immediate_flip_requirement ImmediateFlipRequirement[],
6223 double final_flip_bw[],
6224 double ReadBandwidthLuma[],
6225 double ReadBandwidthChroma[],
6226 double PrefetchBandwidthLuma[],
6227 double PrefetchBandwidthChroma[],
6228 double cursor_bw[],
6229 double meta_row_bandwidth[],
6230 double dpte_row_bandwidth[],
6231 double cursor_bw_pre[],
6232 double prefetch_vmrow_bw[],
6233 unsigned int NumberOfDPP[],
6234 double UrgentBurstFactorLuma[],
6235 double UrgentBurstFactorChroma[],
6236 double UrgentBurstFactorCursor[],
6237 double UrgentBurstFactorLumaPre[],
6238 double UrgentBurstFactorChromaPre[],
6239 double UrgentBurstFactorCursorPre[],
6240
6241 /* output */
6242 double *TotalBandwidth,
6243 double *FractionOfUrgentBandwidth,
6244 bool *ImmediateFlipBandwidthSupport)
6245 {
6246 unsigned int k;
6247 *TotalBandwidth = 0;
6248 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6249 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6250 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6251 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6252 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6253 } else {
6254 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6255 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6256 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6257 }
6258 }
6259 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6260 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6261 }
6262
dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,double ReturnBW,double UrgentLatency,unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],double BytePerPixelInDETY[],double BytePerPixelInDETC[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],unsigned int NumOfDPP[],unsigned int HTotal[],double PixelClock[],double VRatioY[],double VRatioC[],enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],enum unbounded_requesting_policy UseUnboundedRequesting)6263 bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
6264 double ReturnBW,
6265 double UrgentLatency,
6266 unsigned int SwathHeightY[],
6267 unsigned int SwathHeightC[],
6268 unsigned int SwathWidthY[],
6269 unsigned int SwathWidthC[],
6270 double BytePerPixelInDETY[],
6271 double BytePerPixelInDETC[],
6272 unsigned int DETBufferSizeY[],
6273 unsigned int DETBufferSizeC[],
6274 unsigned int NumOfDPP[],
6275 unsigned int HTotal[],
6276 double PixelClock[],
6277 double VRatioY[],
6278 double VRatioC[],
6279 enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
6280 enum unbounded_requesting_policy UseUnboundedRequesting)
6281 {
6282 int k;
6283 double SwathSizeAllSurfaces = 0;
6284 double SwathSizeAllSurfacesInFetchTimeUs;
6285 double DETSwathLatencyHidingUs;
6286 double DETSwathLatencyHidingYUs;
6287 double DETSwathLatencyHidingCUs;
6288 double SwathSizePerSurfaceY[DC__NUM_DPP__MAX];
6289 double SwathSizePerSurfaceC[DC__NUM_DPP__MAX];
6290 bool NotEnoughDETSwathFillLatencyHiding = false;
6291
6292 if (UseUnboundedRequesting == dm_unbounded_requesting)
6293 return false;
6294
6295 /* calculate sum of single swath size for all pipes in bytes */
6296 for (k = 0; k < NumberOfActiveSurfaces; k++) {
6297 SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k];
6298
6299 if (SwathHeightC[k] != 0)
6300 SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k];
6301 else
6302 SwathSizePerSurfaceC[k] = 0;
6303
6304 SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k];
6305 }
6306
6307 SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency;
6308
6309 /* ensure all DET - 1 swath can hide a fetch for all surfaces */
6310 for (k = 0; k < NumberOfActiveSurfaces; k++) {
6311 double LineTime = HTotal[k] / PixelClock[k];
6312
6313 /* only care if surface is not phantom */
6314 if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
6315 DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime;
6316
6317 if (SwathHeightC[k] != 0) {
6318 DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime;
6319
6320 DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs);
6321 } else {
6322 DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs;
6323 }
6324
6325 /* DET must be able to hide time to fetch 1 swath for each surface */
6326 if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) {
6327 NotEnoughDETSwathFillLatencyHiding = true;
6328 break;
6329 }
6330 }
6331 }
6332
6333 return NotEnoughDETSwathFillLatencyHiding;
6334 }
6335