amd/addrlib: fix uninitialized values for Addr2ComputeDccAddrFromCoord
[mesa.git] / src / amd / addrlib / src / gfx9 / gfx9addrlib.cpp
1 /*
2 * Copyright © 2007-2018 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx9addrlib.h"
35
36 #include "gfx9_gb_reg.h"
37
38 #include "amdgpu_asic_addr.h"
39
40 #include "util/macros.h"
41
42 ////////////////////////////////////////////////////////////////////////////////////////////////////
43 ////////////////////////////////////////////////////////////////////////////////////////////////////
44
45 namespace Addr
46 {
47
48 /**
49 ************************************************************************************************************************
50 * Gfx9HwlInit
51 *
52 * @brief
53 * Creates a Gfx9Lib object.
54 *
55 * @return
56 * Returns a Gfx9Lib object pointer.
57 ************************************************************************************************************************
58 */
59 Addr::Lib* Gfx9HwlInit(const Client* pClient)
60 {
61 return V2::Gfx9Lib::CreateObj(pClient);
62 }
63
64 namespace V2
65 {
66
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68 // Static Const Member
69 ////////////////////////////////////////////////////////////////////////////////////////////////////
70
71 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
72 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt
73 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
74 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
75 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D
76 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R
77
78 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
79 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
80 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D
81 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R
82
83 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
84 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
85 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D
86 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_R
87
88 {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z
89 {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S
90 {0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D
91 {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R
92
93 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T
94 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T
95 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T
96 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T
97
98 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_x
99 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_x
100 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_x
101 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_x
102
103 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X
104 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X
105 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X
106 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X
107
108 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X
109 {0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X
110 {0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X
111 {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X
112 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
113 };
114
115 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
116 8, 6, 5, 4, 3, 2, 1, 0};
117
118 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
119
120 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
121
122 /**
123 ************************************************************************************************************************
124 * Gfx9Lib::Gfx9Lib
125 *
126 * @brief
127 * Constructor
128 *
129 ************************************************************************************************************************
130 */
131 Gfx9Lib::Gfx9Lib(const Client* pClient)
132 :
133 Lib(pClient),
134 m_numEquations(0)
135 {
136 m_class = AI_ADDRLIB;
137 memset(&m_settings, 0, sizeof(m_settings));
138 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
139 m_metaEqOverrideIndex = 0;
140 memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
141 }
142
143 /**
144 ************************************************************************************************************************
145 * Gfx9Lib::~Gfx9Lib
146 *
147 * @brief
148 * Destructor
149 ************************************************************************************************************************
150 */
151 Gfx9Lib::~Gfx9Lib()
152 {
153 }
154
155 /**
156 ************************************************************************************************************************
157 * Gfx9Lib::HwlComputeHtileInfo
158 *
159 * @brief
160 * Interface function stub of AddrComputeHtileInfo
161 *
162 * @return
163 * ADDR_E_RETURNCODE
164 ************************************************************************************************************************
165 */
166 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
167 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
168 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
169 ) const
170 {
171 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
172 pIn->swizzleMode);
173
174 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
175
176 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
177
178 if ((numPipeTotal == 1) && (numRbTotal == 1))
179 {
180 numCompressBlkPerMetaBlkLog2 = 10;
181 }
182 else
183 {
184 if (m_settings.applyAliasFix)
185 {
186 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
187 }
188 else
189 {
190 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
191 }
192 }
193
194 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
195
196 Dim3d metaBlkDim = {8, 8, 1};
197 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
198 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
199 UINT_32 heightAmp = totalAmpBits - widthAmp;
200 metaBlkDim.w <<= widthAmp;
201 metaBlkDim.h <<= heightAmp;
202
203 #if DEBUG
204 Dim3d metaBlkDimDbg = {8, 8, 1};
205 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
206 {
207 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
208 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
209 {
210 metaBlkDimDbg.h <<= 1;
211 }
212 else
213 {
214 metaBlkDimDbg.w <<= 1;
215 }
216 }
217 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
218 #endif
219
220 UINT_32 numMetaBlkX;
221 UINT_32 numMetaBlkY;
222 UINT_32 numMetaBlkZ;
223
224 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
225 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
226 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
227
228 const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
229 UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
230
231 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
232 {
233 align *= (numPipeTotal >> 1);
234 }
235
236 align = Max(align, metaBlkSize);
237
238 if (m_settings.metaBaseAlignFix)
239 {
240 align = Max(align, GetBlockSize(pIn->swizzleMode));
241 }
242
243 if (m_settings.htileAlignFix)
244 {
245 const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
246 const INT_32 htileCachelineSizeLog2 = 11;
247 const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
248
249 INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
250
251 align <<= rbMaskPadding;
252 }
253
254 pOut->pitch = numMetaBlkX * metaBlkDim.w;
255 pOut->height = numMetaBlkY * metaBlkDim.h;
256 pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
257
258 pOut->metaBlkWidth = metaBlkDim.w;
259 pOut->metaBlkHeight = metaBlkDim.h;
260 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
261
262 pOut->baseAlign = align;
263 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
264
265 return ADDR_OK;
266 }
267
268 /**
269 ************************************************************************************************************************
270 * Gfx9Lib::HwlComputeCmaskInfo
271 *
272 * @brief
273 * Interface function stub of AddrComputeCmaskInfo
274 *
275 * @return
276 * ADDR_E_RETURNCODE
277 ************************************************************************************************************************
278 */
279 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
280 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
281 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
282 ) const
283 {
284 // TODO: Clarify with AddrLib team
285 // ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
286
287 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
288 pIn->swizzleMode);
289
290 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
291
292 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
293
294 if ((numPipeTotal == 1) && (numRbTotal == 1))
295 {
296 numCompressBlkPerMetaBlkLog2 = 13;
297 }
298 else
299 {
300 if (m_settings.applyAliasFix)
301 {
302 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
303 }
304 else
305 {
306 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
307 }
308
309 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
310 }
311
312 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
313
314 Dim2d metaBlkDim = {8, 8};
315 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
316 UINT_32 heightAmp = totalAmpBits >> 1;
317 UINT_32 widthAmp = totalAmpBits - heightAmp;
318 metaBlkDim.w <<= widthAmp;
319 metaBlkDim.h <<= heightAmp;
320
321 #if DEBUG
322 Dim2d metaBlkDimDbg = {8, 8};
323 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
324 {
325 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
326 {
327 metaBlkDimDbg.h <<= 1;
328 }
329 else
330 {
331 metaBlkDimDbg.w <<= 1;
332 }
333 }
334 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
335 #endif
336
337 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
338 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
339 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
340
341 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
342
343 if (m_settings.metaBaseAlignFix)
344 {
345 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
346 }
347
348 pOut->pitch = numMetaBlkX * metaBlkDim.w;
349 pOut->height = numMetaBlkY * metaBlkDim.h;
350 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
351 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
352 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
353
354 pOut->metaBlkWidth = metaBlkDim.w;
355 pOut->metaBlkHeight = metaBlkDim.h;
356
357 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
358
359 return ADDR_OK;
360 }
361
362 /**
363 ************************************************************************************************************************
364 * Gfx9Lib::GetMetaMipInfo
365 *
366 * @brief
367 * Get meta mip info
368 *
369 * @return
370 * N/A
371 ************************************************************************************************************************
372 */
373 VOID Gfx9Lib::GetMetaMipInfo(
374 UINT_32 numMipLevels, ///< [in] number of mip levels
375 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
376 BOOL_32 dataThick, ///< [in] data surface is thick
377 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
378 UINT_32 mip0Width, ///< [in] mip0 width
379 UINT_32 mip0Height, ///< [in] mip0 height
380 UINT_32 mip0Depth, ///< [in] mip0 depth
381 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
382 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
383 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
384 const
385 {
386 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
387 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
388 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
389 UINT_32 tailWidth = pMetaBlkDim->w;
390 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
391 UINT_32 tailDepth = pMetaBlkDim->d;
392 BOOL_32 inTail = FALSE;
393 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
394
395 if (numMipLevels > 1)
396 {
397 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
398 {
399 // Z major
400 major = ADDR_MAJOR_Z;
401 }
402 else if (numMetaBlkX >= numMetaBlkY)
403 {
404 // X major
405 major = ADDR_MAJOR_X;
406 }
407 else
408 {
409 // Y major
410 major = ADDR_MAJOR_Y;
411 }
412
413 inTail = ((mip0Width <= tailWidth) &&
414 (mip0Height <= tailHeight) &&
415 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
416
417 if (inTail == FALSE)
418 {
419 UINT_32 orderLimit;
420 UINT_32 *pMipDim;
421 UINT_32 *pOrderDim;
422
423 if (major == ADDR_MAJOR_Z)
424 {
425 // Z major
426 pMipDim = &numMetaBlkY;
427 pOrderDim = &numMetaBlkZ;
428 orderLimit = 4;
429 }
430 else if (major == ADDR_MAJOR_X)
431 {
432 // X major
433 pMipDim = &numMetaBlkY;
434 pOrderDim = &numMetaBlkX;
435 orderLimit = 4;
436 }
437 else
438 {
439 // Y major
440 pMipDim = &numMetaBlkX;
441 pOrderDim = &numMetaBlkY;
442 orderLimit = 2;
443 }
444
445 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
446 {
447 *pMipDim += 2;
448 }
449 else
450 {
451 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
452 }
453 }
454 }
455
456 if (pInfo != NULL)
457 {
458 UINT_32 mipWidth = mip0Width;
459 UINT_32 mipHeight = mip0Height;
460 UINT_32 mipDepth = mip0Depth;
461 Dim3d mipCoord = {0};
462
463 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
464 {
465 if (inTail)
466 {
467 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
468 pMetaBlkDim);
469 break;
470 }
471 else
472 {
473 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
474 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
475 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
476
477 pInfo[mip].inMiptail = FALSE;
478 pInfo[mip].startX = mipCoord.w;
479 pInfo[mip].startY = mipCoord.h;
480 pInfo[mip].startZ = mipCoord.d;
481 pInfo[mip].width = mipWidth;
482 pInfo[mip].height = mipHeight;
483 pInfo[mip].depth = dataThick ? mipDepth : 1;
484
485 if ((mip >= 3) || (mip & 1))
486 {
487 switch (major)
488 {
489 case ADDR_MAJOR_X:
490 mipCoord.w += mipWidth;
491 break;
492 case ADDR_MAJOR_Y:
493 mipCoord.h += mipHeight;
494 break;
495 case ADDR_MAJOR_Z:
496 mipCoord.d += mipDepth;
497 break;
498 default:
499 break;
500 }
501 }
502 else
503 {
504 switch (major)
505 {
506 case ADDR_MAJOR_X:
507 mipCoord.h += mipHeight;
508 break;
509 case ADDR_MAJOR_Y:
510 mipCoord.w += mipWidth;
511 break;
512 case ADDR_MAJOR_Z:
513 mipCoord.h += mipHeight;
514 break;
515 default:
516 break;
517 }
518 }
519
520 mipWidth = Max(mipWidth >> 1, 1u);
521 mipHeight = Max(mipHeight >> 1, 1u);
522 mipDepth = Max(mipDepth >> 1, 1u);
523
524 inTail = ((mipWidth <= tailWidth) &&
525 (mipHeight <= tailHeight) &&
526 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
527 }
528 }
529 }
530
531 *pNumMetaBlkX = numMetaBlkX;
532 *pNumMetaBlkY = numMetaBlkY;
533 *pNumMetaBlkZ = numMetaBlkZ;
534 }
535
536 /**
537 ************************************************************************************************************************
538 * Gfx9Lib::HwlComputeDccInfo
539 *
540 * @brief
541 * Interface function to compute DCC key info
542 *
543 * @return
544 * ADDR_E_RETURNCODE
545 ************************************************************************************************************************
546 */
547 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
548 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
549 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
550 ) const
551 {
552 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
553 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
554 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
555
556 if (dataLinear)
557 {
558 metaLinear = TRUE;
559 }
560 else if (metaLinear == TRUE)
561 {
562 pipeAligned = FALSE;
563 }
564
565 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
566
567 if (metaLinear)
568 {
569 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
570 ADDR_ASSERT_ALWAYS();
571
572 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
573 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
574 }
575 else
576 {
577 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
578
579 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
580
581 UINT_32 numFrags = Max(pIn->numFrags, 1u);
582 UINT_32 numSlices = Max(pIn->numSlices, 1u);
583
584 minMetaBlkSize /= numFrags;
585
586 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
587
588 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
589
590 if ((numPipeTotal > 1) || (numRbTotal > 1))
591 {
592 const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
593
594 numCompressBlkPerMetaBlk =
595 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
596
597 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
598 {
599 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
600 }
601 }
602
603 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
604 Dim3d metaBlkDim = compressBlkDim;
605
606 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
607 {
608 if ((metaBlkDim.h < metaBlkDim.w) ||
609 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
610 {
611 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
612 {
613 metaBlkDim.h <<= 1;
614 }
615 else
616 {
617 metaBlkDim.d <<= 1;
618 }
619 }
620 else
621 {
622 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
623 {
624 metaBlkDim.w <<= 1;
625 }
626 else
627 {
628 metaBlkDim.d <<= 1;
629 }
630 }
631 }
632
633 UINT_32 numMetaBlkX;
634 UINT_32 numMetaBlkY;
635 UINT_32 numMetaBlkZ;
636
637 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
638 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
639 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
640
641 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
642
643 if (numFrags > m_maxCompFrag)
644 {
645 sizeAlign *= (numFrags / m_maxCompFrag);
646 }
647
648 if (m_settings.metaBaseAlignFix)
649 {
650 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
651 }
652
653 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
654 numCompressBlkPerMetaBlk * numFrags;
655 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
656 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
657
658 pOut->pitch = numMetaBlkX * metaBlkDim.w;
659 pOut->height = numMetaBlkY * metaBlkDim.h;
660 pOut->depth = numMetaBlkZ * metaBlkDim.d;
661
662 pOut->compressBlkWidth = compressBlkDim.w;
663 pOut->compressBlkHeight = compressBlkDim.h;
664 pOut->compressBlkDepth = compressBlkDim.d;
665
666 pOut->metaBlkWidth = metaBlkDim.w;
667 pOut->metaBlkHeight = metaBlkDim.h;
668 pOut->metaBlkDepth = metaBlkDim.d;
669
670 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
671 pOut->fastClearSizePerSlice =
672 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
673 }
674
675 return ADDR_OK;
676 }
677
678 /**
679 ************************************************************************************************************************
680 * Gfx9Lib::HwlComputeMaxBaseAlignments
681 *
682 * @brief
683 * Gets maximum alignments
684 * @return
685 * maximum alignments
686 ************************************************************************************************************************
687 */
688 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
689 {
690 return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB);
691 }
692
693 /**
694 ************************************************************************************************************************
695 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
696 *
697 * @brief
698 * Gets maximum alignments for metadata
699 * @return
700 * maximum alignments for metadata
701 ************************************************************************************************************************
702 */
703 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
704 {
705 // Max base alignment for Htile
706 const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
707 const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
708
709 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
710 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
711 ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
712 const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
713
714 UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
715
716 if (maxNumPipeTotal > 2)
717 {
718 maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
719 }
720
721 maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
722
723 if (m_settings.metaBaseAlignFix)
724 {
725 maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB));
726 }
727
728 if (m_settings.htileAlignFix)
729 {
730 maxBaseAlignHtile *= maxNumPipeTotal;
731 }
732
733 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
734
735 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
736 UINT_32 maxBaseAlignDcc3D = 65536;
737
738 if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
739 {
740 maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
741 }
742
743 // Max base alignment for Msaa Dcc
744 UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
745
746 if (m_settings.metaBaseAlignFix)
747 {
748 maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB));
749 }
750
751 return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
752 }
753
754 /**
755 ************************************************************************************************************************
756 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
757 *
758 * @brief
759 * Interface function stub of AddrComputeCmaskAddrFromCoord
760 *
761 * @return
762 * ADDR_E_RETURNCODE
763 ************************************************************************************************************************
764 */
765 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
766 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
767 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
768 {
769 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
770 input.size = sizeof(input);
771 input.cMaskFlags = pIn->cMaskFlags;
772 input.colorFlags = pIn->colorFlags;
773 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
774 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
775 input.numSlices = Max(pIn->numSlices, 1u);
776 input.swizzleMode = pIn->swizzleMode;
777 input.resourceType = pIn->resourceType;
778
779 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
780 output.size = sizeof(output);
781
782 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
783
784 if (returnCode == ADDR_OK)
785 {
786 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
787 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
788 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
789 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
790
791 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
792 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
793 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
794
795 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
796
797 UINT_32 xb = pIn->x / output.metaBlkWidth;
798 UINT_32 yb = pIn->y / output.metaBlkHeight;
799 UINT_32 zb = pIn->slice;
800
801 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
802 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
803 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
804
805 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
806
807 pOut->addr = address >> 1;
808 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
809
810 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
811 pIn->swizzleMode);
812
813 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
814
815 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
816 }
817
818 return returnCode;
819 }
820
821 /**
822 ************************************************************************************************************************
823 * Gfx9Lib::HwlComputeHtileAddrFromCoord
824 *
825 * @brief
826 * Interface function stub of AddrComputeHtileAddrFromCoord
827 *
828 * @return
829 * ADDR_E_RETURNCODE
830 ************************************************************************************************************************
831 */
832 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
833 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
834 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
835 {
836 ADDR_E_RETURNCODE returnCode = ADDR_OK;
837
838 if (pIn->numMipLevels > 1)
839 {
840 returnCode = ADDR_NOTIMPLEMENTED;
841 }
842 else
843 {
844 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
845 input.size = sizeof(input);
846 input.hTileFlags = pIn->hTileFlags;
847 input.depthFlags = pIn->depthflags;
848 input.swizzleMode = pIn->swizzleMode;
849 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
850 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
851 input.numSlices = Max(pIn->numSlices, 1u);
852 input.numMipLevels = Max(pIn->numMipLevels, 1u);
853
854 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
855 output.size = sizeof(output);
856
857 returnCode = ComputeHtileInfo(&input, &output);
858
859 if (returnCode == ADDR_OK)
860 {
861 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
862 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
863 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
864 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
865
866 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
867 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
868 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
869
870 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
871
872 UINT_32 xb = pIn->x / output.metaBlkWidth;
873 UINT_32 yb = pIn->y / output.metaBlkHeight;
874 UINT_32 zb = pIn->slice;
875
876 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
877 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
878 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
879
880 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
881
882 pOut->addr = address >> 1;
883
884 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
885 pIn->swizzleMode);
886
887 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
888
889 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
890 }
891 }
892
893 return returnCode;
894 }
895
896 /**
897 ************************************************************************************************************************
898 * Gfx9Lib::HwlComputeHtileCoordFromAddr
899 *
900 * @brief
901 * Interface function stub of AddrComputeHtileCoordFromAddr
902 *
903 * @return
904 * ADDR_E_RETURNCODE
905 ************************************************************************************************************************
906 */
907 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
908 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
909 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
910 {
911 ADDR_E_RETURNCODE returnCode = ADDR_OK;
912
913 if (pIn->numMipLevels > 1)
914 {
915 returnCode = ADDR_NOTIMPLEMENTED;
916 }
917 else
918 {
919 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
920 input.size = sizeof(input);
921 input.hTileFlags = pIn->hTileFlags;
922 input.swizzleMode = pIn->swizzleMode;
923 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
924 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
925 input.numSlices = Max(pIn->numSlices, 1u);
926 input.numMipLevels = Max(pIn->numMipLevels, 1u);
927
928 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
929 output.size = sizeof(output);
930
931 returnCode = ComputeHtileInfo(&input, &output);
932
933 if (returnCode == ADDR_OK)
934 {
935 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
936 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
937 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
938 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
939
940 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
941 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
942 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
943
944 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
945
946 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
947 pIn->swizzleMode);
948
949 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
950
951 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
952
953 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
954 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
955
956 UINT_32 x, y, z, s, m;
957 pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
958
959 pOut->slice = m / sliceSizeInBlock;
960 pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
961 pOut->x = (m % pitchInBlock) * output.metaBlkWidth + x;
962 }
963 }
964
965 return returnCode;
966 }
967
968 /**
969 ************************************************************************************************************************
970 * Gfx9Lib::HwlComputeDccAddrFromCoord
971 *
972 * @brief
973 * Interface function stub of AddrComputeDccAddrFromCoord
974 *
975 * @return
976 * ADDR_E_RETURNCODE
977 ************************************************************************************************************************
978 */
979 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
980 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
981 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
982 {
983 ADDR_E_RETURNCODE returnCode = ADDR_OK;
984
985 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
986 {
987 returnCode = ADDR_NOTIMPLEMENTED;
988 }
989 else
990 {
991 ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
992 input.size = sizeof(input);
993 input.dccKeyFlags = pIn->dccKeyFlags;
994 input.colorFlags = pIn->colorFlags;
995 input.swizzleMode = pIn->swizzleMode;
996 input.resourceType = pIn->resourceType;
997 input.bpp = pIn->bpp;
998 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
999 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
1000 input.numSlices = Max(pIn->numSlices, 1u);
1001 input.numFrags = Max(pIn->numFrags, 1u);
1002 input.numMipLevels = Max(pIn->numMipLevels, 1u);
1003
1004 ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
1005 output.size = sizeof(output);
1006
1007 returnCode = ComputeDccInfo(&input, &output);
1008
1009 if (returnCode == ADDR_OK)
1010 {
1011 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
1012 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
1013 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
1014 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1015 UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth);
1016 UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth);
1017 UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
1018 UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
1019
1020 MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1021 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1022 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1023 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1024
1025 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1026
1027 UINT_32 xb = pIn->x / output.metaBlkWidth;
1028 UINT_32 yb = pIn->y / output.metaBlkHeight;
1029 UINT_32 zb = pIn->slice / output.metaBlkDepth;
1030
1031 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
1032 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1033 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1034
1035 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
1036
1037 pOut->addr = address >> 1;
1038
1039 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1040 pIn->swizzleMode);
1041
1042 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1043
1044 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1045 }
1046 }
1047
1048 return returnCode;
1049 }
1050
1051 /**
1052 ************************************************************************************************************************
1053 * Gfx9Lib::HwlInitGlobalParams
1054 *
1055 * @brief
1056 * Initializes global parameters
1057 *
1058 * @return
1059 * TRUE if all settings are valid
1060 *
1061 ************************************************************************************************************************
1062 */
1063 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1064 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1065 {
1066 BOOL_32 valid = TRUE;
1067
1068 if (m_settings.isArcticIsland)
1069 {
1070 GB_ADDR_CONFIG gbAddrConfig;
1071
1072 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1073
1074 // These values are copied from CModel code
1075 switch (gbAddrConfig.bits.NUM_PIPES)
1076 {
1077 case ADDR_CONFIG_1_PIPE:
1078 m_pipes = 1;
1079 m_pipesLog2 = 0;
1080 break;
1081 case ADDR_CONFIG_2_PIPE:
1082 m_pipes = 2;
1083 m_pipesLog2 = 1;
1084 break;
1085 case ADDR_CONFIG_4_PIPE:
1086 m_pipes = 4;
1087 m_pipesLog2 = 2;
1088 break;
1089 case ADDR_CONFIG_8_PIPE:
1090 m_pipes = 8;
1091 m_pipesLog2 = 3;
1092 break;
1093 case ADDR_CONFIG_16_PIPE:
1094 m_pipes = 16;
1095 m_pipesLog2 = 4;
1096 break;
1097 case ADDR_CONFIG_32_PIPE:
1098 m_pipes = 32;
1099 m_pipesLog2 = 5;
1100 break;
1101 default:
1102 ADDR_ASSERT_ALWAYS();
1103 break;
1104 }
1105
1106 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1107 {
1108 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1109 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1110 m_pipeInterleaveLog2 = 8;
1111 break;
1112 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1113 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1114 m_pipeInterleaveLog2 = 9;
1115 break;
1116 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1117 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1118 m_pipeInterleaveLog2 = 10;
1119 break;
1120 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1121 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1122 m_pipeInterleaveLog2 = 11;
1123 break;
1124 default:
1125 ADDR_ASSERT_ALWAYS();
1126 break;
1127 }
1128
1129 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1130 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1131 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1132
1133 switch (gbAddrConfig.bits.NUM_BANKS)
1134 {
1135 case ADDR_CONFIG_1_BANK:
1136 m_banks = 1;
1137 m_banksLog2 = 0;
1138 break;
1139 case ADDR_CONFIG_2_BANK:
1140 m_banks = 2;
1141 m_banksLog2 = 1;
1142 break;
1143 case ADDR_CONFIG_4_BANK:
1144 m_banks = 4;
1145 m_banksLog2 = 2;
1146 break;
1147 case ADDR_CONFIG_8_BANK:
1148 m_banks = 8;
1149 m_banksLog2 = 3;
1150 break;
1151 case ADDR_CONFIG_16_BANK:
1152 m_banks = 16;
1153 m_banksLog2 = 4;
1154 break;
1155 default:
1156 ADDR_ASSERT_ALWAYS();
1157 break;
1158 }
1159
1160 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1161 {
1162 case ADDR_CONFIG_1_SHADER_ENGINE:
1163 m_se = 1;
1164 m_seLog2 = 0;
1165 break;
1166 case ADDR_CONFIG_2_SHADER_ENGINE:
1167 m_se = 2;
1168 m_seLog2 = 1;
1169 break;
1170 case ADDR_CONFIG_4_SHADER_ENGINE:
1171 m_se = 4;
1172 m_seLog2 = 2;
1173 break;
1174 case ADDR_CONFIG_8_SHADER_ENGINE:
1175 m_se = 8;
1176 m_seLog2 = 3;
1177 break;
1178 default:
1179 ADDR_ASSERT_ALWAYS();
1180 break;
1181 }
1182
1183 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1184 {
1185 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1186 m_rbPerSe = 1;
1187 m_rbPerSeLog2 = 0;
1188 break;
1189 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1190 m_rbPerSe = 2;
1191 m_rbPerSeLog2 = 1;
1192 break;
1193 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1194 m_rbPerSe = 4;
1195 m_rbPerSeLog2 = 2;
1196 break;
1197 default:
1198 ADDR_ASSERT_ALWAYS();
1199 break;
1200 }
1201
1202 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1203 {
1204 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1205 m_maxCompFrag = 1;
1206 m_maxCompFragLog2 = 0;
1207 break;
1208 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1209 m_maxCompFrag = 2;
1210 m_maxCompFragLog2 = 1;
1211 break;
1212 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1213 m_maxCompFrag = 4;
1214 m_maxCompFragLog2 = 2;
1215 break;
1216 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1217 m_maxCompFrag = 8;
1218 m_maxCompFragLog2 = 3;
1219 break;
1220 default:
1221 ADDR_ASSERT_ALWAYS();
1222 break;
1223 }
1224
1225 m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
1226 ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
1227 ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
1228 m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
1229
1230 if ((m_rbPerSeLog2 == 1) &&
1231 (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1232 ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1233 {
1234 ADDR_ASSERT(m_settings.isVega10 == FALSE);
1235 ADDR_ASSERT(m_settings.isRaven == FALSE);
1236 ADDR_ASSERT(m_settings.isVega20 == FALSE);
1237
1238 if (m_settings.isVega12)
1239 {
1240 m_settings.htileCacheRbConflict = 1;
1241 }
1242 }
1243 }
1244 else
1245 {
1246 valid = FALSE;
1247 ADDR_NOT_IMPLEMENTED();
1248 }
1249
1250 if (valid)
1251 {
1252 InitEquationTable();
1253 }
1254
1255 return valid;
1256 }
1257
1258 /**
1259 ************************************************************************************************************************
1260 * Gfx9Lib::HwlConvertChipFamily
1261 *
1262 * @brief
1263 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1264 * @return
1265 * ChipFamily
1266 ************************************************************************************************************************
1267 */
1268 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1269 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1270 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1271 {
1272 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1273
1274 switch (uChipFamily)
1275 {
1276 case FAMILY_AI:
1277 m_settings.isArcticIsland = 1;
1278 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1279 m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1280 m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1281 m_settings.isDce12 = 1;
1282
1283 if (m_settings.isVega10 == 0)
1284 {
1285 m_settings.htileAlignFix = 1;
1286 m_settings.applyAliasFix = 1;
1287 }
1288
1289 m_settings.metaBaseAlignFix = 1;
1290
1291 m_settings.depthPipeXorDisable = 1;
1292 break;
1293 case FAMILY_RV:
1294 m_settings.isArcticIsland = 1;
1295
1296 if (ASICREV_IS_RAVEN(uChipRevision))
1297 {
1298 m_settings.isRaven = 1;
1299
1300 m_settings.depthPipeXorDisable = 1;
1301 }
1302
1303 if (ASICREV_IS_RAVEN2(uChipRevision))
1304 {
1305 m_settings.isRaven = 1;
1306 }
1307
1308 if (m_settings.isRaven == 0)
1309 {
1310 m_settings.htileAlignFix = 1;
1311 m_settings.applyAliasFix = 1;
1312 }
1313
1314 m_settings.isDcn1 = m_settings.isRaven;
1315
1316 m_settings.metaBaseAlignFix = 1;
1317 break;
1318
1319 default:
1320 ADDR_ASSERT(!"This should be a Fusion");
1321 break;
1322 }
1323
1324 return family;
1325 }
1326
1327 /**
1328 ************************************************************************************************************************
1329 * Gfx9Lib::InitRbEquation
1330 *
1331 * @brief
1332 * Init RB equation
1333 * @return
1334 * N/A
1335 ************************************************************************************************************************
1336 */
1337 VOID Gfx9Lib::GetRbEquation(
1338 CoordEq* pRbEq, ///< [out] rb equation
1339 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1340 UINT_32 numSeLog2) ///< [in] number of shader engine
1341 const
1342 {
1343 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1344 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1345 Coordinate cx('x', rbRegion);
1346 Coordinate cy('y', rbRegion);
1347
1348 UINT_32 start = 0;
1349 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1350
1351 // Clear the rb equation
1352 pRbEq->resize(0);
1353 pRbEq->resize(numRbTotalLog2);
1354
1355 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1356 {
1357 // Special case when more than 1 SE, and 2 RB per SE
1358 (*pRbEq)[0].add(cx);
1359 (*pRbEq)[0].add(cy);
1360 cx++;
1361 cy++;
1362
1363 if (m_settings.applyAliasFix == false)
1364 {
1365 (*pRbEq)[0].add(cy);
1366 }
1367
1368 (*pRbEq)[0].add(cy);
1369 start++;
1370 }
1371
1372 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1373
1374 for (UINT_32 i = 0; i < numBits; i++)
1375 {
1376 UINT_32 idx =
1377 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1378
1379 if ((i % 2) == 1)
1380 {
1381 (*pRbEq)[idx].add(cx);
1382 cx++;
1383 }
1384 else
1385 {
1386 (*pRbEq)[idx].add(cy);
1387 cy++;
1388 }
1389 }
1390 }
1391
1392 /**
1393 ************************************************************************************************************************
1394 * Gfx9Lib::GetDataEquation
1395 *
1396 * @brief
1397 * Get data equation for fmask and Z
1398 * @return
1399 * N/A
1400 ************************************************************************************************************************
1401 */
1402 VOID Gfx9Lib::GetDataEquation(
1403 CoordEq* pDataEq, ///< [out] data surface equation
1404 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1405 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1406 AddrResourceType resourceType, ///< [in] data surface resource type
1407 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1408 UINT_32 numSamplesLog2) ///< [in] data surface sample count
1409 const
1410 {
1411 Coordinate cx('x', 0);
1412 Coordinate cy('y', 0);
1413 Coordinate cz('z', 0);
1414 Coordinate cs('s', 0);
1415
1416 // Clear the equation
1417 pDataEq->resize(0);
1418 pDataEq->resize(27);
1419
1420 if (dataSurfaceType == Gfx9DataColor)
1421 {
1422 if (IsLinear(swizzleMode))
1423 {
1424 Coordinate cm('m', 0);
1425
1426 pDataEq->resize(49);
1427
1428 for (UINT_32 i = 0; i < 49; i++)
1429 {
1430 (*pDataEq)[i].add(cm);
1431 cm++;
1432 }
1433 }
1434 else if (IsThick(resourceType, swizzleMode))
1435 {
1436 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1437 UINT_32 i;
1438 if (IsStandardSwizzle(resourceType, swizzleMode))
1439 {
1440 // Standard 3d swizzle
1441 // Fill in bottom x bits
1442 for (i = elementBytesLog2; i < 4; i++)
1443 {
1444 (*pDataEq)[i].add(cx);
1445 cx++;
1446 }
1447 // Fill in 2 bits of y and then z
1448 for (i = 4; i < 6; i++)
1449 {
1450 (*pDataEq)[i].add(cy);
1451 cy++;
1452 }
1453 for (i = 6; i < 8; i++)
1454 {
1455 (*pDataEq)[i].add(cz);
1456 cz++;
1457 }
1458 if (elementBytesLog2 < 2)
1459 {
1460 // fill in z & y bit
1461 (*pDataEq)[8].add(cz);
1462 (*pDataEq)[9].add(cy);
1463 cz++;
1464 cy++;
1465 }
1466 else if (elementBytesLog2 == 2)
1467 {
1468 // fill in y and x bit
1469 (*pDataEq)[8].add(cy);
1470 (*pDataEq)[9].add(cx);
1471 cy++;
1472 cx++;
1473 }
1474 else
1475 {
1476 // fill in 2 x bits
1477 (*pDataEq)[8].add(cx);
1478 cx++;
1479 (*pDataEq)[9].add(cx);
1480 cx++;
1481 }
1482 }
1483 else
1484 {
1485 // Z 3d swizzle
1486 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1487 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1488 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1489 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1490 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1491 {
1492 (*pDataEq)[i].add(cz);
1493 cz++;
1494 }
1495 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1496 {
1497 // add an x and z
1498 (*pDataEq)[6].add(cx);
1499 (*pDataEq)[7].add(cz);
1500 cx++;
1501 cz++;
1502 }
1503 else if (elementBytesLog2 == 2)
1504 {
1505 // add a y and z
1506 (*pDataEq)[6].add(cy);
1507 (*pDataEq)[7].add(cz);
1508 cy++;
1509 cz++;
1510 }
1511 // add y and x
1512 (*pDataEq)[8].add(cy);
1513 (*pDataEq)[9].add(cx);
1514 cy++;
1515 cx++;
1516 }
1517 // Fill in bit 10 and up
1518 pDataEq->mort3d( cz, cy, cx, 10 );
1519 }
1520 else if (IsThin(resourceType, swizzleMode))
1521 {
1522 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1523 // Color 2D
1524 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1525 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1526 UINT_32 i;
1527 // Fill in bottom x bits
1528 for (i = elementBytesLog2; i < 4; i++)
1529 {
1530 (*pDataEq)[i].add(cx);
1531 cx++;
1532 }
1533 // Fill in bottom y bits
1534 for (i = 4; i < 4 + microYBits; i++)
1535 {
1536 (*pDataEq)[i].add(cy);
1537 cy++;
1538 }
1539 // Fill in last of the micro_x bits
1540 for (i = 4 + microYBits; i < 8; i++)
1541 {
1542 (*pDataEq)[i].add(cx);
1543 cx++;
1544 }
1545 // Fill in x/y bits below sample split
1546 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1547 // Fill in sample bits
1548 for (i = 0; i < numSamplesLog2; i++)
1549 {
1550 cs.set('s', i);
1551 (*pDataEq)[tileSplitStart + i].add(cs);
1552 }
1553 // Fill in x/y bits above sample split
1554 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1555 {
1556 pDataEq->mort2d(cx, cy, blockSizeLog2);
1557 }
1558 else
1559 {
1560 pDataEq->mort2d(cy, cx, blockSizeLog2);
1561 }
1562 }
1563 else
1564 {
1565 ADDR_ASSERT_ALWAYS();
1566 }
1567 }
1568 else
1569 {
1570 // Fmask or depth
1571 UINT_32 sampleStart = elementBytesLog2;
1572 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1573 UINT_32 ymajStart = 6 + numSamplesLog2;
1574
1575 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1576 {
1577 cs.set('s', s);
1578 (*pDataEq)[sampleStart + s].add(cs);
1579 }
1580
1581 // Put in the x-major order pixel bits
1582 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1583 // Put in the y-major order pixel bits
1584 pDataEq->mort2d(cy, cx, ymajStart);
1585 }
1586 }
1587
1588 /**
1589 ************************************************************************************************************************
1590 * Gfx9Lib::GetPipeEquation
1591 *
1592 * @brief
1593 * Get pipe equation
1594 * @return
1595 * N/A
1596 ************************************************************************************************************************
1597 */
1598 VOID Gfx9Lib::GetPipeEquation(
1599 CoordEq* pPipeEq, ///< [out] pipe equation
1600 CoordEq* pDataEq, ///< [in] data equation
1601 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1602 UINT_32 numPipeLog2, ///< [in] number of pipes
1603 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1604 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1605 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1606 AddrResourceType resourceType ///< [in] data surface resource type
1607 ) const
1608 {
1609 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1610 CoordEq dataEq;
1611
1612 pDataEq->copy(dataEq);
1613
1614 if (dataSurfaceType == Gfx9DataColor)
1615 {
1616 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1617 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1618 }
1619
1620 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1621
1622 // This section should only apply to z/stencil, maybe fmask
1623 // If the pipe bit is below the comp block size,
1624 // then keep moving up the address until we find a bit that is above
1625 UINT_32 pipeStart = 0;
1626
1627 if (dataSurfaceType != Gfx9DataColor)
1628 {
1629 Coordinate tileMin('x', 3);
1630
1631 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1632 {
1633 pipeStart++;
1634 }
1635
1636 // if pipe is 0, then the first pipe bit is above the comp block size,
1637 // so we don't need to do anything
1638 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1639 // we will get the same pipe equation
1640 if (pipeStart != 0)
1641 {
1642 for (UINT_32 i = 0; i < numPipeLog2; i++)
1643 {
1644 // Copy the jth bit above pipe interleave to the current pipe equation bit
1645 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1646 }
1647 }
1648 }
1649
1650 if (IsPrt(swizzleMode))
1651 {
1652 // Clear out bits above the block size if prt's are enabled
1653 dataEq.resize(blockSizeLog2);
1654 dataEq.resize(48);
1655 }
1656
1657 if (IsXor(swizzleMode))
1658 {
1659 CoordEq xorMask;
1660
1661 if (IsThick(resourceType, swizzleMode))
1662 {
1663 CoordEq xorMask2;
1664
1665 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1666
1667 xorMask.resize(numPipeLog2);
1668
1669 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1670 {
1671 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1672 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1673 }
1674 }
1675 else
1676 {
1677 // Xor in the bits above the pipe+gpu bits
1678 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1679
1680 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1681 {
1682 Coordinate co;
1683 CoordEq xorMask2;
1684 // if 1xaa and not prt, then xor in the z bits
1685 xorMask2.resize(0);
1686 xorMask2.resize(numPipeLog2);
1687 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1688 {
1689 co.set('z', numPipeLog2 - 1 - pipeIdx);
1690 xorMask2[pipeIdx].add(co);
1691 }
1692
1693 pPipeEq->xorin(xorMask2);
1694 }
1695 }
1696
1697 xorMask.reverse();
1698 pPipeEq->xorin(xorMask);
1699 }
1700 }
1701 /**
1702 ************************************************************************************************************************
1703 * Gfx9Lib::GetMetaEquation
1704 *
1705 * @brief
1706 * Get meta equation for cmask/htile/DCC
1707 * @return
1708 * Pointer to a calculated meta equation
1709 ************************************************************************************************************************
1710 */
1711 const CoordEq* Gfx9Lib::GetMetaEquation(
1712 const MetaEqParams& metaEqParams)
1713 {
1714 UINT_32 cachedMetaEqIndex;
1715
1716 for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1717 {
1718 if (memcmp(&metaEqParams,
1719 &m_cachedMetaEqKey[cachedMetaEqIndex],
1720 static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1721 {
1722 break;
1723 }
1724 }
1725
1726 CoordEq* pMetaEq = NULL;
1727
1728 if (cachedMetaEqIndex < MaxCachedMetaEq)
1729 {
1730 pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1731 }
1732 else
1733 {
1734 m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1735
1736 pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1737
1738 m_metaEqOverrideIndex %= MaxCachedMetaEq;
1739
1740 GenMetaEquation(pMetaEq,
1741 metaEqParams.maxMip,
1742 metaEqParams.elementBytesLog2,
1743 metaEqParams.numSamplesLog2,
1744 metaEqParams.metaFlag,
1745 metaEqParams.dataSurfaceType,
1746 metaEqParams.swizzleMode,
1747 metaEqParams.resourceType,
1748 metaEqParams.metaBlkWidthLog2,
1749 metaEqParams.metaBlkHeightLog2,
1750 metaEqParams.metaBlkDepthLog2,
1751 metaEqParams.compBlkWidthLog2,
1752 metaEqParams.compBlkHeightLog2,
1753 metaEqParams.compBlkDepthLog2);
1754 }
1755
1756 return pMetaEq;
1757 }
1758
1759 /**
1760 ************************************************************************************************************************
1761 * Gfx9Lib::GenMetaEquation
1762 *
1763 * @brief
1764 * Get meta equation for cmask/htile/DCC
1765 * @return
1766 * N/A
1767 ************************************************************************************************************************
1768 */
1769 VOID Gfx9Lib::GenMetaEquation(
1770 CoordEq* pMetaEq, ///< [out] meta equation
1771 UINT_32 maxMip, ///< [in] max mip Id
1772 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1773 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1774 ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
1775 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1776 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1777 AddrResourceType resourceType, ///< [in] data surface resource type
1778 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1779 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1780 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1781 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1782 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1783 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1784 const
1785 {
1786 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1787 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1788
1789 // Get the correct data address and rb equation
1790 CoordEq dataEq;
1791 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1792 elementBytesLog2, numSamplesLog2);
1793
1794 // Get pipe and rb equations
1795 CoordEq pipeEquation;
1796 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1797 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1798 numPipeTotalLog2 = pipeEquation.getsize();
1799
1800 if (metaFlag.linear)
1801 {
1802 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1803 ADDR_ASSERT_ALWAYS();
1804
1805 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1806
1807 dataEq.copy(*pMetaEq);
1808
1809 if (IsLinear(swizzleMode))
1810 {
1811 if (metaFlag.pipeAligned)
1812 {
1813 // Remove the pipe bits
1814 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1815 pMetaEq->shift(-shift, pipeInterleaveLog2);
1816 }
1817 // Divide by comp block size, which for linear (which is always color) is 256 B
1818 pMetaEq->shift(-8);
1819
1820 if (metaFlag.pipeAligned)
1821 {
1822 // Put pipe bits back in
1823 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1824
1825 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1826 {
1827 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1828 }
1829 }
1830 }
1831
1832 pMetaEq->shift(1);
1833 }
1834 else
1835 {
1836 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1837 UINT_32 compFragLog2 =
1838 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1839 maxCompFragLog2 : numSamplesLog2;
1840
1841 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1842
1843 // Make sure the metaaddr is cleared
1844 pMetaEq->resize(0);
1845 pMetaEq->resize(27);
1846
1847 if (IsThick(resourceType, swizzleMode))
1848 {
1849 Coordinate cx('x', 0);
1850 Coordinate cy('y', 0);
1851 Coordinate cz('z', 0);
1852
1853 if (maxMip > 0)
1854 {
1855 pMetaEq->mort3d(cy, cx, cz);
1856 }
1857 else
1858 {
1859 pMetaEq->mort3d(cx, cy, cz);
1860 }
1861 }
1862 else
1863 {
1864 Coordinate cx('x', 0);
1865 Coordinate cy('y', 0);
1866 Coordinate cs;
1867
1868 if (maxMip > 0)
1869 {
1870 pMetaEq->mort2d(cy, cx, compFragLog2);
1871 }
1872 else
1873 {
1874 pMetaEq->mort2d(cx, cy, compFragLog2);
1875 }
1876
1877 //------------------------------------------------------------------------------------------------------------------------
1878 // Put the compressible fragments at the lsb
1879 // the uncompressible frags will be at the msb of the micro address
1880 //------------------------------------------------------------------------------------------------------------------------
1881 for (UINT_32 s = 0; s < compFragLog2; s++)
1882 {
1883 cs.set('s', s);
1884 (*pMetaEq)[s].add(cs);
1885 }
1886 }
1887
1888 // Keep a copy of the pipe equations
1889 CoordEq origPipeEquation;
1890 pipeEquation.copy(origPipeEquation);
1891
1892 Coordinate co;
1893 // filter out everything under the compressed block size
1894 co.set('x', compBlkWidthLog2);
1895 pMetaEq->Filter('<', co, 0, 'x');
1896 co.set('y', compBlkHeightLog2);
1897 pMetaEq->Filter('<', co, 0, 'y');
1898 co.set('z', compBlkDepthLog2);
1899 pMetaEq->Filter('<', co, 0, 'z');
1900
1901 // For non-color, filter out sample bits
1902 if (dataSurfaceType != Gfx9DataColor)
1903 {
1904 co.set('x', 0);
1905 pMetaEq->Filter('<', co, 0, 's');
1906 }
1907
1908 // filter out everything above the metablock size
1909 co.set('x', metaBlkWidthLog2 - 1);
1910 pMetaEq->Filter('>', co, 0, 'x');
1911 co.set('y', metaBlkHeightLog2 - 1);
1912 pMetaEq->Filter('>', co, 0, 'y');
1913 co.set('z', metaBlkDepthLog2 - 1);
1914 pMetaEq->Filter('>', co, 0, 'z');
1915
1916 // filter out everything above the metablock size for the channel bits
1917 co.set('x', metaBlkWidthLog2 - 1);
1918 pipeEquation.Filter('>', co, 0, 'x');
1919 co.set('y', metaBlkHeightLog2 - 1);
1920 pipeEquation.Filter('>', co, 0, 'y');
1921 co.set('z', metaBlkDepthLog2 - 1);
1922 pipeEquation.Filter('>', co, 0, 'z');
1923
1924 // Make sure we still have the same number of channel bits
1925 if (pipeEquation.getsize() != numPipeTotalLog2)
1926 {
1927 ADDR_ASSERT_ALWAYS();
1928 }
1929
1930 // Loop through all channel and rb bits,
1931 // and make sure these components exist in the metadata address
1932 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1933 {
1934 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1935 {
1936 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1937 {
1938 ADDR_ASSERT_ALWAYS();
1939 }
1940 }
1941 }
1942
1943 const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
1944 const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1945 const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1946 CoordEq origRbEquation;
1947
1948 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1949
1950 CoordEq rbEquation = origRbEquation;
1951
1952 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1953 {
1954 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1955 {
1956 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1957 {
1958 ADDR_ASSERT_ALWAYS();
1959 }
1960 }
1961 }
1962
1963 if (m_settings.applyAliasFix)
1964 {
1965 co.set('z', -1);
1966 }
1967
1968 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1969 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1970 {
1971 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1972 {
1973 BOOL_32 isRbEquationInPipeEquation = FALSE;
1974
1975 if (m_settings.applyAliasFix)
1976 {
1977 CoordTerm filteredPipeEq;
1978 filteredPipeEq = pipeEquation[j];
1979
1980 filteredPipeEq.Filter('>', co, 0, 'z');
1981
1982 isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1983 }
1984 else
1985 {
1986 isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1987 }
1988
1989 if (isRbEquationInPipeEquation)
1990 {
1991 rbEquation[i].Clear();
1992 }
1993 }
1994 }
1995
1996 bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
1997
1998 // Loop through each bit of the channel, get the smallest coordinate,
1999 // and remove it from the metaaddr, and rb_equation
2000 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2001 {
2002 pipeEquation[i].getsmallest(co);
2003
2004 UINT_32 old_size = pMetaEq->getsize();
2005 pMetaEq->Filter('=', co);
2006 UINT_32 new_size = pMetaEq->getsize();
2007 if (new_size != old_size-1)
2008 {
2009 ADDR_ASSERT_ALWAYS();
2010 }
2011 pipeEquation.remove(co);
2012 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2013 {
2014 if (rbEquation[j].remove(co))
2015 {
2016 // if we actually removed something from this bit, then add the remaining
2017 // channel bits, as these can be removed for this bit
2018 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2019 {
2020 if (pipeEquation[i][k] != co)
2021 {
2022 rbEquation[j].add(pipeEquation[i][k]);
2023 rbAppendedWithPipeBits[j] = true;
2024 }
2025 }
2026 }
2027 }
2028 }
2029
2030 // Loop through the rb bits and see what remain;
2031 // filter out the smallest coordinate if it remains
2032 UINT_32 rbBitsLeft = 0;
2033 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2034 {
2035 BOOL_32 isRbEqAppended = FALSE;
2036
2037 if (m_settings.applyAliasFix)
2038 {
2039 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2040 }
2041 else
2042 {
2043 isRbEqAppended = (rbEquation[i].getsize() > 0);
2044 }
2045
2046 if (isRbEqAppended)
2047 {
2048 rbBitsLeft++;
2049 rbEquation[i].getsmallest(co);
2050 UINT_32 old_size = pMetaEq->getsize();
2051 pMetaEq->Filter('=', co);
2052 UINT_32 new_size = pMetaEq->getsize();
2053 if (new_size != old_size - 1)
2054 {
2055 // assert warning
2056 }
2057 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2058 {
2059 if (rbEquation[j].remove(co))
2060 {
2061 // if we actually removed something from this bit, then add the remaining
2062 // rb bits, as these can be removed for this bit
2063 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2064 {
2065 if (rbEquation[i][k] != co)
2066 {
2067 rbEquation[j].add(rbEquation[i][k]);
2068 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2069 }
2070 }
2071 }
2072 }
2073 }
2074 }
2075
2076 // capture the size of the metaaddr
2077 UINT_32 metaSize = pMetaEq->getsize();
2078 // resize to 49 bits...make this a nibble address
2079 pMetaEq->resize(49);
2080 // Concatenate the macro address above the current address
2081 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2082 {
2083 co.set('m', j);
2084 (*pMetaEq)[i].add(co);
2085 }
2086
2087 // Multiply by meta element size (in nibbles)
2088 if (dataSurfaceType == Gfx9DataColor)
2089 {
2090 pMetaEq->shift(1);
2091 }
2092 else if (dataSurfaceType == Gfx9DataDepthStencil)
2093 {
2094 pMetaEq->shift(3);
2095 }
2096
2097 //------------------------------------------------------------------------------------------
2098 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2099 // Shift up from pipe interleave number of channel
2100 // and rb bits left, and uncompressed fragments
2101 //------------------------------------------------------------------------------------------
2102
2103 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2104
2105 // Put in the channel bits
2106 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2107 {
2108 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2109 }
2110
2111 // Put in remaining rb bits
2112 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2113 {
2114 BOOL_32 isRbEqAppended = FALSE;
2115
2116 if (m_settings.applyAliasFix)
2117 {
2118 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2119 }
2120 else
2121 {
2122 isRbEqAppended = (rbEquation[i].getsize() > 0);
2123 }
2124
2125 if (isRbEqAppended)
2126 {
2127 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2128 // Mark any rb bit we add in to the rb mask
2129 j++;
2130 }
2131 }
2132
2133 //------------------------------------------------------------------------------------------
2134 // Put in the uncompressed fragment bits
2135 //------------------------------------------------------------------------------------------
2136 for (UINT_32 i = 0; i < uncompFragLog2; i++)
2137 {
2138 co.set('s', compFragLog2 + i);
2139 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2140 }
2141 }
2142 }
2143
2144 /**
2145 ************************************************************************************************************************
2146 * Gfx9Lib::IsEquationSupported
2147 *
2148 * @brief
2149 * Check if equation is supported for given swizzle mode and resource type.
2150 *
2151 * @return
2152 * TRUE if supported
2153 ************************************************************************************************************************
2154 */
2155 BOOL_32 Gfx9Lib::IsEquationSupported(
2156 AddrResourceType rsrcType,
2157 AddrSwizzleMode swMode,
2158 UINT_32 elementBytesLog2) const
2159 {
2160 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2161 (IsLinear(swMode) == FALSE) &&
2162 (((IsTex2d(rsrcType) == TRUE) &&
2163 ((elementBytesLog2 < 4) ||
2164 ((IsRotateSwizzle(swMode) == FALSE) &&
2165 (IsZOrderSwizzle(swMode) == FALSE)))) ||
2166 ((IsTex3d(rsrcType) == TRUE) &&
2167 (IsRotateSwizzle(swMode) == FALSE) &&
2168 (IsBlock256b(swMode) == FALSE)));
2169
2170 return supported;
2171 }
2172
2173 /**
2174 ************************************************************************************************************************
2175 * Gfx9Lib::InitEquationTable
2176 *
2177 * @brief
2178 * Initialize Equation table.
2179 *
2180 * @return
2181 * N/A
2182 ************************************************************************************************************************
2183 */
2184 VOID Gfx9Lib::InitEquationTable()
2185 {
2186 memset(m_equationTable, 0, sizeof(m_equationTable));
2187
2188 // Loop all possible resource type (2D/3D)
2189 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2190 {
2191 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2192
2193 // Loop all possible swizzle mode
2194 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
2195 {
2196 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2197
2198 // Loop all possible bpp
2199 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2200 {
2201 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2202
2203 // Check if the input is supported
2204 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2205 {
2206 ADDR_EQUATION equation;
2207 ADDR_E_RETURNCODE retCode;
2208
2209 memset(&equation, 0, sizeof(ADDR_EQUATION));
2210
2211 // Generate the equation
2212 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2213 {
2214 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2215 }
2216 else if (IsThin(rsrcType, swMode))
2217 {
2218 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2219 }
2220 else
2221 {
2222 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2223 }
2224
2225 // Only fill the equation into the table if the return code is ADDR_OK,
2226 // otherwise if the return code is not ADDR_OK, it indicates this is not
2227 // a valid input, we do nothing but just fill invalid equation index
2228 // into the lookup table.
2229 if (retCode == ADDR_OK)
2230 {
2231 equationIndex = m_numEquations;
2232 ADDR_ASSERT(equationIndex < EquationTableSize);
2233
2234 m_equationTable[equationIndex] = equation;
2235
2236 m_numEquations++;
2237 }
2238 else
2239 {
2240 ADDR_ASSERT_ALWAYS();
2241 }
2242 }
2243
2244 // Fill the index into the lookup table, if the combination is not supported
2245 // fill the invalid equation index
2246 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2247 }
2248 }
2249 }
2250 }
2251
2252 /**
2253 ************************************************************************************************************************
2254 * Gfx9Lib::HwlGetEquationIndex
2255 *
2256 * @brief
2257 * Interface function stub of GetEquationIndex
2258 *
2259 * @return
2260 * ADDR_E_RETURNCODE
2261 ************************************************************************************************************************
2262 */
2263 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2264 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2265 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2266 ) const
2267 {
2268 AddrResourceType rsrcType = pIn->resourceType;
2269 AddrSwizzleMode swMode = pIn->swizzleMode;
2270 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2271 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2272
2273 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2274 {
2275 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2276 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2277
2278 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2279 }
2280
2281 if (pOut->pMipInfo != NULL)
2282 {
2283 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2284 {
2285 pOut->pMipInfo[i].equationIndex = index;
2286 }
2287 }
2288
2289 return index;
2290 }
2291
2292 /**
2293 ************************************************************************************************************************
2294 * Gfx9Lib::HwlComputeBlock256Equation
2295 *
2296 * @brief
2297 * Interface function stub of ComputeBlock256Equation
2298 *
2299 * @return
2300 * ADDR_E_RETURNCODE
2301 ************************************************************************************************************************
2302 */
2303 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2304 AddrResourceType rsrcType,
2305 AddrSwizzleMode swMode,
2306 UINT_32 elementBytesLog2,
2307 ADDR_EQUATION* pEquation) const
2308 {
2309 ADDR_E_RETURNCODE ret = ADDR_OK;
2310
2311 pEquation->numBits = 8;
2312
2313 UINT_32 i = 0;
2314 for (; i < elementBytesLog2; i++)
2315 {
2316 InitChannel(1, 0 , i, &pEquation->addr[i]);
2317 }
2318
2319 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2320
2321 const UINT_32 maxBitsUsed = 4;
2322 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2323 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2324
2325 for (i = 0; i < maxBitsUsed; i++)
2326 {
2327 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2328 InitChannel(1, 1, i, &y[i]);
2329 }
2330
2331 if (IsStandardSwizzle(rsrcType, swMode))
2332 {
2333 switch (elementBytesLog2)
2334 {
2335 case 0:
2336 pixelBit[0] = x[0];
2337 pixelBit[1] = x[1];
2338 pixelBit[2] = x[2];
2339 pixelBit[3] = x[3];
2340 pixelBit[4] = y[0];
2341 pixelBit[5] = y[1];
2342 pixelBit[6] = y[2];
2343 pixelBit[7] = y[3];
2344 break;
2345 case 1:
2346 pixelBit[0] = x[0];
2347 pixelBit[1] = x[1];
2348 pixelBit[2] = x[2];
2349 pixelBit[3] = y[0];
2350 pixelBit[4] = y[1];
2351 pixelBit[5] = y[2];
2352 pixelBit[6] = x[3];
2353 break;
2354 case 2:
2355 pixelBit[0] = x[0];
2356 pixelBit[1] = x[1];
2357 pixelBit[2] = y[0];
2358 pixelBit[3] = y[1];
2359 pixelBit[4] = y[2];
2360 pixelBit[5] = x[2];
2361 break;
2362 case 3:
2363 pixelBit[0] = x[0];
2364 pixelBit[1] = y[0];
2365 pixelBit[2] = y[1];
2366 pixelBit[3] = x[1];
2367 pixelBit[4] = x[2];
2368 break;
2369 case 4:
2370 pixelBit[0] = y[0];
2371 pixelBit[1] = y[1];
2372 pixelBit[2] = x[0];
2373 pixelBit[3] = x[1];
2374 break;
2375 default:
2376 ADDR_ASSERT_ALWAYS();
2377 ret = ADDR_INVALIDPARAMS;
2378 break;
2379 }
2380 }
2381 else if (IsDisplaySwizzle(rsrcType, swMode))
2382 {
2383 switch (elementBytesLog2)
2384 {
2385 case 0:
2386 pixelBit[0] = x[0];
2387 pixelBit[1] = x[1];
2388 pixelBit[2] = x[2];
2389 pixelBit[3] = y[1];
2390 pixelBit[4] = y[0];
2391 pixelBit[5] = y[2];
2392 pixelBit[6] = x[3];
2393 pixelBit[7] = y[3];
2394 break;
2395 case 1:
2396 pixelBit[0] = x[0];
2397 pixelBit[1] = x[1];
2398 pixelBit[2] = x[2];
2399 pixelBit[3] = y[0];
2400 pixelBit[4] = y[1];
2401 pixelBit[5] = y[2];
2402 pixelBit[6] = x[3];
2403 break;
2404 case 2:
2405 pixelBit[0] = x[0];
2406 pixelBit[1] = x[1];
2407 pixelBit[2] = y[0];
2408 pixelBit[3] = x[2];
2409 pixelBit[4] = y[1];
2410 pixelBit[5] = y[2];
2411 break;
2412 case 3:
2413 pixelBit[0] = x[0];
2414 pixelBit[1] = y[0];
2415 pixelBit[2] = x[1];
2416 pixelBit[3] = x[2];
2417 pixelBit[4] = y[1];
2418 break;
2419 case 4:
2420 pixelBit[0] = x[0];
2421 pixelBit[1] = y[0];
2422 pixelBit[2] = x[1];
2423 pixelBit[3] = y[1];
2424 break;
2425 default:
2426 ADDR_ASSERT_ALWAYS();
2427 ret = ADDR_INVALIDPARAMS;
2428 break;
2429 }
2430 }
2431 else if (IsRotateSwizzle(swMode))
2432 {
2433 switch (elementBytesLog2)
2434 {
2435 case 0:
2436 pixelBit[0] = y[0];
2437 pixelBit[1] = y[1];
2438 pixelBit[2] = y[2];
2439 pixelBit[3] = x[1];
2440 pixelBit[4] = x[0];
2441 pixelBit[5] = x[2];
2442 pixelBit[6] = x[3];
2443 pixelBit[7] = y[3];
2444 break;
2445 case 1:
2446 pixelBit[0] = y[0];
2447 pixelBit[1] = y[1];
2448 pixelBit[2] = y[2];
2449 pixelBit[3] = x[0];
2450 pixelBit[4] = x[1];
2451 pixelBit[5] = x[2];
2452 pixelBit[6] = x[3];
2453 break;
2454 case 2:
2455 pixelBit[0] = y[0];
2456 pixelBit[1] = y[1];
2457 pixelBit[2] = x[0];
2458 pixelBit[3] = y[2];
2459 pixelBit[4] = x[1];
2460 pixelBit[5] = x[2];
2461 break;
2462 case 3:
2463 pixelBit[0] = y[0];
2464 pixelBit[1] = x[0];
2465 pixelBit[2] = y[1];
2466 pixelBit[3] = x[1];
2467 pixelBit[4] = x[2];
2468 break;
2469 default:
2470 ADDR_ASSERT_ALWAYS();
2471 case 4:
2472 ret = ADDR_INVALIDPARAMS;
2473 break;
2474 }
2475 }
2476 else
2477 {
2478 ADDR_ASSERT_ALWAYS();
2479 ret = ADDR_INVALIDPARAMS;
2480 }
2481
2482 // Post validation
2483 if (ret == ADDR_OK)
2484 {
2485 MAYBE_UNUSED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2486 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2487 (microBlockDim.w * (1 << elementBytesLog2)));
2488 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2489 }
2490
2491 return ret;
2492 }
2493
2494 /**
2495 ************************************************************************************************************************
2496 * Gfx9Lib::HwlComputeThinEquation
2497 *
2498 * @brief
2499 * Interface function stub of ComputeThinEquation
2500 *
2501 * @return
2502 * ADDR_E_RETURNCODE
2503 ************************************************************************************************************************
2504 */
2505 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2506 AddrResourceType rsrcType,
2507 AddrSwizzleMode swMode,
2508 UINT_32 elementBytesLog2,
2509 ADDR_EQUATION* pEquation) const
2510 {
2511 ADDR_E_RETURNCODE ret = ADDR_OK;
2512
2513 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2514
2515 UINT_32 maxXorBits = blockSizeLog2;
2516 if (IsNonPrtXor(swMode))
2517 {
2518 // For non-prt-xor, maybe need to initialize some more bits for xor
2519 // The highest xor bit used in equation will be max the following 3 items:
2520 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2521 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2522 // 3. blockSizeLog2
2523
2524 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2525 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2526 GetPipeXorBits(blockSizeLog2) +
2527 2 * GetBankXorBits(blockSizeLog2));
2528 }
2529
2530 const UINT_32 maxBitsUsed = 14;
2531 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2532 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2533 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2534
2535 const UINT_32 extraXorBits = 16;
2536 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2537 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2538
2539 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2540 {
2541 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2542 InitChannel(1, 1, i, &y[i]);
2543 }
2544
2545 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2546
2547 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2548 {
2549 InitChannel(1, 0 , i, &pixelBit[i]);
2550 }
2551
2552 UINT_32 xIdx = 0;
2553 UINT_32 yIdx = 0;
2554 UINT_32 lowBits = 0;
2555
2556 if (IsZOrderSwizzle(swMode))
2557 {
2558 if (elementBytesLog2 <= 3)
2559 {
2560 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2561 {
2562 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2563 }
2564
2565 lowBits = 6;
2566 }
2567 else
2568 {
2569 ret = ADDR_INVALIDPARAMS;
2570 }
2571 }
2572 else
2573 {
2574 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2575
2576 if (ret == ADDR_OK)
2577 {
2578 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2579 xIdx = Log2(microBlockDim.w);
2580 yIdx = Log2(microBlockDim.h);
2581 lowBits = 8;
2582 }
2583 }
2584
2585 if (ret == ADDR_OK)
2586 {
2587 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2588 {
2589 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2590 }
2591
2592 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2593 {
2594 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2595 }
2596
2597 if (IsXor(swMode))
2598 {
2599 // Fill XOR bits
2600 UINT_32 pipeStart = m_pipeInterleaveLog2;
2601 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2602
2603 UINT_32 bankStart = pipeStart + pipeXorBits;
2604 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2605
2606 for (UINT_32 i = 0; i < pipeXorBits; i++)
2607 {
2608 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2609 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2610 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2611
2612 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2613 }
2614
2615 for (UINT_32 i = 0; i < bankXorBits; i++)
2616 {
2617 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2618 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2619 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2620
2621 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2622 }
2623
2624 if (IsPrt(swMode) == FALSE)
2625 {
2626 for (UINT_32 i = 0; i < pipeXorBits; i++)
2627 {
2628 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2629 }
2630
2631 for (UINT_32 i = 0; i < bankXorBits; i++)
2632 {
2633 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2634 }
2635 }
2636 }
2637
2638 pEquation->numBits = blockSizeLog2;
2639 }
2640
2641 return ret;
2642 }
2643
2644 /**
2645 ************************************************************************************************************************
2646 * Gfx9Lib::HwlComputeThickEquation
2647 *
2648 * @brief
2649 * Interface function stub of ComputeThickEquation
2650 *
2651 * @return
2652 * ADDR_E_RETURNCODE
2653 ************************************************************************************************************************
2654 */
2655 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2656 AddrResourceType rsrcType,
2657 AddrSwizzleMode swMode,
2658 UINT_32 elementBytesLog2,
2659 ADDR_EQUATION* pEquation) const
2660 {
2661 ADDR_E_RETURNCODE ret = ADDR_OK;
2662
2663 ADDR_ASSERT(IsTex3d(rsrcType));
2664
2665 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2666
2667 UINT_32 maxXorBits = blockSizeLog2;
2668 if (IsNonPrtXor(swMode))
2669 {
2670 // For non-prt-xor, maybe need to initialize some more bits for xor
2671 // The highest xor bit used in equation will be max the following 3:
2672 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2673 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2674 // 3. blockSizeLog2
2675
2676 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2677 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2678 GetPipeXorBits(blockSizeLog2) +
2679 3 * GetBankXorBits(blockSizeLog2));
2680 }
2681
2682 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2683 {
2684 InitChannel(1, 0 , i, &pEquation->addr[i]);
2685 }
2686
2687 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2688
2689 const UINT_32 maxBitsUsed = 12;
2690 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2691 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2692 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2693 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2694
2695 const UINT_32 extraXorBits = 24;
2696 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2697 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2698
2699 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2700 {
2701 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2702 InitChannel(1, 1, i, &y[i]);
2703 InitChannel(1, 2, i, &z[i]);
2704 }
2705
2706 if (IsZOrderSwizzle(swMode))
2707 {
2708 switch (elementBytesLog2)
2709 {
2710 case 0:
2711 pixelBit[0] = x[0];
2712 pixelBit[1] = y[0];
2713 pixelBit[2] = x[1];
2714 pixelBit[3] = y[1];
2715 pixelBit[4] = z[0];
2716 pixelBit[5] = z[1];
2717 pixelBit[6] = x[2];
2718 pixelBit[7] = z[2];
2719 pixelBit[8] = y[2];
2720 pixelBit[9] = x[3];
2721 break;
2722 case 1:
2723 pixelBit[0] = x[0];
2724 pixelBit[1] = y[0];
2725 pixelBit[2] = x[1];
2726 pixelBit[3] = y[1];
2727 pixelBit[4] = z[0];
2728 pixelBit[5] = z[1];
2729 pixelBit[6] = z[2];
2730 pixelBit[7] = y[2];
2731 pixelBit[8] = x[2];
2732 break;
2733 case 2:
2734 pixelBit[0] = x[0];
2735 pixelBit[1] = y[0];
2736 pixelBit[2] = x[1];
2737 pixelBit[3] = z[0];
2738 pixelBit[4] = y[1];
2739 pixelBit[5] = z[1];
2740 pixelBit[6] = y[2];
2741 pixelBit[7] = x[2];
2742 break;
2743 case 3:
2744 pixelBit[0] = x[0];
2745 pixelBit[1] = y[0];
2746 pixelBit[2] = z[0];
2747 pixelBit[3] = x[1];
2748 pixelBit[4] = z[1];
2749 pixelBit[5] = y[1];
2750 pixelBit[6] = x[2];
2751 break;
2752 case 4:
2753 pixelBit[0] = x[0];
2754 pixelBit[1] = y[0];
2755 pixelBit[2] = z[0];
2756 pixelBit[3] = z[1];
2757 pixelBit[4] = y[1];
2758 pixelBit[5] = x[1];
2759 break;
2760 default:
2761 ADDR_ASSERT_ALWAYS();
2762 ret = ADDR_INVALIDPARAMS;
2763 break;
2764 }
2765 }
2766 else if (IsStandardSwizzle(rsrcType, swMode))
2767 {
2768 switch (elementBytesLog2)
2769 {
2770 case 0:
2771 pixelBit[0] = x[0];
2772 pixelBit[1] = x[1];
2773 pixelBit[2] = x[2];
2774 pixelBit[3] = x[3];
2775 pixelBit[4] = y[0];
2776 pixelBit[5] = y[1];
2777 pixelBit[6] = z[0];
2778 pixelBit[7] = z[1];
2779 pixelBit[8] = z[2];
2780 pixelBit[9] = y[2];
2781 break;
2782 case 1:
2783 pixelBit[0] = x[0];
2784 pixelBit[1] = x[1];
2785 pixelBit[2] = x[2];
2786 pixelBit[3] = y[0];
2787 pixelBit[4] = y[1];
2788 pixelBit[5] = z[0];
2789 pixelBit[6] = z[1];
2790 pixelBit[7] = z[2];
2791 pixelBit[8] = y[2];
2792 break;
2793 case 2:
2794 pixelBit[0] = x[0];
2795 pixelBit[1] = x[1];
2796 pixelBit[2] = y[0];
2797 pixelBit[3] = y[1];
2798 pixelBit[4] = z[0];
2799 pixelBit[5] = z[1];
2800 pixelBit[6] = y[2];
2801 pixelBit[7] = x[2];
2802 break;
2803 case 3:
2804 pixelBit[0] = x[0];
2805 pixelBit[1] = y[0];
2806 pixelBit[2] = y[1];
2807 pixelBit[3] = z[0];
2808 pixelBit[4] = z[1];
2809 pixelBit[5] = x[1];
2810 pixelBit[6] = x[2];
2811 break;
2812 case 4:
2813 pixelBit[0] = y[0];
2814 pixelBit[1] = y[1];
2815 pixelBit[2] = z[0];
2816 pixelBit[3] = z[1];
2817 pixelBit[4] = x[0];
2818 pixelBit[5] = x[1];
2819 break;
2820 default:
2821 ADDR_ASSERT_ALWAYS();
2822 ret = ADDR_INVALIDPARAMS;
2823 break;
2824 }
2825 }
2826 else
2827 {
2828 ADDR_ASSERT_ALWAYS();
2829 ret = ADDR_INVALIDPARAMS;
2830 }
2831
2832 if (ret == ADDR_OK)
2833 {
2834 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2835 UINT_32 xIdx = Log2(microBlockDim.w);
2836 UINT_32 yIdx = Log2(microBlockDim.h);
2837 UINT_32 zIdx = Log2(microBlockDim.d);
2838
2839 pixelBit = pEquation->addr;
2840
2841 const UINT_32 lowBits = 10;
2842 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2843 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2844
2845 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2846 {
2847 if ((i % 3) == 0)
2848 {
2849 pixelBit[i] = x[xIdx++];
2850 }
2851 else if ((i % 3) == 1)
2852 {
2853 pixelBit[i] = z[zIdx++];
2854 }
2855 else
2856 {
2857 pixelBit[i] = y[yIdx++];
2858 }
2859 }
2860
2861 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2862 {
2863 if ((i % 3) == 0)
2864 {
2865 xorExtra[i - blockSizeLog2] = x[xIdx++];
2866 }
2867 else if ((i % 3) == 1)
2868 {
2869 xorExtra[i - blockSizeLog2] = z[zIdx++];
2870 }
2871 else
2872 {
2873 xorExtra[i - blockSizeLog2] = y[yIdx++];
2874 }
2875 }
2876
2877 if (IsXor(swMode))
2878 {
2879 // Fill XOR bits
2880 UINT_32 pipeStart = m_pipeInterleaveLog2;
2881 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2882 for (UINT_32 i = 0; i < pipeXorBits; i++)
2883 {
2884 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2885 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2886 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2887
2888 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2889
2890 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2891 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2892 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2893
2894 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2895 }
2896
2897 UINT_32 bankStart = pipeStart + pipeXorBits;
2898 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2899 for (UINT_32 i = 0; i < bankXorBits; i++)
2900 {
2901 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2902 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2903 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2904
2905 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2906
2907 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2908 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2909 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2910
2911 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2912 }
2913 }
2914
2915 pEquation->numBits = blockSizeLog2;
2916 }
2917
2918 return ret;
2919 }
2920
2921 /**
2922 ************************************************************************************************************************
2923 * Gfx9Lib::IsValidDisplaySwizzleMode
2924 *
2925 * @brief
2926 * Check if a swizzle mode is supported by display engine
2927 *
2928 * @return
2929 * TRUE is swizzle mode is supported by display engine
2930 ************************************************************************************************************************
2931 */
2932 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2933 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2934 {
2935 BOOL_32 support = FALSE;
2936
2937 const AddrResourceType resourceType = pIn->resourceType;
2938 (void)resourceType;
2939 const AddrSwizzleMode swizzleMode = pIn->swizzleMode;
2940
2941 if (m_settings.isDce12)
2942 {
2943 switch (swizzleMode)
2944 {
2945 case ADDR_SW_256B_D:
2946 case ADDR_SW_256B_R:
2947 support = (pIn->bpp == 32);
2948 break;
2949
2950 case ADDR_SW_LINEAR:
2951 case ADDR_SW_4KB_D:
2952 case ADDR_SW_4KB_R:
2953 case ADDR_SW_64KB_D:
2954 case ADDR_SW_64KB_R:
2955 case ADDR_SW_VAR_D:
2956 case ADDR_SW_VAR_R:
2957 case ADDR_SW_4KB_D_X:
2958 case ADDR_SW_4KB_R_X:
2959 case ADDR_SW_64KB_D_X:
2960 case ADDR_SW_64KB_R_X:
2961 case ADDR_SW_VAR_D_X:
2962 case ADDR_SW_VAR_R_X:
2963 support = (pIn->bpp <= 64);
2964 break;
2965
2966 default:
2967 break;
2968 }
2969 }
2970 else if (m_settings.isDcn1)
2971 {
2972 switch (swizzleMode)
2973 {
2974 case ADDR_SW_4KB_D:
2975 case ADDR_SW_64KB_D:
2976 case ADDR_SW_VAR_D:
2977 case ADDR_SW_64KB_D_T:
2978 case ADDR_SW_4KB_D_X:
2979 case ADDR_SW_64KB_D_X:
2980 case ADDR_SW_VAR_D_X:
2981 support = (pIn->bpp == 64);
2982 break;
2983
2984 case ADDR_SW_LINEAR:
2985 case ADDR_SW_4KB_S:
2986 case ADDR_SW_64KB_S:
2987 case ADDR_SW_VAR_S:
2988 case ADDR_SW_64KB_S_T:
2989 case ADDR_SW_4KB_S_X:
2990 case ADDR_SW_64KB_S_X:
2991 case ADDR_SW_VAR_S_X:
2992 support = (pIn->bpp <= 64);
2993 break;
2994
2995 default:
2996 break;
2997 }
2998 }
2999 else
3000 {
3001 ADDR_NOT_IMPLEMENTED();
3002 }
3003
3004 return support;
3005 }
3006
3007 /**
3008 ************************************************************************************************************************
3009 * Gfx9Lib::HwlComputePipeBankXor
3010 *
3011 * @brief
3012 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3013 *
3014 * @return
3015 * PipeBankXor value
3016 ************************************************************************************************************************
3017 */
3018 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3019 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3020 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
3021 {
3022 if (IsXor(pIn->swizzleMode))
3023 {
3024 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3025 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3026 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3027
3028 UINT_32 pipeXor = 0;
3029 UINT_32 bankXor = 0;
3030
3031 const UINT_32 bankMask = (1 << bankBits) - 1;
3032 const UINT_32 index = pIn->surfIndex & bankMask;
3033
3034 const UINT_32 bpp = pIn->flags.fmask ?
3035 GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3036 if (bankBits == 4)
3037 {
3038 static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3039 static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3040
3041 bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3042 }
3043 else if (bankBits > 0)
3044 {
3045 UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3046 bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3047 bankXor = (index * bankIncrease) & bankMask;
3048 }
3049
3050 pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3051 }
3052 else
3053 {
3054 pOut->pipeBankXor = 0;
3055 }
3056
3057 return ADDR_OK;
3058 }
3059
3060 /**
3061 ************************************************************************************************************************
3062 * Gfx9Lib::HwlComputeSlicePipeBankXor
3063 *
3064 * @brief
3065 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3066 *
3067 * @return
3068 * PipeBankXor value
3069 ************************************************************************************************************************
3070 */
3071 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3072 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3073 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const
3074 {
3075 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3076 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3077 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3078
3079 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3080 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3081
3082 pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3083
3084 return ADDR_OK;
3085 }
3086
3087 /**
3088 ************************************************************************************************************************
3089 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3090 *
3091 * @brief
3092 * Compute sub resource offset to support swizzle pattern
3093 *
3094 * @return
3095 * Offset
3096 ************************************************************************************************************************
3097 */
3098 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3099 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3100 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const
3101 {
3102 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3103
3104 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3105 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3106 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3107 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3108 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3109 UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3110
3111 pOut->offset = pIn->slice * pIn->sliceSize +
3112 pIn->macroBlockOffset +
3113 (pIn->mipTailOffset ^ pipeBankXor) -
3114 static_cast<UINT_64>(pipeBankXor);
3115 return ADDR_OK;
3116 }
3117
3118 /**
3119 ************************************************************************************************************************
3120 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3121 *
3122 * @brief
3123 * Compute surface info sanity check
3124 *
3125 * @return
3126 * Offset
3127 ************************************************************************************************************************
3128 */
3129 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3130 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3131 {
3132 BOOL_32 invalid = FALSE;
3133
3134 if ((pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3135 {
3136 invalid = TRUE;
3137 }
3138 else if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) ||
3139 (pIn->resourceType >= ADDR_RSRC_MAX_TYPE))
3140 {
3141 invalid = TRUE;
3142 }
3143
3144 BOOL_32 mipmap = (pIn->numMipLevels > 1);
3145 BOOL_32 msaa = (pIn->numFrags > 1);
3146
3147 ADDR2_SURFACE_FLAGS flags = pIn->flags;
3148 BOOL_32 zbuffer = (flags.depth || flags.stencil);
3149 BOOL_32 color = flags.color;
3150 BOOL_32 display = flags.display || flags.rotated;
3151
3152 AddrResourceType rsrcType = pIn->resourceType;
3153 BOOL_32 tex3d = IsTex3d(rsrcType);
3154 BOOL_32 thin3d = tex3d && flags.view3dAs2dArray;
3155 AddrSwizzleMode swizzle = pIn->swizzleMode;
3156 BOOL_32 linear = IsLinear(swizzle);
3157 BOOL_32 blk256B = IsBlock256b(swizzle);
3158 BOOL_32 blkVar = IsBlockVariable(swizzle);
3159 BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
3160 BOOL_32 prt = flags.prt;
3161 BOOL_32 stereo = flags.qbStereo;
3162
3163 if (invalid == FALSE)
3164 {
3165 if ((pIn->numFrags > 1) &&
3166 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3167 {
3168 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3169 invalid = TRUE;
3170 }
3171 }
3172
3173 if (invalid == FALSE)
3174 {
3175 switch (rsrcType)
3176 {
3177 case ADDR_RSRC_TEX_1D:
3178 invalid = msaa || zbuffer || display || (linear == FALSE) || stereo;
3179 break;
3180 case ADDR_RSRC_TEX_2D:
3181 invalid = (msaa && mipmap) || (stereo && msaa) || (stereo && mipmap);
3182 break;
3183 case ADDR_RSRC_TEX_3D:
3184 invalid = msaa || zbuffer || display || stereo;
3185 break;
3186 default:
3187 invalid = TRUE;
3188 break;
3189 }
3190 }
3191
3192 if (invalid == FALSE)
3193 {
3194 if (display)
3195 {
3196 invalid = (IsValidDisplaySwizzleMode(pIn) == FALSE);
3197 }
3198 }
3199
3200 if (invalid == FALSE)
3201 {
3202 if (linear)
3203 {
3204 invalid = ((ADDR_RSRC_TEX_1D != rsrcType) && prt) ||
3205 zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0);
3206 }
3207 else
3208 {
3209 if (blk256B || blkVar || isNonPrtXor)
3210 {
3211 invalid = prt;
3212 if (blk256B)
3213 {
3214 invalid = invalid || zbuffer || tex3d || mipmap || msaa;
3215 }
3216 }
3217
3218 if (invalid == FALSE)
3219 {
3220 if (IsZOrderSwizzle(swizzle))
3221 {
3222 invalid = (color && msaa) || thin3d;
3223 }
3224 else if (IsStandardSwizzle(swizzle))
3225 {
3226 invalid = zbuffer || thin3d;
3227 }
3228 else if (IsDisplaySwizzle(swizzle))
3229 {
3230 invalid = zbuffer || (prt && (ADDR_RSRC_TEX_3D == rsrcType));
3231 }
3232 else if (IsRotateSwizzle(swizzle))
3233 {
3234 invalid = zbuffer || (pIn->bpp > 64) || tex3d;
3235 }
3236 else
3237 {
3238 ADDR_ASSERT(!"invalid swizzle mode");
3239 invalid = TRUE;
3240 }
3241 }
3242 }
3243 }
3244
3245 ADDR_ASSERT(invalid == FALSE);
3246
3247 return invalid ? ADDR_INVALIDPARAMS : ADDR_OK;
3248 }
3249
3250 /**
3251 ************************************************************************************************************************
3252 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3253 *
3254 * @brief
3255 * Internal function to get suggested surface information for cliet to use
3256 *
3257 * @return
3258 * ADDR_E_RETURNCODE
3259 ************************************************************************************************************************
3260 */
3261 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3262 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3263 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const
3264 {
3265 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3266 ElemLib* pElemLib = GetElemLib();
3267
3268 UINT_32 bpp = pIn->bpp;
3269 UINT_32 width = pIn->width;
3270 UINT_32 height = pIn->height;
3271 UINT_32 numSamples = Max(pIn->numSamples, 1u);
3272 UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3273
3274 if (pIn->flags.fmask)
3275 {
3276 bpp = GetFmaskBpp(numSamples, numFrags);
3277 numFrags = 1;
3278 numSamples = 1;
3279 pOut->resourceType = ADDR_RSRC_TEX_2D;
3280 }
3281 else
3282 {
3283 // Set format to INVALID will skip this conversion
3284 if (pIn->format != ADDR_FMT_INVALID)
3285 {
3286 UINT_32 expandX, expandY;
3287
3288 // Don't care for this case
3289 ElemMode elemMode = ADDR_UNCOMPRESSED;
3290
3291 // Get compression/expansion factors and element mode which indicates compression/expansion
3292 bpp = pElemLib->GetBitsPerPixel(pIn->format,
3293 &elemMode,
3294 &expandX,
3295 &expandY);
3296
3297 UINT_32 basePitch = 0;
3298 GetElemLib()->AdjustSurfaceInfo(elemMode,
3299 expandX,
3300 expandY,
3301 &bpp,
3302 &basePitch,
3303 &width,
3304 &height);
3305 }
3306
3307 // The output may get changed for volume(3D) texture resource in future
3308 pOut->resourceType = pIn->resourceType;
3309 }
3310
3311 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
3312 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3313 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
3314 const BOOL_32 displayRsrc = pIn->flags.display || pIn->flags.rotated;
3315
3316 // Forbid swizzle mode(s) by client setting, for simplicity we never allow VAR swizzle mode for GFX9
3317 ADDR2_SWMODE_SET allowedSwModeSet = {};
3318 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3319 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask;
3320 allowedSwModeSet.value |= pIn->forbiddenBlock.macro4KB ? 0 : Gfx9Blk4KBSwModeMask;
3321 allowedSwModeSet.value |= pIn->forbiddenBlock.macro64KB ? 0 : Gfx9Blk64KBSwModeMask;
3322
3323 if (pIn->preferredSwSet.value != 0)
3324 {
3325 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3326 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3327 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3328 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3329 }
3330
3331 if (pIn->noXor)
3332 {
3333 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3334 }
3335
3336 if (pIn->maxAlign > 0)
3337 {
3338 if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
3339 {
3340 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3341 }
3342
3343 if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
3344 {
3345 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3346 }
3347
3348 if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
3349 {
3350 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3351 }
3352 }
3353
3354 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3355 switch (pOut->resourceType)
3356 {
3357 case ADDR_RSRC_TEX_1D:
3358 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3359 break;
3360
3361 case ADDR_RSRC_TEX_2D:
3362 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3363
3364 if (bpp > 64)
3365 {
3366 allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3367 }
3368 break;
3369
3370 case ADDR_RSRC_TEX_3D:
3371 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3372
3373 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3374 {
3375 // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3376 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3377 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3378 allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3379 }
3380
3381 if ((bpp == 128) && pIn->flags.color)
3382 {
3383 allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3384 }
3385
3386 if (pIn->flags.view3dAs2dArray)
3387 {
3388 allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3389 }
3390 break;
3391
3392 default:
3393 ADDR_ASSERT_ALWAYS();
3394 allowedSwModeSet.value = 0;
3395 break;
3396 }
3397
3398 if (pIn->format == ADDR_FMT_32_32_32)
3399 {
3400 allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3401 }
3402
3403 if (ElemLib::IsBlockCompressed(pIn->format))
3404 {
3405 if (pIn->flags.texture)
3406 {
3407 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3408 }
3409 else
3410 {
3411 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3412 }
3413 }
3414
3415 if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3416 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3417 {
3418 allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3419 }
3420
3421 if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3422 {
3423 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3424
3425 if (pIn->flags.noMetadata == FALSE)
3426 {
3427 if (pIn->flags.depth &&
3428 pIn->flags.texture &&
3429 (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3430 {
3431 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3432 // equation from wrong address within memory range a tile covered and use the
3433 // garbage data for compressed Z reading which finally leads to corruption.
3434 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3435 }
3436
3437 if (m_settings.htileCacheRbConflict &&
3438 (pIn->flags.depth || pIn->flags.stencil) &&
3439 (numSlices > 1) &&
3440 (pIn->flags.metaRbUnaligned == FALSE) &&
3441 (pIn->flags.metaPipeUnaligned == FALSE))
3442 {
3443 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3444 allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3445 }
3446 }
3447 }
3448
3449 if (msaa)
3450 {
3451 allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3452 }
3453
3454 if ((numFrags > 1) &&
3455 (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
3456 {
3457 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3458 allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3459 }
3460
3461 if (numMipLevels > 1)
3462 {
3463 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3464 }
3465
3466 if (displayRsrc)
3467 {
3468 if (m_settings.isDce12)
3469 {
3470 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3471 }
3472 else if (m_settings.isDcn1)
3473 {
3474 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3475 }
3476 else
3477 {
3478 ADDR_NOT_IMPLEMENTED();
3479 }
3480 }
3481
3482 if (allowedSwModeSet.value != 0)
3483 {
3484 #if DEBUG
3485 // Post sanity check, at least AddrLib should accept the output generated by its own
3486 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3487 localIn.flags = pIn->flags;
3488 localIn.resourceType = pOut->resourceType;
3489 localIn.format = pIn->format;
3490 localIn.bpp = bpp;
3491 localIn.width = width;
3492 localIn.height = height;
3493 localIn.numSlices = numSlices;
3494 localIn.numMipLevels = numMipLevels;
3495 localIn.numSamples = numSamples;
3496 localIn.numFrags = numFrags;
3497
3498 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3499 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3500 {
3501 if (validateSwModeSet & 1)
3502 {
3503 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3504 HwlComputeSurfaceInfoSanityCheck(&localIn);
3505 }
3506
3507 validateSwModeSet >>= 1;
3508 }
3509 #endif
3510
3511 pOut->validSwModeSet = allowedSwModeSet;
3512 pOut->canXor = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3513 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet);
3514 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3515
3516 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3517
3518 if (pOut->clientPreferredSwSet.value == 0)
3519 {
3520 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3521 }
3522
3523 if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3524 {
3525 pOut->swizzleMode = ADDR_SW_LINEAR;
3526 }
3527 else
3528 {
3529 // Always ignore linear swizzle mode if there is other choice.
3530 allowedSwModeSet.swLinear = 0;
3531
3532 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet);
3533
3534 // Determine block size if there is 2 or more block type candidates
3535 if (IsPow2(allowedBlockSet.value) == FALSE)
3536 {
3537 const AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {ADDR_SW_256B, ADDR_SW_4KB, ADDR_SW_64KB};
3538 Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
3539 Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
3540 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3541
3542 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
3543 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
3544 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3545 UINT_32 minSizeBlk = AddrBlockMicro;
3546 UINT_64 minSize = 0;
3547
3548 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3549 {
3550 if (allowedBlockSet.value & (1 << i))
3551 {
3552 ComputeBlockDimensionForSurf(&blkDim[i].w,
3553 &blkDim[i].h,
3554 &blkDim[i].d,
3555 bpp,
3556 numFrags,
3557 pOut->resourceType,
3558 swMode[i]);
3559
3560 if (displayRsrc)
3561 {
3562 blkDim[i].w = PowTwoAlign(blkDim[i].w, 32);
3563 }
3564
3565 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
3566 padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
3567
3568 if ((minSize == 0) ||
3569 ((padSize[i] * ratioHi) <= (minSize * ratioLow)))
3570 {
3571 minSize = padSize[i];
3572 minSizeBlk = i;
3573 }
3574 }
3575 }
3576
3577 if ((allowedBlockSet.micro == TRUE) &&
3578 (width <= blkDim[AddrBlockMicro].w) &&
3579 (height <= blkDim[AddrBlockMicro].h) &&
3580 (NextPow2(pIn->minSizeAlign) <= GetBlockSize(ADDR_SW_256B)))
3581 {
3582 minSizeBlk = AddrBlockMicro;
3583 }
3584
3585 if (minSizeBlk == AddrBlockMicro)
3586 {
3587 allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3588 }
3589 else if (minSizeBlk == AddrBlock4KB)
3590 {
3591 allowedSwModeSet.value &= Gfx9Blk4KBSwModeMask;
3592 }
3593 else
3594 {
3595 ADDR_ASSERT(minSizeBlk == AddrBlock64KB);
3596 allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3597 }
3598 }
3599
3600 // Block type should be determined.
3601 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet).value));
3602
3603 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3604
3605 // Determine swizzle type if there is 2 or more swizzle type candidates
3606 if (IsPow2(allowedSwSet.value) == FALSE)
3607 {
3608 if (ElemLib::IsBlockCompressed(pIn->format))
3609 {
3610 if (allowedSwSet.sw_D)
3611 {
3612 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3613 }
3614 else
3615 {
3616 ADDR_ASSERT(allowedSwSet.sw_S);
3617 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3618 }
3619 }
3620 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3621 {
3622 if (allowedSwSet.sw_S)
3623 {
3624 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3625 }
3626 else if (allowedSwSet.sw_D)
3627 {
3628 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3629 }
3630 else
3631 {
3632 ADDR_ASSERT(allowedSwSet.sw_R);
3633 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3634 }
3635 }
3636 else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3637 {
3638 if (pIn->flags.color && allowedSwSet.sw_D)
3639 {
3640 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3641 }
3642 else if (allowedSwSet.sw_Z)
3643 {
3644 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3645 }
3646 else
3647 {
3648 ADDR_ASSERT(allowedSwSet.sw_S);
3649 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3650 }
3651 }
3652 else
3653 {
3654 if (pIn->flags.rotated && allowedSwSet.sw_R)
3655 {
3656 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3657 }
3658 else if (displayRsrc && allowedSwSet.sw_D)
3659 {
3660 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3661 }
3662 else if (allowedSwSet.sw_S)
3663 {
3664 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3665 }
3666 else
3667 {
3668 ADDR_ASSERT(allowedSwSet.sw_Z);
3669 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3670 }
3671 }
3672 }
3673
3674 // Swizzle type should be determined.
3675 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3676
3677 // Determine swizzle mode now - always select the "largest" swizzle mode for a given block type +
3678 // swizzle type combination. For example, for AddrBlock64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3679 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3680 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3681 }
3682 }
3683 else
3684 {
3685 // Invalid combination...
3686 ADDR_ASSERT_ALWAYS();
3687 returnCode = ADDR_INVALIDPARAMS;
3688 }
3689
3690 return returnCode;
3691 }
3692
3693 /**
3694 ************************************************************************************************************************
3695 * Gfx9Lib::ComputeStereoInfo
3696 *
3697 * @brief
3698 * Compute height alignment and right eye pipeBankXor for stereo surface
3699 *
3700 * @return
3701 * Error code
3702 *
3703 ************************************************************************************************************************
3704 */
3705 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3706 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3707 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
3708 UINT_32* pHeightAlign
3709 ) const
3710 {
3711 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3712
3713 UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3714
3715 if (eqIndex < m_numEquations)
3716 {
3717 if (IsXor(pIn->swizzleMode))
3718 {
3719 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3720 const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2);
3721 const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
3722 const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
3723 const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3724 MAYBE_UNUSED const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
3725
3726 ADDR_ASSERT(maxYCoordBlock256 ==
3727 GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1));
3728
3729 const UINT_32 maxYCoordInBaseEquation =
3730 (blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256;
3731
3732 ADDR_ASSERT(maxYCoordInBaseEquation ==
3733 GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3734
3735 const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3736
3737 ADDR_ASSERT(maxYCoordInPipeXor ==
3738 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3739
3740 const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3741 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3742
3743 ADDR_ASSERT(maxYCoordInBankXor ==
3744 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3745
3746 const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3747
3748 if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3749 {
3750 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3751
3752 if (pOut->pStereoInfo != NULL)
3753 {
3754 pOut->pStereoInfo->rightSwizzle = 0;
3755
3756 if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3757 {
3758 if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3759 {
3760 pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3761 }
3762
3763 if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3764 {
3765 pOut->pStereoInfo->rightSwizzle |=
3766 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3767 }
3768
3769 ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3770 GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3771 numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3772 }
3773 }
3774 }
3775 }
3776 }
3777 else
3778 {
3779 ADDR_ASSERT_ALWAYS();
3780 returnCode = ADDR_ERROR;
3781 }
3782
3783 return returnCode;
3784 }
3785
3786 /**
3787 ************************************************************************************************************************
3788 * Gfx9Lib::HwlComputeSurfaceInfoTiled
3789 *
3790 * @brief
3791 * Internal function to calculate alignment for tiled surface
3792 *
3793 * @return
3794 * ADDR_E_RETURNCODE
3795 ************************************************************************************************************************
3796 */
3797 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3798 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3799 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3800 ) const
3801 {
3802 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3803 &pOut->blockHeight,
3804 &pOut->blockSlices,
3805 pIn->bpp,
3806 pIn->numFrags,
3807 pIn->resourceType,
3808 pIn->swizzleMode);
3809
3810 if (returnCode == ADDR_OK)
3811 {
3812 UINT_32 pitchAlignInElement = pOut->blockWidth;
3813
3814 if ((IsTex2d(pIn->resourceType) == TRUE) &&
3815 (pIn->flags.display || pIn->flags.rotated) &&
3816 (pIn->numMipLevels <= 1) &&
3817 (pIn->numSamples <= 1) &&
3818 (pIn->numFrags <= 1))
3819 {
3820 // Display engine needs pitch align to be at least 32 pixels.
3821 pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3822 }
3823
3824 pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3825
3826 if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3827 {
3828 if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3829 {
3830 returnCode = ADDR_INVALIDPARAMS;
3831 }
3832 else if (pIn->pitchInElement < pOut->pitch)
3833 {
3834 returnCode = ADDR_INVALIDPARAMS;
3835 }
3836 else
3837 {
3838 pOut->pitch = pIn->pitchInElement;
3839 }
3840 }
3841
3842 UINT_32 heightAlign = 0;
3843
3844 if (pIn->flags.qbStereo)
3845 {
3846 returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3847 }
3848
3849 if (returnCode == ADDR_OK)
3850 {
3851 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3852
3853 if (heightAlign > 1)
3854 {
3855 pOut->height = PowTwoAlign(pOut->height, heightAlign);
3856 }
3857
3858 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3859
3860 pOut->epitchIsHeight = FALSE;
3861 pOut->mipChainInTail = FALSE;
3862 pOut->firstMipIdInTail = pIn->numMipLevels;
3863
3864 pOut->mipChainPitch = pOut->pitch;
3865 pOut->mipChainHeight = pOut->height;
3866 pOut->mipChainSlice = pOut->numSlices;
3867
3868 if (pIn->numMipLevels > 1)
3869 {
3870 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
3871 pIn->swizzleMode,
3872 pIn->bpp,
3873 pIn->width,
3874 pIn->height,
3875 pIn->numSlices,
3876 pOut->blockWidth,
3877 pOut->blockHeight,
3878 pOut->blockSlices,
3879 pIn->numMipLevels,
3880 pOut->pMipInfo);
3881
3882 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
3883
3884 if (endingMipId == 0)
3885 {
3886 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
3887 pIn->swizzleMode,
3888 pOut->blockWidth,
3889 pOut->blockHeight,
3890 pOut->blockSlices);
3891
3892 pOut->epitchIsHeight = TRUE;
3893 pOut->pitch = tailMaxDim.w;
3894 pOut->height = tailMaxDim.h;
3895 pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ?
3896 tailMaxDim.d : pIn->numSlices;
3897 pOut->mipChainInTail = TRUE;
3898 }
3899 else
3900 {
3901 UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
3902 UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
3903
3904 AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
3905 pIn->swizzleMode,
3906 mip0WidthInBlk,
3907 mip0HeightInBlk,
3908 pOut->numSlices / pOut->blockSlices);
3909 if (majorMode == ADDR_MAJOR_Y)
3910 {
3911 UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
3912
3913 if ((mip1WidthInBlk == 1) && (endingMipId > 2))
3914 {
3915 mip1WidthInBlk++;
3916 }
3917
3918 pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
3919
3920 pOut->epitchIsHeight = FALSE;
3921 }
3922 else
3923 {
3924 UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
3925
3926 if ((mip1HeightInBlk == 1) && (endingMipId > 2))
3927 {
3928 mip1HeightInBlk++;
3929 }
3930
3931 pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
3932
3933 pOut->epitchIsHeight = TRUE;
3934 }
3935 }
3936
3937 if (pOut->pMipInfo != NULL)
3938 {
3939 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
3940
3941 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3942 {
3943 Dim3d mipStartPos = {0};
3944 UINT_32 mipTailOffsetInBytes = 0;
3945
3946 mipStartPos = GetMipStartPos(pIn->resourceType,
3947 pIn->swizzleMode,
3948 pOut->pitch,
3949 pOut->height,
3950 pOut->numSlices,
3951 pOut->blockWidth,
3952 pOut->blockHeight,
3953 pOut->blockSlices,
3954 i,
3955 elementBytesLog2,
3956 &mipTailOffsetInBytes);
3957
3958 UINT_32 pitchInBlock =
3959 pOut->mipChainPitch / pOut->blockWidth;
3960 UINT_32 sliceInBlock =
3961 (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
3962 UINT_64 blockIndex =
3963 mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
3964 UINT_64 macroBlockOffset =
3965 blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
3966
3967 pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
3968 pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
3969 }
3970 }
3971 }
3972 else if (pOut->pMipInfo != NULL)
3973 {
3974 pOut->pMipInfo[0].pitch = pOut->pitch;
3975 pOut->pMipInfo[0].height = pOut->height;
3976 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
3977 pOut->pMipInfo[0].offset = 0;
3978 }
3979
3980 pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
3981 (pIn->bpp >> 3) * pIn->numFrags;
3982 pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
3983 pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
3984
3985 if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
3986 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
3987 (pIn->flags.texture == TRUE) &&
3988 (pIn->flags.noMetadata == FALSE) &&
3989 (pIn->flags.metaPipeUnaligned == FALSE))
3990 {
3991 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
3992 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
3993 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
3994 // them, which may cause invalid metadata to be fetched.
3995 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes);
3996 }
3997
3998 if (pIn->flags.prt)
3999 {
4000 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4001 }
4002 }
4003 }
4004
4005 return returnCode;
4006 }
4007
4008 /**
4009 ************************************************************************************************************************
4010 * Gfx9Lib::HwlComputeSurfaceInfoLinear
4011 *
4012 * @brief
4013 * Internal function to calculate alignment for linear surface
4014 *
4015 * @return
4016 * ADDR_E_RETURNCODE
4017 ************************************************************************************************************************
4018 */
4019 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4020 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4021 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4022 ) const
4023 {
4024 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4025 UINT_32 pitch = 0;
4026 UINT_32 actualHeight = 0;
4027 UINT_32 elementBytes = pIn->bpp >> 3;
4028 const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256;
4029
4030 if (IsTex1d(pIn->resourceType))
4031 {
4032 if (pIn->height > 1)
4033 {
4034 returnCode = ADDR_INVALIDPARAMS;
4035 }
4036 else
4037 {
4038 const UINT_32 pitchAlignInElement = alignment / elementBytes;
4039
4040 pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4041 actualHeight = pIn->numMipLevels;
4042
4043 if (pIn->flags.prt == FALSE)
4044 {
4045 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4046 &pitch, &actualHeight);
4047 }
4048
4049 if (returnCode == ADDR_OK)
4050 {
4051 if (pOut->pMipInfo != NULL)
4052 {
4053 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4054 {
4055 pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4056 pOut->pMipInfo[i].pitch = pitch;
4057 pOut->pMipInfo[i].height = 1;
4058 pOut->pMipInfo[i].depth = 1;
4059 }
4060 }
4061 }
4062 }
4063 }
4064 else
4065 {
4066 returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4067 }
4068
4069 if ((pitch == 0) || (actualHeight == 0))
4070 {
4071 returnCode = ADDR_INVALIDPARAMS;
4072 }
4073
4074 if (returnCode == ADDR_OK)
4075 {
4076 pOut->pitch = pitch;
4077 pOut->height = pIn->height;
4078 pOut->numSlices = pIn->numSlices;
4079 pOut->mipChainPitch = pitch;
4080 pOut->mipChainHeight = actualHeight;
4081 pOut->mipChainSlice = pOut->numSlices;
4082 pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4083 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4084 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
4085 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4086 pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4087 pOut->blockHeight = 1;
4088 pOut->blockSlices = 1;
4089 }
4090
4091 // Post calculation validate
4092 ADDR_ASSERT(pOut->sliceSize > 0);
4093
4094 return returnCode;
4095 }
4096
4097 /**
4098 ************************************************************************************************************************
4099 * Gfx9Lib::GetMipChainInfo
4100 *
4101 * @brief
4102 * Internal function to get out information about mip chain
4103 *
4104 * @return
4105 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4106 ************************************************************************************************************************
4107 */
4108 UINT_32 Gfx9Lib::GetMipChainInfo(
4109 AddrResourceType resourceType,
4110 AddrSwizzleMode swizzleMode,
4111 UINT_32 bpp,
4112 UINT_32 mip0Width,
4113 UINT_32 mip0Height,
4114 UINT_32 mip0Depth,
4115 UINT_32 blockWidth,
4116 UINT_32 blockHeight,
4117 UINT_32 blockDepth,
4118 UINT_32 numMipLevel,
4119 ADDR2_MIP_INFO* pMipInfo) const
4120 {
4121 const Dim3d tailMaxDim =
4122 GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4123
4124 UINT_32 mipPitch = mip0Width;
4125 UINT_32 mipHeight = mip0Height;
4126 UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
4127 UINT_32 offset = 0;
4128 UINT_32 firstMipIdInTail = numMipLevel;
4129 BOOL_32 inTail = FALSE;
4130 BOOL_32 finalDim = FALSE;
4131 BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
4132 BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
4133
4134 for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4135 {
4136 if (inTail)
4137 {
4138 if (finalDim == FALSE)
4139 {
4140 UINT_32 mipSize;
4141
4142 if (is3dThick)
4143 {
4144 mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4145 }
4146 else
4147 {
4148 mipSize = mipPitch * mipHeight * (bpp >> 3);
4149 }
4150
4151 if (mipSize <= 256)
4152 {
4153 UINT_32 index = Log2(bpp >> 3);
4154
4155 if (is3dThick)
4156 {
4157 mipPitch = Block256_3dZ[index].w;
4158 mipHeight = Block256_3dZ[index].h;
4159 mipDepth = Block256_3dZ[index].d;
4160 }
4161 else
4162 {
4163 mipPitch = Block256_2d[index].w;
4164 mipHeight = Block256_2d[index].h;
4165 }
4166
4167 finalDim = TRUE;
4168 }
4169 }
4170 }
4171 else
4172 {
4173 inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4174 mipPitch, mipHeight, mipDepth);
4175
4176 if (inTail)
4177 {
4178 firstMipIdInTail = mipId;
4179 mipPitch = tailMaxDim.w;
4180 mipHeight = tailMaxDim.h;
4181
4182 if (is3dThick)
4183 {
4184 mipDepth = tailMaxDim.d;
4185 }
4186 }
4187 else
4188 {
4189 mipPitch = PowTwoAlign(mipPitch, blockWidth);
4190 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4191
4192 if (is3dThick)
4193 {
4194 mipDepth = PowTwoAlign(mipDepth, blockDepth);
4195 }
4196 }
4197 }
4198
4199 if (pMipInfo != NULL)
4200 {
4201 pMipInfo[mipId].pitch = mipPitch;
4202 pMipInfo[mipId].height = mipHeight;
4203 pMipInfo[mipId].depth = mipDepth;
4204 pMipInfo[mipId].offset = offset;
4205 }
4206
4207 offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4208
4209 if (finalDim)
4210 {
4211 if (is3dThin)
4212 {
4213 mipDepth = Max(mipDepth >> 1, 1u);
4214 }
4215 }
4216 else
4217 {
4218 mipPitch = Max(mipPitch >> 1, 1u);
4219 mipHeight = Max(mipHeight >> 1, 1u);
4220
4221 if (is3dThick || is3dThin)
4222 {
4223 mipDepth = Max(mipDepth >> 1, 1u);
4224 }
4225 }
4226 }
4227
4228 return firstMipIdInTail;
4229 }
4230
4231 /**
4232 ************************************************************************************************************************
4233 * Gfx9Lib::GetMetaMiptailInfo
4234 *
4235 * @brief
4236 * Get mip tail coordinate information.
4237 *
4238 * @return
4239 * N/A
4240 ************************************************************************************************************************
4241 */
4242 VOID Gfx9Lib::GetMetaMiptailInfo(
4243 ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord
4244 Dim3d mipCoord, ///< [in] mip tail base coord
4245 UINT_32 numMipInTail, ///< [in] number of mips in tail
4246 Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
4247 ) const
4248 {
4249 BOOL_32 isThick = (pMetaBlkDim->d > 1);
4250 UINT_32 mipWidth = pMetaBlkDim->w;
4251 UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4252 UINT_32 mipDepth = pMetaBlkDim->d;
4253 UINT_32 minInc;
4254
4255 if (isThick)
4256 {
4257 minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4258 }
4259 else if (pMetaBlkDim->h >= 1024)
4260 {
4261 minInc = 256;
4262 }
4263 else if (pMetaBlkDim->h == 512)
4264 {
4265 minInc = 128;
4266 }
4267 else
4268 {
4269 minInc = 64;
4270 }
4271
4272 UINT_32 blk32MipId = 0xFFFFFFFF;
4273
4274 for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4275 {
4276 pInfo[mip].inMiptail = TRUE;
4277 pInfo[mip].startX = mipCoord.w;
4278 pInfo[mip].startY = mipCoord.h;
4279 pInfo[mip].startZ = mipCoord.d;
4280 pInfo[mip].width = mipWidth;
4281 pInfo[mip].height = mipHeight;
4282 pInfo[mip].depth = mipDepth;
4283
4284 if (mipWidth <= 32)
4285 {
4286 if (blk32MipId == 0xFFFFFFFF)
4287 {
4288 blk32MipId = mip;
4289 }
4290
4291 mipCoord.w = pInfo[blk32MipId].startX;
4292 mipCoord.h = pInfo[blk32MipId].startY;
4293 mipCoord.d = pInfo[blk32MipId].startZ;
4294
4295 switch (mip - blk32MipId)
4296 {
4297 case 0:
4298 mipCoord.w += 32; // 16x16
4299 break;
4300 case 1:
4301 mipCoord.h += 32; // 8x8
4302 break;
4303 case 2:
4304 mipCoord.h += 32; // 4x4
4305 mipCoord.w += 16;
4306 break;
4307 case 3:
4308 mipCoord.h += 32; // 2x2
4309 mipCoord.w += 32;
4310 break;
4311 case 4:
4312 mipCoord.h += 32; // 1x1
4313 mipCoord.w += 48;
4314 break;
4315 // The following are for BC/ASTC formats
4316 case 5:
4317 mipCoord.h += 48; // 1/2 x 1/2
4318 break;
4319 case 6:
4320 mipCoord.h += 48; // 1/4 x 1/4
4321 mipCoord.w += 16;
4322 break;
4323 case 7:
4324 mipCoord.h += 48; // 1/8 x 1/8
4325 mipCoord.w += 32;
4326 break;
4327 case 8:
4328 mipCoord.h += 48; // 1/16 x 1/16
4329 mipCoord.w += 48;
4330 break;
4331 default:
4332 ADDR_ASSERT_ALWAYS();
4333 break;
4334 }
4335
4336 mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4337 mipHeight = mipWidth;
4338
4339 if (isThick)
4340 {
4341 mipDepth = mipWidth;
4342 }
4343 }
4344 else
4345 {
4346 if (mipWidth <= minInc)
4347 {
4348 // if we're below the minimal increment...
4349 if (isThick)
4350 {
4351 // For 3d, just go in z direction
4352 mipCoord.d += mipDepth;
4353 }
4354 else
4355 {
4356 // For 2d, first go across, then down
4357 if ((mipWidth * 2) == minInc)
4358 {
4359 // if we're 2 mips below, that's when we go back in x, and down in y
4360 mipCoord.w -= minInc;
4361 mipCoord.h += minInc;
4362 }
4363 else
4364 {
4365 // otherwise, just go across in x
4366 mipCoord.w += minInc;
4367 }
4368 }
4369 }
4370 else
4371 {
4372 // On even mip, go down, otherwise, go across
4373 if (mip & 1)
4374 {
4375 mipCoord.w += mipWidth;
4376 }
4377 else
4378 {
4379 mipCoord.h += mipHeight;
4380 }
4381 }
4382 // Divide the width by 2
4383 mipWidth >>= 1;
4384 // After the first mip in tail, the mip is always a square
4385 mipHeight = mipWidth;
4386 // ...or for 3d, a cube
4387 if (isThick)
4388 {
4389 mipDepth = mipWidth;
4390 }
4391 }
4392 }
4393 }
4394
4395 /**
4396 ************************************************************************************************************************
4397 * Gfx9Lib::GetMipStartPos
4398 *
4399 * @brief
4400 * Internal function to get out information about mip logical start position
4401 *
4402 * @return
4403 * logical start position in macro block width/heith/depth of one mip level within one slice
4404 ************************************************************************************************************************
4405 */
4406 Dim3d Gfx9Lib::GetMipStartPos(
4407 AddrResourceType resourceType,
4408 AddrSwizzleMode swizzleMode,
4409 UINT_32 width,
4410 UINT_32 height,
4411 UINT_32 depth,
4412 UINT_32 blockWidth,
4413 UINT_32 blockHeight,
4414 UINT_32 blockDepth,
4415 UINT_32 mipId,
4416 UINT_32 log2ElementBytes,
4417 UINT_32* pMipTailBytesOffset) const
4418 {
4419 Dim3d mipStartPos = {0};
4420 const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4421
4422 // Report mip in tail if Mip0 is already in mip tail
4423 BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4424 UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode);
4425 UINT_32 mipIndexInTail = mipId;
4426
4427 if (inMipTail == FALSE)
4428 {
4429 // Mip 0 dimension, unit in block
4430 UINT_32 mipWidthInBlk = width / blockWidth;
4431 UINT_32 mipHeightInBlk = height / blockHeight;
4432 UINT_32 mipDepthInBlk = depth / blockDepth;
4433 AddrMajorMode majorMode = GetMajorMode(resourceType,
4434 swizzleMode,
4435 mipWidthInBlk,
4436 mipHeightInBlk,
4437 mipDepthInBlk);
4438
4439 UINT_32 endingMip = mipId + 1;
4440
4441 for (UINT_32 i = 1; i <= mipId; i++)
4442 {
4443 if ((i == 1) || (i == 3))
4444 {
4445 if (majorMode == ADDR_MAJOR_Y)
4446 {
4447 mipStartPos.w += mipWidthInBlk;
4448 }
4449 else
4450 {
4451 mipStartPos.h += mipHeightInBlk;
4452 }
4453 }
4454 else
4455 {
4456 if (majorMode == ADDR_MAJOR_X)
4457 {
4458 mipStartPos.w += mipWidthInBlk;
4459 }
4460 else if (majorMode == ADDR_MAJOR_Y)
4461 {
4462 mipStartPos.h += mipHeightInBlk;
4463 }
4464 else
4465 {
4466 mipStartPos.d += mipDepthInBlk;
4467 }
4468 }
4469
4470 BOOL_32 inTail = FALSE;
4471
4472 if (IsThick(resourceType, swizzleMode))
4473 {
4474 UINT_32 dim = log2blkSize % 3;
4475
4476 if (dim == 0)
4477 {
4478 inTail =
4479 (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4480 }
4481 else if (dim == 1)
4482 {
4483 inTail =
4484 (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4485 }
4486 else
4487 {
4488 inTail =
4489 (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4490 }
4491 }
4492 else
4493 {
4494 if (log2blkSize & 1)
4495 {
4496 inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4497 }
4498 else
4499 {
4500 inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4501 }
4502 }
4503
4504 if (inTail)
4505 {
4506 endingMip = i;
4507 break;
4508 }
4509
4510 mipWidthInBlk = RoundHalf(mipWidthInBlk);
4511 mipHeightInBlk = RoundHalf(mipHeightInBlk);
4512 mipDepthInBlk = RoundHalf(mipDepthInBlk);
4513 }
4514
4515 if (mipId >= endingMip)
4516 {
4517 inMipTail = TRUE;
4518 mipIndexInTail = mipId - endingMip;
4519 }
4520 }
4521
4522 if (inMipTail)
4523 {
4524 UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize;
4525 ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4526 *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4527 }
4528
4529 return mipStartPos;
4530 }
4531
4532 /**
4533 ************************************************************************************************************************
4534 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4535 *
4536 * @brief
4537 * Internal function to calculate address from coord for tiled swizzle surface
4538 *
4539 * @return
4540 * ADDR_E_RETURNCODE
4541 ************************************************************************************************************************
4542 */
4543 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4544 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4545 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4546 ) const
4547 {
4548 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4549 localIn.swizzleMode = pIn->swizzleMode;
4550 localIn.flags = pIn->flags;
4551 localIn.resourceType = pIn->resourceType;
4552 localIn.bpp = pIn->bpp;
4553 localIn.width = Max(pIn->unalignedWidth, 1u);
4554 localIn.height = Max(pIn->unalignedHeight, 1u);
4555 localIn.numSlices = Max(pIn->numSlices, 1u);
4556 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4557 localIn.numSamples = Max(pIn->numSamples, 1u);
4558 localIn.numFrags = Max(pIn->numFrags, 1u);
4559 if (localIn.numMipLevels <= 1)
4560 {
4561 localIn.pitchInElement = pIn->pitchInElement;
4562 }
4563
4564 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4565 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4566
4567 BOOL_32 valid = (returnCode == ADDR_OK) &&
4568 (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4569 IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4570 ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4571
4572 if (valid)
4573 {
4574 UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3);
4575 Dim3d mipStartPos = {0};
4576 UINT_32 mipTailBytesOffset = 0;
4577
4578 if (pIn->numMipLevels > 1)
4579 {
4580 // Mip-map chain cannot be MSAA surface
4581 ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4582
4583 mipStartPos = GetMipStartPos(pIn->resourceType,
4584 pIn->swizzleMode,
4585 localOut.pitch,
4586 localOut.height,
4587 localOut.numSlices,
4588 localOut.blockWidth,
4589 localOut.blockHeight,
4590 localOut.blockSlices,
4591 pIn->mipId,
4592 log2ElementBytes,
4593 &mipTailBytesOffset);
4594 }
4595
4596 UINT_32 interleaveOffset = 0;
4597 UINT_32 pipeBits = 0;
4598 UINT_32 pipeXor = 0;
4599 UINT_32 bankBits = 0;
4600 UINT_32 bankXor = 0;
4601
4602 if (IsThin(pIn->resourceType, pIn->swizzleMode))
4603 {
4604 UINT_32 blockOffset = 0;
4605 UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4606
4607 if (IsZOrderSwizzle(pIn->swizzleMode))
4608 {
4609 // Morton generation
4610 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4611 {
4612 UINT_32 totalLowBits = 6 - log2ElementBytes;
4613 UINT_32 mortBits = totalLowBits / 2;
4614 UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4615 // Are 9 bits enough?
4616 UINT_32 highBitsValue =
4617 MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4618 blockOffset = lowBitsValue | highBitsValue;
4619 ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4620 }
4621 else
4622 {
4623 blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4624 }
4625
4626 // Fill LSBs with sample bits
4627 if (pIn->numSamples > 1)
4628 {
4629 blockOffset *= pIn->numSamples;
4630 blockOffset |= pIn->sample;
4631 }
4632
4633 // Shift according to BytesPP
4634 blockOffset <<= log2ElementBytes;
4635 }
4636 else
4637 {
4638 // Micro block offset
4639 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4640 blockOffset = microBlockOffset;
4641
4642 // Micro block dimension
4643 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4644 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4645 // Morton generation, does 12 bit enough?
4646 blockOffset |=
4647 MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4648
4649 // Sample bits start location
4650 UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples);
4651 // Join sample bits information to the highest Macro block bits
4652 if (IsNonPrtXor(pIn->swizzleMode))
4653 {
4654 // Non-prt-Xor : xor highest Macro block bits with sample bits
4655 blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4656 }
4657 else
4658 {
4659 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4660 // after this op, the blockOffset only contains log2 Macro block size bits
4661 blockOffset %= (1 << sampleStart);
4662 blockOffset |= (pIn->sample << sampleStart);
4663 ADDR_ASSERT((blockOffset >> log2blkSize) == 0);
4664 }
4665 }
4666
4667 if (IsXor(pIn->swizzleMode))
4668 {
4669 // Mask off bits above Macro block bits to keep page synonyms working for prt
4670 if (IsPrt(pIn->swizzleMode))
4671 {
4672 blockOffset &= ((1 << log2blkSize) - 1);
4673 }
4674
4675 // Preserve offset inside pipe interleave
4676 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4677 blockOffset >>= m_pipeInterleaveLog2;
4678
4679 // Pipe/Se xor bits
4680 pipeBits = GetPipeXorBits(log2blkSize);
4681 // Pipe xor
4682 pipeXor = FoldXor2d(blockOffset, pipeBits);
4683 blockOffset >>= pipeBits;
4684
4685 // Bank xor bits
4686 bankBits = GetBankXorBits(log2blkSize);
4687 // Bank Xor
4688 bankXor = FoldXor2d(blockOffset, bankBits);
4689 blockOffset >>= bankBits;
4690
4691 // Put all the part back together
4692 blockOffset <<= bankBits;
4693 blockOffset |= bankXor;
4694 blockOffset <<= pipeBits;
4695 blockOffset |= pipeXor;
4696 blockOffset <<= m_pipeInterleaveLog2;
4697 blockOffset |= interleaveOffset;
4698 }
4699
4700 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4701 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4702
4703 blockOffset |= mipTailBytesOffset;
4704
4705 if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4706 {
4707 // Apply slice xor if not MSAA/PRT
4708 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4709 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4710 (m_pipeInterleaveLog2 + pipeBits));
4711 }
4712
4713 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4714 bankBits, pipeBits, &blockOffset);
4715
4716 blockOffset %= (1 << log2blkSize);
4717
4718 UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4719 UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4720 UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4721 UINT_64 macroBlockIndex =
4722 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4723 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4724 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4725
4726 pOut->addr = blockOffset | (macroBlockIndex << log2blkSize);
4727 }
4728 else
4729 {
4730 UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4731
4732 Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4733
4734 UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4735 (pIn->y / microBlockDim.h),
4736 (pIn->slice / microBlockDim.d),
4737 8);
4738
4739 blockOffset <<= 10;
4740 blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4741
4742 if (IsXor(pIn->swizzleMode))
4743 {
4744 // Mask off bits above Macro block bits to keep page synonyms working for prt
4745 if (IsPrt(pIn->swizzleMode))
4746 {
4747 blockOffset &= ((1 << log2blkSize) - 1);
4748 }
4749
4750 // Preserve offset inside pipe interleave
4751 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4752 blockOffset >>= m_pipeInterleaveLog2;
4753
4754 // Pipe/Se xor bits
4755 pipeBits = GetPipeXorBits(log2blkSize);
4756 // Pipe xor
4757 pipeXor = FoldXor3d(blockOffset, pipeBits);
4758 blockOffset >>= pipeBits;
4759
4760 // Bank xor bits
4761 bankBits = GetBankXorBits(log2blkSize);
4762 // Bank Xor
4763 bankXor = FoldXor3d(blockOffset, bankBits);
4764 blockOffset >>= bankBits;
4765
4766 // Put all the part back together
4767 blockOffset <<= bankBits;
4768 blockOffset |= bankXor;
4769 blockOffset <<= pipeBits;
4770 blockOffset |= pipeXor;
4771 blockOffset <<= m_pipeInterleaveLog2;
4772 blockOffset |= interleaveOffset;
4773 }
4774
4775 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4776 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4777 blockOffset |= mipTailBytesOffset;
4778
4779 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4780 bankBits, pipeBits, &blockOffset);
4781
4782 blockOffset %= (1 << log2blkSize);
4783
4784 UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
4785 UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4786 UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4787
4788 UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4789 UINT_32 sliceSizeInBlock =
4790 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4791 UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4792
4793 pOut->addr = blockOffset | (blockIndex << log2blkSize);
4794 }
4795 }
4796 else
4797 {
4798 returnCode = ADDR_INVALIDPARAMS;
4799 }
4800
4801 return returnCode;
4802 }
4803
4804 /**
4805 ************************************************************************************************************************
4806 * Gfx9Lib::ComputeSurfaceInfoLinear
4807 *
4808 * @brief
4809 * Internal function to calculate padding for linear swizzle 2D/3D surface
4810 *
4811 * @return
4812 * N/A
4813 ************************************************************************************************************************
4814 */
4815 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
4816 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture
4817 UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element
4818 UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW
4819 ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information
4820 ) const
4821 {
4822 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4823
4824 UINT_32 elementBytes = pIn->bpp >> 3;
4825 UINT_32 pitchAlignInElement = 0;
4826
4827 if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
4828 {
4829 ADDR_ASSERT(pIn->numMipLevels <= 1);
4830 ADDR_ASSERT(pIn->numSlices <= 1);
4831 pitchAlignInElement = 1;
4832 }
4833 else
4834 {
4835 pitchAlignInElement = (256 / elementBytes);
4836 }
4837
4838 UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement);
4839 UINT_32 slice0PaddedHeight = pIn->height;
4840
4841 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4842 &mipChainWidth, &slice0PaddedHeight);
4843
4844 if (returnCode == ADDR_OK)
4845 {
4846 UINT_32 mipChainHeight = 0;
4847 UINT_32 mipHeight = pIn->height;
4848 UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4849
4850 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4851 {
4852 if (pMipInfo != NULL)
4853 {
4854 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
4855 pMipInfo[i].pitch = mipChainWidth;
4856 pMipInfo[i].height = mipHeight;
4857 pMipInfo[i].depth = mipDepth;
4858 }
4859
4860 mipChainHeight += mipHeight;
4861 mipHeight = RoundHalf(mipHeight);
4862 mipHeight = Max(mipHeight, 1u);
4863 }
4864
4865 *pMipmap0PaddedWidth = mipChainWidth;
4866 *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
4867 }
4868
4869 return returnCode;
4870 }
4871
4872 } // V2
4873 } // Addr