amd: update addrlib
[mesa.git] / src / amd / addrlib / src / gfx9 / gfx9addrlib.cpp
1 /*
2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx9addrlib.h"
35
36 #include "gfx9_gb_reg.h"
37
38 #include "amdgpu_asic_addr.h"
39
40 #include "util/macros.h"
41
42 ////////////////////////////////////////////////////////////////////////////////////////////////////
43 ////////////////////////////////////////////////////////////////////////////////////////////////////
44
45 namespace Addr
46 {
47
48 /**
49 ************************************************************************************************************************
50 * Gfx9HwlInit
51 *
52 * @brief
53 * Creates an Gfx9Lib object.
54 *
55 * @return
56 * Returns an Gfx9Lib object pointer.
57 ************************************************************************************************************************
58 */
59 Addr::Lib* Gfx9HwlInit(const Client* pClient)
60 {
61 return V2::Gfx9Lib::CreateObj(pClient);
62 }
63
64 namespace V2
65 {
66
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68 // Static Const Member
69 ////////////////////////////////////////////////////////////////////////////////////////////////////
70
71 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
72 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt
73 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
74 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
75 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D
76 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R
77
78 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
79 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
80 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D
81 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R
82
83 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
84 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
85 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D
86 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_R
87
88 {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z
89 {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S
90 {0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D
91 {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R
92
93 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T
94 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T
95 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T
96 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T
97
98 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_x
99 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_x
100 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_x
101 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_x
102
103 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X
104 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X
105 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X
106 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X
107
108 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X
109 {0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X
110 {0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X
111 {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X
112 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
113 };
114
115 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
116 8, 6, 5, 4, 3, 2, 1, 0};
117
118 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
119
120 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
121
122 /**
123 ************************************************************************************************************************
124 * Gfx9Lib::Gfx9Lib
125 *
126 * @brief
127 * Constructor
128 *
129 ************************************************************************************************************************
130 */
131 Gfx9Lib::Gfx9Lib(const Client* pClient)
132 :
133 Lib(pClient),
134 m_numEquations(0)
135 {
136 m_class = AI_ADDRLIB;
137 memset(&m_settings, 0, sizeof(m_settings));
138 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
139 memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
140 m_metaEqOverrideIndex = 0;
141 }
142
143 /**
144 ************************************************************************************************************************
145 * Gfx9Lib::~Gfx9Lib
146 *
147 * @brief
148 * Destructor
149 ************************************************************************************************************************
150 */
151 Gfx9Lib::~Gfx9Lib()
152 {
153 }
154
155 /**
156 ************************************************************************************************************************
157 * Gfx9Lib::HwlComputeHtileInfo
158 *
159 * @brief
160 * Interface function stub of AddrComputeHtilenfo
161 *
162 * @return
163 * ADDR_E_RETURNCODE
164 ************************************************************************************************************************
165 */
166 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
167 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
168 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
169 ) const
170 {
171 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
172 pIn->swizzleMode);
173
174 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
175
176 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
177
178 if ((numPipeTotal == 1) && (numRbTotal == 1))
179 {
180 numCompressBlkPerMetaBlkLog2 = 10;
181 }
182 else
183 {
184 if (m_settings.applyAliasFix)
185 {
186 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
187 }
188 else
189 {
190 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
191 }
192 }
193
194 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
195
196 Dim3d metaBlkDim = {8, 8, 1};
197 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
198 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
199 UINT_32 heightAmp = totalAmpBits - widthAmp;
200 metaBlkDim.w <<= widthAmp;
201 metaBlkDim.h <<= heightAmp;
202
203 #if DEBUG
204 Dim3d metaBlkDimDbg = {8, 8, 1};
205 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
206 {
207 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
208 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
209 {
210 metaBlkDimDbg.h <<= 1;
211 }
212 else
213 {
214 metaBlkDimDbg.w <<= 1;
215 }
216 }
217 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
218 #endif
219
220 UINT_32 numMetaBlkX;
221 UINT_32 numMetaBlkY;
222 UINT_32 numMetaBlkZ;
223
224 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
225 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
226 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
227
228 const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
229 UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
230
231 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
232 {
233 align *= (numPipeTotal >> 1);
234 }
235
236 align = Max(align, metaBlkSize);
237
238 if (m_settings.metaBaseAlignFix)
239 {
240 align = Max(align, GetBlockSize(pIn->swizzleMode));
241 }
242
243 if (m_settings.htileAlignFix)
244 {
245 const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
246 const INT_32 htileCachelineSizeLog2 = 11;
247 const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
248
249 INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
250
251 align <<= rbMaskPadding;
252 }
253
254 pOut->pitch = numMetaBlkX * metaBlkDim.w;
255 pOut->height = numMetaBlkY * metaBlkDim.h;
256 pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
257
258 pOut->metaBlkWidth = metaBlkDim.w;
259 pOut->metaBlkHeight = metaBlkDim.h;
260 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
261
262 pOut->baseAlign = align;
263 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
264
265 return ADDR_OK;
266 }
267
268 /**
269 ************************************************************************************************************************
270 * Gfx9Lib::HwlComputeCmaskInfo
271 *
272 * @brief
273 * Interface function stub of AddrComputeCmaskInfo
274 *
275 * @return
276 * ADDR_E_RETURNCODE
277 ************************************************************************************************************************
278 */
279 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
280 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
281 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
282 ) const
283 {
284 // TODO: Clarify with AddrLib team
285 // ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
286
287 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
288 pIn->swizzleMode);
289
290 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
291
292 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
293
294 if ((numPipeTotal == 1) && (numRbTotal == 1))
295 {
296 numCompressBlkPerMetaBlkLog2 = 13;
297 }
298 else
299 {
300 if (m_settings.applyAliasFix)
301 {
302 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
303 }
304 else
305 {
306 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
307 }
308
309 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
310 }
311
312 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
313
314 Dim2d metaBlkDim = {8, 8};
315 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
316 UINT_32 heightAmp = totalAmpBits >> 1;
317 UINT_32 widthAmp = totalAmpBits - heightAmp;
318 metaBlkDim.w <<= widthAmp;
319 metaBlkDim.h <<= heightAmp;
320
321 #if DEBUG
322 Dim2d metaBlkDimDbg = {8, 8};
323 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
324 {
325 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
326 {
327 metaBlkDimDbg.h <<= 1;
328 }
329 else
330 {
331 metaBlkDimDbg.w <<= 1;
332 }
333 }
334 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
335 #endif
336
337 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
338 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
339 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
340
341 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
342
343 if (m_settings.metaBaseAlignFix)
344 {
345 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
346 }
347
348 pOut->pitch = numMetaBlkX * metaBlkDim.w;
349 pOut->height = numMetaBlkY * metaBlkDim.h;
350 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
351 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
352 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
353
354 pOut->metaBlkWidth = metaBlkDim.w;
355 pOut->metaBlkHeight = metaBlkDim.h;
356
357 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
358
359 return ADDR_OK;
360 }
361
362 /**
363 ************************************************************************************************************************
364 * Gfx9Lib::GetMetaMipInfo
365 *
366 * @brief
367 * Get meta mip info
368 *
369 * @return
370 * N/A
371 ************************************************************************************************************************
372 */
373 VOID Gfx9Lib::GetMetaMipInfo(
374 UINT_32 numMipLevels, ///< [in] number of mip levels
375 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
376 BOOL_32 dataThick, ///< [in] data surface is thick
377 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
378 UINT_32 mip0Width, ///< [in] mip0 width
379 UINT_32 mip0Height, ///< [in] mip0 height
380 UINT_32 mip0Depth, ///< [in] mip0 depth
381 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
382 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
383 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
384 const
385 {
386 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
387 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
388 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
389 UINT_32 tailWidth = pMetaBlkDim->w;
390 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
391 UINT_32 tailDepth = pMetaBlkDim->d;
392 BOOL_32 inTail = FALSE;
393 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
394
395 if (numMipLevels > 1)
396 {
397 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
398 {
399 // Z major
400 major = ADDR_MAJOR_Z;
401 }
402 else if (numMetaBlkX >= numMetaBlkY)
403 {
404 // X major
405 major = ADDR_MAJOR_X;
406 }
407 else
408 {
409 // Y major
410 major = ADDR_MAJOR_Y;
411 }
412
413 inTail = ((mip0Width <= tailWidth) &&
414 (mip0Height <= tailHeight) &&
415 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
416
417 if (inTail == FALSE)
418 {
419 UINT_32 orderLimit;
420 UINT_32 *pMipDim;
421 UINT_32 *pOrderDim;
422
423 if (major == ADDR_MAJOR_Z)
424 {
425 // Z major
426 pMipDim = &numMetaBlkY;
427 pOrderDim = &numMetaBlkZ;
428 orderLimit = 4;
429 }
430 else if (major == ADDR_MAJOR_X)
431 {
432 // X major
433 pMipDim = &numMetaBlkY;
434 pOrderDim = &numMetaBlkX;
435 orderLimit = 4;
436 }
437 else
438 {
439 // Y major
440 pMipDim = &numMetaBlkX;
441 pOrderDim = &numMetaBlkY;
442 orderLimit = 2;
443 }
444
445 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
446 {
447 *pMipDim += 2;
448 }
449 else
450 {
451 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
452 }
453 }
454 }
455
456 if (pInfo != NULL)
457 {
458 UINT_32 mipWidth = mip0Width;
459 UINT_32 mipHeight = mip0Height;
460 UINT_32 mipDepth = mip0Depth;
461 Dim3d mipCoord = {0};
462
463 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
464 {
465 if (inTail)
466 {
467 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
468 pMetaBlkDim);
469 break;
470 }
471 else
472 {
473 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
474 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
475 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
476
477 pInfo[mip].inMiptail = FALSE;
478 pInfo[mip].startX = mipCoord.w;
479 pInfo[mip].startY = mipCoord.h;
480 pInfo[mip].startZ = mipCoord.d;
481 pInfo[mip].width = mipWidth;
482 pInfo[mip].height = mipHeight;
483 pInfo[mip].depth = dataThick ? mipDepth : 1;
484
485 if ((mip >= 3) || (mip & 1))
486 {
487 switch (major)
488 {
489 case ADDR_MAJOR_X:
490 mipCoord.w += mipWidth;
491 break;
492 case ADDR_MAJOR_Y:
493 mipCoord.h += mipHeight;
494 break;
495 case ADDR_MAJOR_Z:
496 mipCoord.d += mipDepth;
497 break;
498 default:
499 break;
500 }
501 }
502 else
503 {
504 switch (major)
505 {
506 case ADDR_MAJOR_X:
507 mipCoord.h += mipHeight;
508 break;
509 case ADDR_MAJOR_Y:
510 mipCoord.w += mipWidth;
511 break;
512 case ADDR_MAJOR_Z:
513 mipCoord.h += mipHeight;
514 break;
515 default:
516 break;
517 }
518 }
519
520 mipWidth = Max(mipWidth >> 1, 1u);
521 mipHeight = Max(mipHeight >> 1, 1u);
522 mipDepth = Max(mipDepth >> 1, 1u);
523
524 inTail = ((mipWidth <= tailWidth) &&
525 (mipHeight <= tailHeight) &&
526 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
527 }
528 }
529 }
530
531 *pNumMetaBlkX = numMetaBlkX;
532 *pNumMetaBlkY = numMetaBlkY;
533 *pNumMetaBlkZ = numMetaBlkZ;
534 }
535
536 /**
537 ************************************************************************************************************************
538 * Gfx9Lib::HwlComputeDccInfo
539 *
540 * @brief
541 * Interface function to compute DCC key info
542 *
543 * @return
544 * ADDR_E_RETURNCODE
545 ************************************************************************************************************************
546 */
547 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
548 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
549 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
550 ) const
551 {
552 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
553 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
554 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
555
556 if (dataLinear)
557 {
558 metaLinear = TRUE;
559 }
560 else if (metaLinear == TRUE)
561 {
562 pipeAligned = FALSE;
563 }
564
565 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
566
567 if (metaLinear)
568 {
569 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
570 ADDR_ASSERT_ALWAYS();
571
572 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
573 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
574 }
575 else
576 {
577 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
578
579 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
580
581 UINT_32 numFrags = Max(pIn->numFrags, 1u);
582 UINT_32 numSlices = Max(pIn->numSlices, 1u);
583
584 minMetaBlkSize /= numFrags;
585
586 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
587
588 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
589
590 if ((numPipeTotal > 1) || (numRbTotal > 1))
591 {
592 const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
593
594 numCompressBlkPerMetaBlk =
595 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
596
597 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
598 {
599 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
600 }
601 }
602
603 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
604 Dim3d metaBlkDim = compressBlkDim;
605
606 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
607 {
608 if ((metaBlkDim.h < metaBlkDim.w) ||
609 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
610 {
611 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
612 {
613 metaBlkDim.h <<= 1;
614 }
615 else
616 {
617 metaBlkDim.d <<= 1;
618 }
619 }
620 else
621 {
622 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
623 {
624 metaBlkDim.w <<= 1;
625 }
626 else
627 {
628 metaBlkDim.d <<= 1;
629 }
630 }
631 }
632
633 UINT_32 numMetaBlkX;
634 UINT_32 numMetaBlkY;
635 UINT_32 numMetaBlkZ;
636
637 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
638 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
639 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
640
641 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
642
643 if (numFrags > m_maxCompFrag)
644 {
645 sizeAlign *= (numFrags / m_maxCompFrag);
646 }
647
648 if (m_settings.metaBaseAlignFix)
649 {
650 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
651 }
652
653 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
654 numCompressBlkPerMetaBlk * numFrags;
655 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
656 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
657
658 pOut->pitch = numMetaBlkX * metaBlkDim.w;
659 pOut->height = numMetaBlkY * metaBlkDim.h;
660 pOut->depth = numMetaBlkZ * metaBlkDim.d;
661
662 pOut->compressBlkWidth = compressBlkDim.w;
663 pOut->compressBlkHeight = compressBlkDim.h;
664 pOut->compressBlkDepth = compressBlkDim.d;
665
666 pOut->metaBlkWidth = metaBlkDim.w;
667 pOut->metaBlkHeight = metaBlkDim.h;
668 pOut->metaBlkDepth = metaBlkDim.d;
669
670 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
671 pOut->fastClearSizePerSlice =
672 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
673 }
674
675 return ADDR_OK;
676 }
677
678 /**
679 ************************************************************************************************************************
680 * Gfx9Lib::HwlComputeMaxBaseAlignments
681 *
682 * @brief
683 * Gets maximum alignments
684 * @return
685 * maximum alignments
686 ************************************************************************************************************************
687 */
688 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
689 {
690 return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB);
691 }
692
693 /**
694 ************************************************************************************************************************
695 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
696 *
697 * @brief
698 * Gets maximum alignments for metadata
699 * @return
700 * maximum alignments for metadata
701 ************************************************************************************************************************
702 */
703 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
704 {
705 // Max base alignment for Htile
706 const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
707 const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
708
709 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
710 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
711 ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
712 const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
713
714 UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
715
716 if (maxNumPipeTotal > 2)
717 {
718 maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
719 }
720
721 maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
722
723 if (m_settings.metaBaseAlignFix)
724 {
725 maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB));
726 }
727
728 if (m_settings.htileAlignFix)
729 {
730 maxBaseAlignHtile *= maxNumPipeTotal;
731 }
732
733 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
734
735 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
736 UINT_32 maxBaseAlignDcc3D = 65536;
737
738 if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
739 {
740 maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
741 }
742
743 // Max base alignment for Msaa Dcc
744 UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
745
746 if (m_settings.metaBaseAlignFix)
747 {
748 maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB));
749 }
750
751 return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
752 }
753
754 /**
755 ************************************************************************************************************************
756 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
757 *
758 * @brief
759 * Interface function stub of AddrComputeCmaskAddrFromCoord
760 *
761 * @return
762 * ADDR_E_RETURNCODE
763 ************************************************************************************************************************
764 */
765 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
766 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
767 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
768 {
769 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
770 input.size = sizeof(input);
771 input.cMaskFlags = pIn->cMaskFlags;
772 input.colorFlags = pIn->colorFlags;
773 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
774 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
775 input.numSlices = Max(pIn->numSlices, 1u);
776 input.swizzleMode = pIn->swizzleMode;
777 input.resourceType = pIn->resourceType;
778
779 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
780 output.size = sizeof(output);
781
782 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
783
784 if (returnCode == ADDR_OK)
785 {
786 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
787 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
788 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
789 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
790
791 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
792 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
793 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
794
795 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
796
797 UINT_32 xb = pIn->x / output.metaBlkWidth;
798 UINT_32 yb = pIn->y / output.metaBlkHeight;
799 UINT_32 zb = pIn->slice;
800
801 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
802 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
803 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
804
805 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
806
807 pOut->addr = address >> 1;
808 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
809
810 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
811 pIn->swizzleMode);
812
813 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
814
815 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
816 }
817
818 return returnCode;
819 }
820
821 /**
822 ************************************************************************************************************************
823 * Gfx9Lib::HwlComputeHtileAddrFromCoord
824 *
825 * @brief
826 * Interface function stub of AddrComputeHtileAddrFromCoord
827 *
828 * @return
829 * ADDR_E_RETURNCODE
830 ************************************************************************************************************************
831 */
832 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
833 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
834 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
835 {
836 ADDR_E_RETURNCODE returnCode = ADDR_OK;
837
838 if (pIn->numMipLevels > 1)
839 {
840 returnCode = ADDR_NOTIMPLEMENTED;
841 }
842 else
843 {
844 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
845 input.size = sizeof(input);
846 input.hTileFlags = pIn->hTileFlags;
847 input.depthFlags = pIn->depthflags;
848 input.swizzleMode = pIn->swizzleMode;
849 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
850 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
851 input.numSlices = Max(pIn->numSlices, 1u);
852 input.numMipLevels = Max(pIn->numMipLevels, 1u);
853
854 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
855 output.size = sizeof(output);
856
857 returnCode = ComputeHtileInfo(&input, &output);
858
859 if (returnCode == ADDR_OK)
860 {
861 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
862 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
863 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
864 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
865
866 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
867 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
868 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
869
870 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
871
872 UINT_32 xb = pIn->x / output.metaBlkWidth;
873 UINT_32 yb = pIn->y / output.metaBlkHeight;
874 UINT_32 zb = pIn->slice;
875
876 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
877 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
878 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
879
880 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
881
882 pOut->addr = address >> 1;
883
884 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
885 pIn->swizzleMode);
886
887 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
888
889 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
890 }
891 }
892
893 return returnCode;
894 }
895
896 /**
897 ************************************************************************************************************************
898 * Gfx9Lib::HwlComputeHtileCoordFromAddr
899 *
900 * @brief
901 * Interface function stub of AddrComputeHtileCoordFromAddr
902 *
903 * @return
904 * ADDR_E_RETURNCODE
905 ************************************************************************************************************************
906 */
907 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
908 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
909 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
910 {
911 ADDR_E_RETURNCODE returnCode = ADDR_OK;
912
913 if (pIn->numMipLevels > 1)
914 {
915 returnCode = ADDR_NOTIMPLEMENTED;
916 }
917 else
918 {
919 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
920 input.size = sizeof(input);
921 input.hTileFlags = pIn->hTileFlags;
922 input.swizzleMode = pIn->swizzleMode;
923 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
924 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
925 input.numSlices = Max(pIn->numSlices, 1u);
926 input.numMipLevels = Max(pIn->numMipLevels, 1u);
927
928 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
929 output.size = sizeof(output);
930
931 returnCode = ComputeHtileInfo(&input, &output);
932
933 if (returnCode == ADDR_OK)
934 {
935 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
936 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
937 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
938 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
939
940 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
941 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
942 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
943
944 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
945
946 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
947 pIn->swizzleMode);
948
949 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
950
951 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
952
953 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
954 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
955
956 UINT_32 x, y, z, s, m;
957 pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
958
959 pOut->slice = m / sliceSizeInBlock;
960 pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
961 pOut->x = (m % pitchInBlock) * output.metaBlkWidth + x;
962 }
963 }
964
965 return returnCode;
966 }
967
968 /**
969 ************************************************************************************************************************
970 * Gfx9Lib::HwlComputeDccAddrFromCoord
971 *
972 * @brief
973 * Interface function stub of AddrComputeDccAddrFromCoord
974 *
975 * @return
976 * ADDR_E_RETURNCODE
977 ************************************************************************************************************************
978 */
979 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
980 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
981 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
982 {
983 ADDR_E_RETURNCODE returnCode = ADDR_OK;
984
985 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
986 {
987 returnCode = ADDR_NOTIMPLEMENTED;
988 }
989 else
990 {
991 ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
992 input.size = sizeof(input);
993 input.dccKeyFlags = pIn->dccKeyFlags;
994 input.colorFlags = pIn->colorFlags;
995 input.swizzleMode = pIn->swizzleMode;
996 input.resourceType = pIn->resourceType;
997 input.bpp = pIn->bpp;
998 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
999 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
1000 input.numSlices = Max(pIn->numSlices, 1u);
1001 input.numFrags = Max(pIn->numFrags, 1u);
1002 input.numMipLevels = Max(pIn->numMipLevels, 1u);
1003
1004 ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
1005 output.size = sizeof(output);
1006
1007 returnCode = ComputeDccInfo(&input, &output);
1008
1009 if (returnCode == ADDR_OK)
1010 {
1011 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
1012 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
1013 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
1014 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1015 UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth);
1016 UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth);
1017 UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
1018 UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
1019
1020 MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1021 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1022 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1023 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1024
1025 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1026
1027 UINT_32 xb = pIn->x / output.metaBlkWidth;
1028 UINT_32 yb = pIn->y / output.metaBlkHeight;
1029 UINT_32 zb = pIn->slice / output.metaBlkDepth;
1030
1031 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
1032 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1033 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1034
1035 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
1036
1037 pOut->addr = address >> 1;
1038
1039 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1040 pIn->swizzleMode);
1041
1042 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1043
1044 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1045 }
1046 }
1047
1048 return returnCode;
1049 }
1050
1051 /**
1052 ************************************************************************************************************************
1053 * Gfx9Lib::HwlInitGlobalParams
1054 *
1055 * @brief
1056 * Initializes global parameters
1057 *
1058 * @return
1059 * TRUE if all settings are valid
1060 *
1061 ************************************************************************************************************************
1062 */
1063 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1064 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1065 {
1066 BOOL_32 valid = TRUE;
1067
1068 if (m_settings.isArcticIsland)
1069 {
1070 GB_ADDR_CONFIG gbAddrConfig;
1071
1072 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1073
1074 // These values are copied from CModel code
1075 switch (gbAddrConfig.bits.NUM_PIPES)
1076 {
1077 case ADDR_CONFIG_1_PIPE:
1078 m_pipes = 1;
1079 m_pipesLog2 = 0;
1080 break;
1081 case ADDR_CONFIG_2_PIPE:
1082 m_pipes = 2;
1083 m_pipesLog2 = 1;
1084 break;
1085 case ADDR_CONFIG_4_PIPE:
1086 m_pipes = 4;
1087 m_pipesLog2 = 2;
1088 break;
1089 case ADDR_CONFIG_8_PIPE:
1090 m_pipes = 8;
1091 m_pipesLog2 = 3;
1092 break;
1093 case ADDR_CONFIG_16_PIPE:
1094 m_pipes = 16;
1095 m_pipesLog2 = 4;
1096 break;
1097 case ADDR_CONFIG_32_PIPE:
1098 m_pipes = 32;
1099 m_pipesLog2 = 5;
1100 break;
1101 default:
1102 ADDR_ASSERT_ALWAYS();
1103 break;
1104 }
1105
1106 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1107 {
1108 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1109 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1110 m_pipeInterleaveLog2 = 8;
1111 break;
1112 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1113 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1114 m_pipeInterleaveLog2 = 9;
1115 break;
1116 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1117 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1118 m_pipeInterleaveLog2 = 10;
1119 break;
1120 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1121 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1122 m_pipeInterleaveLog2 = 11;
1123 break;
1124 default:
1125 ADDR_ASSERT_ALWAYS();
1126 break;
1127 }
1128
1129 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1130 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1131 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1132
1133 switch (gbAddrConfig.bits.NUM_BANKS)
1134 {
1135 case ADDR_CONFIG_1_BANK:
1136 m_banks = 1;
1137 m_banksLog2 = 0;
1138 break;
1139 case ADDR_CONFIG_2_BANK:
1140 m_banks = 2;
1141 m_banksLog2 = 1;
1142 break;
1143 case ADDR_CONFIG_4_BANK:
1144 m_banks = 4;
1145 m_banksLog2 = 2;
1146 break;
1147 case ADDR_CONFIG_8_BANK:
1148 m_banks = 8;
1149 m_banksLog2 = 3;
1150 break;
1151 case ADDR_CONFIG_16_BANK:
1152 m_banks = 16;
1153 m_banksLog2 = 4;
1154 break;
1155 default:
1156 ADDR_ASSERT_ALWAYS();
1157 break;
1158 }
1159
1160 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1161 {
1162 case ADDR_CONFIG_1_SHADER_ENGINE:
1163 m_se = 1;
1164 m_seLog2 = 0;
1165 break;
1166 case ADDR_CONFIG_2_SHADER_ENGINE:
1167 m_se = 2;
1168 m_seLog2 = 1;
1169 break;
1170 case ADDR_CONFIG_4_SHADER_ENGINE:
1171 m_se = 4;
1172 m_seLog2 = 2;
1173 break;
1174 case ADDR_CONFIG_8_SHADER_ENGINE:
1175 m_se = 8;
1176 m_seLog2 = 3;
1177 break;
1178 default:
1179 ADDR_ASSERT_ALWAYS();
1180 break;
1181 }
1182
1183 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1184 {
1185 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1186 m_rbPerSe = 1;
1187 m_rbPerSeLog2 = 0;
1188 break;
1189 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1190 m_rbPerSe = 2;
1191 m_rbPerSeLog2 = 1;
1192 break;
1193 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1194 m_rbPerSe = 4;
1195 m_rbPerSeLog2 = 2;
1196 break;
1197 default:
1198 ADDR_ASSERT_ALWAYS();
1199 break;
1200 }
1201
1202 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1203 {
1204 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1205 m_maxCompFrag = 1;
1206 m_maxCompFragLog2 = 0;
1207 break;
1208 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1209 m_maxCompFrag = 2;
1210 m_maxCompFragLog2 = 1;
1211 break;
1212 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1213 m_maxCompFrag = 4;
1214 m_maxCompFragLog2 = 2;
1215 break;
1216 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1217 m_maxCompFrag = 8;
1218 m_maxCompFragLog2 = 3;
1219 break;
1220 default:
1221 ADDR_ASSERT_ALWAYS();
1222 break;
1223 }
1224
1225 m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
1226 ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
1227 ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
1228 m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
1229
1230 if ((m_rbPerSeLog2 == 1) &&
1231 (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1232 ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1233 {
1234 ADDR_ASSERT(m_settings.isVega10 == FALSE);
1235 ADDR_ASSERT(m_settings.isRaven == FALSE);
1236
1237 ADDR_ASSERT(m_settings.isVega20 == FALSE);
1238
1239 if (m_settings.isVega12)
1240 {
1241 m_settings.htileCacheRbConflict = 1;
1242 }
1243 }
1244 }
1245 else
1246 {
1247 valid = FALSE;
1248 ADDR_NOT_IMPLEMENTED();
1249 }
1250
1251 if (valid)
1252 {
1253 InitEquationTable();
1254 }
1255
1256 return valid;
1257 }
1258
1259 /**
1260 ************************************************************************************************************************
1261 * Gfx9Lib::HwlConvertChipFamily
1262 *
1263 * @brief
1264 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1265 * @return
1266 * ChipFamily
1267 ************************************************************************************************************************
1268 */
1269 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1270 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1271 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1272 {
1273 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1274
1275 switch (uChipFamily)
1276 {
1277 case FAMILY_AI:
1278 m_settings.isArcticIsland = 1;
1279 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1280 m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1281 m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1282 m_settings.isDce12 = 1;
1283
1284 if (m_settings.isVega10 == 0)
1285 {
1286 m_settings.htileAlignFix = 1;
1287 m_settings.applyAliasFix = 1;
1288 }
1289
1290 m_settings.metaBaseAlignFix = 1;
1291
1292 m_settings.depthPipeXorDisable = 1;
1293 break;
1294 case FAMILY_RV:
1295 m_settings.isArcticIsland = 1;
1296
1297 if (ASICREV_IS_RAVEN(uChipRevision))
1298 {
1299 m_settings.isRaven = 1;
1300
1301 m_settings.depthPipeXorDisable = 1;
1302 }
1303
1304 if (ASICREV_IS_RAVEN2(uChipRevision))
1305 {
1306 m_settings.isRaven = 1;
1307 }
1308
1309 if (m_settings.isRaven == 0)
1310 {
1311 m_settings.htileAlignFix = 1;
1312 m_settings.applyAliasFix = 1;
1313 }
1314
1315 m_settings.isDcn1 = m_settings.isRaven;
1316
1317 m_settings.metaBaseAlignFix = 1;
1318 break;
1319
1320 default:
1321 ADDR_ASSERT(!"This should be a Fusion");
1322 break;
1323 }
1324
1325 return family;
1326 }
1327
1328 /**
1329 ************************************************************************************************************************
1330 * Gfx9Lib::GetRbEquation
1331 *
1332 * @brief
1333 * Get RB equation
1334 * @return
1335 * N/A
1336 ************************************************************************************************************************
1337 */
1338 VOID Gfx9Lib::GetRbEquation(
1339 CoordEq* pRbEq, ///< [out] rb equation
1340 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1341 UINT_32 numSeLog2) ///< [in] number of shader engine
1342 const
1343 {
1344 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1345 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1346 Coordinate cx('x', rbRegion);
1347 Coordinate cy('y', rbRegion);
1348
1349 UINT_32 start = 0;
1350 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1351
1352 // Clear the rb equation
1353 pRbEq->resize(0);
1354 pRbEq->resize(numRbTotalLog2);
1355
1356 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1357 {
1358 // Special case when more than 1 SE, and 2 RB per SE
1359 (*pRbEq)[0].add(cx);
1360 (*pRbEq)[0].add(cy);
1361 cx++;
1362 cy++;
1363
1364 if (m_settings.applyAliasFix == false)
1365 {
1366 (*pRbEq)[0].add(cy);
1367 }
1368
1369 (*pRbEq)[0].add(cy);
1370 start++;
1371 }
1372
1373 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1374
1375 for (UINT_32 i = 0; i < numBits; i++)
1376 {
1377 UINT_32 idx =
1378 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1379
1380 if ((i % 2) == 1)
1381 {
1382 (*pRbEq)[idx].add(cx);
1383 cx++;
1384 }
1385 else
1386 {
1387 (*pRbEq)[idx].add(cy);
1388 cy++;
1389 }
1390 }
1391 }
1392
1393 /**
1394 ************************************************************************************************************************
1395 * Gfx9Lib::GetDataEquation
1396 *
1397 * @brief
1398 * Get data equation for color, fmask and Z
1399 * @return
1400 * N/A
1401 ************************************************************************************************************************
1402 */
1403 VOID Gfx9Lib::GetDataEquation(
1404 CoordEq* pDataEq, ///< [out] data surface equation
1405 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1406 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1407 AddrResourceType resourceType, ///< [in] data surface resource type
1408 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1409 UINT_32 numSamplesLog2) ///< [in] data surface sample count
1410 const
1411 {
1412 Coordinate cx('x', 0);
1413 Coordinate cy('y', 0);
1414 Coordinate cz('z', 0);
1415 Coordinate cs('s', 0);
1416
1417 // Clear the equation
1418 pDataEq->resize(0);
1419 pDataEq->resize(27);
1420
1421 if (dataSurfaceType == Gfx9DataColor)
1422 {
1423 if (IsLinear(swizzleMode))
1424 {
1425 Coordinate cm('m', 0);
1426
1427 pDataEq->resize(49);
1428
1429 for (UINT_32 i = 0; i < 49; i++)
1430 {
1431 (*pDataEq)[i].add(cm);
1432 cm++;
1433 }
1434 }
1435 else if (IsThick(resourceType, swizzleMode))
1436 {
1437 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1438 UINT_32 i;
1439 if (IsStandardSwizzle(resourceType, swizzleMode))
1440 {
1441 // Standard 3d swizzle
1442 // Fill in bottom x bits
1443 for (i = elementBytesLog2; i < 4; i++)
1444 {
1445 (*pDataEq)[i].add(cx);
1446 cx++;
1447 }
1448 // Fill in 2 bits of y and then z
1449 for (i = 4; i < 6; i++)
1450 {
1451 (*pDataEq)[i].add(cy);
1452 cy++;
1453 }
1454 for (i = 6; i < 8; i++)
1455 {
1456 (*pDataEq)[i].add(cz);
1457 cz++;
1458 }
1459 if (elementBytesLog2 < 2)
1460 {
1461 // fill in z & y bit
1462 (*pDataEq)[8].add(cz);
1463 (*pDataEq)[9].add(cy);
1464 cz++;
1465 cy++;
1466 }
1467 else if (elementBytesLog2 == 2)
1468 {
1469 // fill in y and x bit
1470 (*pDataEq)[8].add(cy);
1471 (*pDataEq)[9].add(cx);
1472 cy++;
1473 cx++;
1474 }
1475 else
1476 {
1477 // fill in 2 x bits
1478 (*pDataEq)[8].add(cx);
1479 cx++;
1480 (*pDataEq)[9].add(cx);
1481 cx++;
1482 }
1483 }
1484 else
1485 {
1486 // Z 3d swizzle
1487 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1488 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1489 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1490 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1491 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1492 {
1493 (*pDataEq)[i].add(cz);
1494 cz++;
1495 }
1496 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1497 {
1498 // add an x and z
1499 (*pDataEq)[6].add(cx);
1500 (*pDataEq)[7].add(cz);
1501 cx++;
1502 cz++;
1503 }
1504 else if (elementBytesLog2 == 2)
1505 {
1506 // add a y and z
1507 (*pDataEq)[6].add(cy);
1508 (*pDataEq)[7].add(cz);
1509 cy++;
1510 cz++;
1511 }
1512 // add y and x
1513 (*pDataEq)[8].add(cy);
1514 (*pDataEq)[9].add(cx);
1515 cy++;
1516 cx++;
1517 }
1518 // Fill in bit 10 and up
1519 pDataEq->mort3d( cz, cy, cx, 10 );
1520 }
1521 else if (IsThin(resourceType, swizzleMode))
1522 {
1523 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1524 // Color 2D
1525 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1526 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1527 UINT_32 i;
1528 // Fill in bottom x bits
1529 for (i = elementBytesLog2; i < 4; i++)
1530 {
1531 (*pDataEq)[i].add(cx);
1532 cx++;
1533 }
1534 // Fill in bottom y bits
1535 for (i = 4; i < 4 + microYBits; i++)
1536 {
1537 (*pDataEq)[i].add(cy);
1538 cy++;
1539 }
1540 // Fill in last of the micro_x bits
1541 for (i = 4 + microYBits; i < 8; i++)
1542 {
1543 (*pDataEq)[i].add(cx);
1544 cx++;
1545 }
1546 // Fill in x/y bits below sample split
1547 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1548 // Fill in sample bits
1549 for (i = 0; i < numSamplesLog2; i++)
1550 {
1551 cs.set('s', i);
1552 (*pDataEq)[tileSplitStart + i].add(cs);
1553 }
1554 // Fill in x/y bits above sample split
1555 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1556 {
1557 pDataEq->mort2d(cx, cy, blockSizeLog2);
1558 }
1559 else
1560 {
1561 pDataEq->mort2d(cy, cx, blockSizeLog2);
1562 }
1563 }
1564 else
1565 {
1566 ADDR_ASSERT_ALWAYS();
1567 }
1568 }
1569 else
1570 {
1571 // Fmask or depth
1572 UINT_32 sampleStart = elementBytesLog2;
1573 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1574 UINT_32 ymajStart = 6 + numSamplesLog2;
1575
1576 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1577 {
1578 cs.set('s', s);
1579 (*pDataEq)[sampleStart + s].add(cs);
1580 }
1581
1582 // Put in the x-major order pixel bits
1583 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1584 // Put in the y-major order pixel bits
1585 pDataEq->mort2d(cy, cx, ymajStart);
1586 }
1587 }
1588
1589 /**
1590 ************************************************************************************************************************
1591 * Gfx9Lib::GetPipeEquation
1592 *
1593 * @brief
1594 * Get pipe equation
1595 * @return
1596 * N/A
1597 ************************************************************************************************************************
1598 */
1599 VOID Gfx9Lib::GetPipeEquation(
1600 CoordEq* pPipeEq, ///< [out] pipe equation
1601 CoordEq* pDataEq, ///< [in] data equation
1602 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1603 UINT_32 numPipeLog2, ///< [in] number of pipes
1604 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1605 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1606 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1607 AddrResourceType resourceType ///< [in] data surface resource type
1608 ) const
1609 {
1610 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1611 CoordEq dataEq;
1612
1613 pDataEq->copy(dataEq);
1614
1615 if (dataSurfaceType == Gfx9DataColor)
1616 {
1617 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1618 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1619 }
1620
1621 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1622
1623 // This section should only apply to z/stencil, maybe fmask
1624 // If the pipe bit is below the comp block size,
1625 // then keep moving up the address until we find a bit that is above
1626 UINT_32 pipeStart = 0;
1627
1628 if (dataSurfaceType != Gfx9DataColor)
1629 {
1630 Coordinate tileMin('x', 3);
1631
1632 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1633 {
1634 pipeStart++;
1635 }
1636
1637 // if pipe is 0, then the first pipe bit is above the comp block size,
1638 // so we don't need to do anything
1639 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1640 // we will get the same pipe equation
1641 if (pipeStart != 0)
1642 {
1643 for (UINT_32 i = 0; i < numPipeLog2; i++)
1644 {
1645 // Copy the jth bit above pipe interleave to the current pipe equation bit
1646 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1647 }
1648 }
1649 }
1650
1651 if (IsPrt(swizzleMode))
1652 {
1653 // Clear out bits above the block size if prt's are enabled
1654 dataEq.resize(blockSizeLog2);
1655 dataEq.resize(48);
1656 }
1657
1658 if (IsXor(swizzleMode))
1659 {
1660 CoordEq xorMask;
1661
1662 if (IsThick(resourceType, swizzleMode))
1663 {
1664 CoordEq xorMask2;
1665
1666 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1667
1668 xorMask.resize(numPipeLog2);
1669
1670 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1671 {
1672 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1673 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1674 }
1675 }
1676 else
1677 {
1678 // Xor in the bits above the pipe+gpu bits
1679 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1680
1681 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1682 {
1683 Coordinate co;
1684 CoordEq xorMask2;
1685 // if 1xaa and not prt, then xor in the z bits
1686 xorMask2.resize(0);
1687 xorMask2.resize(numPipeLog2);
1688 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1689 {
1690 co.set('z', numPipeLog2 - 1 - pipeIdx);
1691 xorMask2[pipeIdx].add(co);
1692 }
1693
1694 pPipeEq->xorin(xorMask2);
1695 }
1696 }
1697
1698 xorMask.reverse();
1699 pPipeEq->xorin(xorMask);
1700 }
1701 }
1702 /**
1703 ************************************************************************************************************************
1704 * Gfx9Lib::GetMetaEquation
1705 *
1706 * @brief
1707 * Get meta equation for cmask/htile/DCC
1708 * @return
1709 * Pointer to a calculated meta equation
1710 ************************************************************************************************************************
1711 */
1712 const CoordEq* Gfx9Lib::GetMetaEquation(
1713 const MetaEqParams& metaEqParams)
1714 {
1715 UINT_32 cachedMetaEqIndex;
1716
1717 for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1718 {
1719 if (memcmp(&metaEqParams,
1720 &m_cachedMetaEqKey[cachedMetaEqIndex],
1721 static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1722 {
1723 break;
1724 }
1725 }
1726
1727 CoordEq* pMetaEq = NULL;
1728
1729 if (cachedMetaEqIndex < MaxCachedMetaEq)
1730 {
1731 pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1732 }
1733 else
1734 {
1735 m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1736
1737 pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1738
1739 m_metaEqOverrideIndex %= MaxCachedMetaEq;
1740
1741 GenMetaEquation(pMetaEq,
1742 metaEqParams.maxMip,
1743 metaEqParams.elementBytesLog2,
1744 metaEqParams.numSamplesLog2,
1745 metaEqParams.metaFlag,
1746 metaEqParams.dataSurfaceType,
1747 metaEqParams.swizzleMode,
1748 metaEqParams.resourceType,
1749 metaEqParams.metaBlkWidthLog2,
1750 metaEqParams.metaBlkHeightLog2,
1751 metaEqParams.metaBlkDepthLog2,
1752 metaEqParams.compBlkWidthLog2,
1753 metaEqParams.compBlkHeightLog2,
1754 metaEqParams.compBlkDepthLog2);
1755 }
1756
1757 return pMetaEq;
1758 }
1759
1760 /**
1761 ************************************************************************************************************************
1762 * Gfx9Lib::GenMetaEquation
1763 *
1764 * @brief
1765 * Get meta equation for cmask/htile/DCC
1766 * @return
1767 * N/A
1768 ************************************************************************************************************************
1769 */
1770 VOID Gfx9Lib::GenMetaEquation(
1771 CoordEq* pMetaEq, ///< [out] meta equation
1772 UINT_32 maxMip, ///< [in] max mip Id
1773 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1774 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1775 ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
1776 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1777 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1778 AddrResourceType resourceType, ///< [in] data surface resource type
1779 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1780 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1781 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1782 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1783 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1784 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1785 const
1786 {
1787 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1788 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1789
1790 // Get the correct data address and rb equation
1791 CoordEq dataEq;
1792 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1793 elementBytesLog2, numSamplesLog2);
1794
1795 // Get pipe and rb equations
1796 CoordEq pipeEquation;
1797 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1798 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1799 numPipeTotalLog2 = pipeEquation.getsize();
1800
1801 if (metaFlag.linear)
1802 {
1803 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1804 ADDR_ASSERT_ALWAYS();
1805
1806 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1807
1808 dataEq.copy(*pMetaEq);
1809
1810 if (IsLinear(swizzleMode))
1811 {
1812 if (metaFlag.pipeAligned)
1813 {
1814 // Remove the pipe bits
1815 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1816 pMetaEq->shift(-shift, pipeInterleaveLog2);
1817 }
1818 // Divide by comp block size, which for linear (which is always color) is 256 B
1819 pMetaEq->shift(-8);
1820
1821 if (metaFlag.pipeAligned)
1822 {
1823 // Put pipe bits back in
1824 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1825
1826 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1827 {
1828 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1829 }
1830 }
1831 }
1832
1833 pMetaEq->shift(1);
1834 }
1835 else
1836 {
1837 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1838 UINT_32 compFragLog2 =
1839 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1840 maxCompFragLog2 : numSamplesLog2;
1841
1842 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1843
1844 // Make sure the metaaddr is cleared
1845 pMetaEq->resize(0);
1846 pMetaEq->resize(27);
1847
1848 if (IsThick(resourceType, swizzleMode))
1849 {
1850 Coordinate cx('x', 0);
1851 Coordinate cy('y', 0);
1852 Coordinate cz('z', 0);
1853
1854 if (maxMip > 0)
1855 {
1856 pMetaEq->mort3d(cy, cx, cz);
1857 }
1858 else
1859 {
1860 pMetaEq->mort3d(cx, cy, cz);
1861 }
1862 }
1863 else
1864 {
1865 Coordinate cx('x', 0);
1866 Coordinate cy('y', 0);
1867 Coordinate cs;
1868
1869 if (maxMip > 0)
1870 {
1871 pMetaEq->mort2d(cy, cx, compFragLog2);
1872 }
1873 else
1874 {
1875 pMetaEq->mort2d(cx, cy, compFragLog2);
1876 }
1877
1878 //------------------------------------------------------------------------------------------------------------------------
1879 // Put the compressible fragments at the lsb
1880 // the uncompressible frags will be at the msb of the micro address
1881 //------------------------------------------------------------------------------------------------------------------------
1882 for (UINT_32 s = 0; s < compFragLog2; s++)
1883 {
1884 cs.set('s', s);
1885 (*pMetaEq)[s].add(cs);
1886 }
1887 }
1888
1889 // Keep a copy of the pipe equations
1890 CoordEq origPipeEquation;
1891 pipeEquation.copy(origPipeEquation);
1892
1893 Coordinate co;
1894 // filter out everything under the compressed block size
1895 co.set('x', compBlkWidthLog2);
1896 pMetaEq->Filter('<', co, 0, 'x');
1897 co.set('y', compBlkHeightLog2);
1898 pMetaEq->Filter('<', co, 0, 'y');
1899 co.set('z', compBlkDepthLog2);
1900 pMetaEq->Filter('<', co, 0, 'z');
1901
1902 // For non-color, filter out sample bits
1903 if (dataSurfaceType != Gfx9DataColor)
1904 {
1905 co.set('x', 0);
1906 pMetaEq->Filter('<', co, 0, 's');
1907 }
1908
1909 // filter out everything above the metablock size
1910 co.set('x', metaBlkWidthLog2 - 1);
1911 pMetaEq->Filter('>', co, 0, 'x');
1912 co.set('y', metaBlkHeightLog2 - 1);
1913 pMetaEq->Filter('>', co, 0, 'y');
1914 co.set('z', metaBlkDepthLog2 - 1);
1915 pMetaEq->Filter('>', co, 0, 'z');
1916
1917 // filter out everything above the metablock size for the channel bits
1918 co.set('x', metaBlkWidthLog2 - 1);
1919 pipeEquation.Filter('>', co, 0, 'x');
1920 co.set('y', metaBlkHeightLog2 - 1);
1921 pipeEquation.Filter('>', co, 0, 'y');
1922 co.set('z', metaBlkDepthLog2 - 1);
1923 pipeEquation.Filter('>', co, 0, 'z');
1924
1925 // Make sure we still have the same number of channel bits
1926 if (pipeEquation.getsize() != numPipeTotalLog2)
1927 {
1928 ADDR_ASSERT_ALWAYS();
1929 }
1930
1931 // Loop through all channel and rb bits,
1932 // and make sure these components exist in the metadata address
1933 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1934 {
1935 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1936 {
1937 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1938 {
1939 ADDR_ASSERT_ALWAYS();
1940 }
1941 }
1942 }
1943
1944 const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
1945 const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1946 const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1947 CoordEq origRbEquation;
1948
1949 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1950
1951 CoordEq rbEquation = origRbEquation;
1952
1953 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1954 {
1955 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1956 {
1957 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1958 {
1959 ADDR_ASSERT_ALWAYS();
1960 }
1961 }
1962 }
1963
1964 if (m_settings.applyAliasFix)
1965 {
1966 co.set('z', -1);
1967 }
1968
1969 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1970 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1971 {
1972 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1973 {
1974 BOOL_32 isRbEquationInPipeEquation = FALSE;
1975
1976 if (m_settings.applyAliasFix)
1977 {
1978 CoordTerm filteredPipeEq;
1979 filteredPipeEq = pipeEquation[j];
1980
1981 filteredPipeEq.Filter('>', co, 0, 'z');
1982
1983 isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1984 }
1985 else
1986 {
1987 isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1988 }
1989
1990 if (isRbEquationInPipeEquation)
1991 {
1992 rbEquation[i].Clear();
1993 }
1994 }
1995 }
1996
1997 bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
1998
1999 // Loop through each bit of the channel, get the smallest coordinate,
2000 // and remove it from the metaaddr, and rb_equation
2001 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2002 {
2003 pipeEquation[i].getsmallest(co);
2004
2005 UINT_32 old_size = pMetaEq->getsize();
2006 pMetaEq->Filter('=', co);
2007 UINT_32 new_size = pMetaEq->getsize();
2008 if (new_size != old_size-1)
2009 {
2010 ADDR_ASSERT_ALWAYS();
2011 }
2012 pipeEquation.remove(co);
2013 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2014 {
2015 if (rbEquation[j].remove(co))
2016 {
2017 // if we actually removed something from this bit, then add the remaining
2018 // channel bits, as these can be removed for this bit
2019 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2020 {
2021 if (pipeEquation[i][k] != co)
2022 {
2023 rbEquation[j].add(pipeEquation[i][k]);
2024 rbAppendedWithPipeBits[j] = true;
2025 }
2026 }
2027 }
2028 }
2029 }
2030
2031 // Loop through the rb bits and see what remain;
2032 // filter out the smallest coordinate if it remains
2033 UINT_32 rbBitsLeft = 0;
2034 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2035 {
2036 BOOL_32 isRbEqAppended = FALSE;
2037
2038 if (m_settings.applyAliasFix)
2039 {
2040 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2041 }
2042 else
2043 {
2044 isRbEqAppended = (rbEquation[i].getsize() > 0);
2045 }
2046
2047 if (isRbEqAppended)
2048 {
2049 rbBitsLeft++;
2050 rbEquation[i].getsmallest(co);
2051 UINT_32 old_size = pMetaEq->getsize();
2052 pMetaEq->Filter('=', co);
2053 UINT_32 new_size = pMetaEq->getsize();
2054 if (new_size != old_size - 1)
2055 {
2056 // assert warning
2057 }
2058 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2059 {
2060 if (rbEquation[j].remove(co))
2061 {
2062 // if we actually removed something from this bit, then add the remaining
2063 // rb bits, as these can be removed for this bit
2064 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2065 {
2066 if (rbEquation[i][k] != co)
2067 {
2068 rbEquation[j].add(rbEquation[i][k]);
2069 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2070 }
2071 }
2072 }
2073 }
2074 }
2075 }
2076
2077 // capture the size of the metaaddr
2078 UINT_32 metaSize = pMetaEq->getsize();
2079 // resize to 49 bits...make this a nibble address
2080 pMetaEq->resize(49);
2081 // Concatenate the macro address above the current address
2082 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2083 {
2084 co.set('m', j);
2085 (*pMetaEq)[i].add(co);
2086 }
2087
2088 // Multiply by meta element size (in nibbles)
2089 if (dataSurfaceType == Gfx9DataColor)
2090 {
2091 pMetaEq->shift(1);
2092 }
2093 else if (dataSurfaceType == Gfx9DataDepthStencil)
2094 {
2095 pMetaEq->shift(3);
2096 }
2097
2098 //------------------------------------------------------------------------------------------
2099 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2100 // Shift up from pipe interleave number of channel
2101 // and rb bits left, and uncompressed fragments
2102 //------------------------------------------------------------------------------------------
2103
2104 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2105
2106 // Put in the channel bits
2107 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2108 {
2109 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2110 }
2111
2112 // Put in remaining rb bits
2113 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2114 {
2115 BOOL_32 isRbEqAppended = FALSE;
2116
2117 if (m_settings.applyAliasFix)
2118 {
2119 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2120 }
2121 else
2122 {
2123 isRbEqAppended = (rbEquation[i].getsize() > 0);
2124 }
2125
2126 if (isRbEqAppended)
2127 {
2128 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2129 // Mark any rb bit we add in to the rb mask
2130 j++;
2131 }
2132 }
2133
2134 //------------------------------------------------------------------------------------------
2135 // Put in the uncompressed fragment bits
2136 //------------------------------------------------------------------------------------------
2137 for (UINT_32 i = 0; i < uncompFragLog2; i++)
2138 {
2139 co.set('s', compFragLog2 + i);
2140 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2141 }
2142 }
2143 }
2144
2145 /**
2146 ************************************************************************************************************************
2147 * Gfx9Lib::IsEquationSupported
2148 *
2149 * @brief
2150 * Check if equation is supported for given swizzle mode and resource type.
2151 *
2152 * @return
2153 * TRUE if supported
2154 ************************************************************************************************************************
2155 */
2156 BOOL_32 Gfx9Lib::IsEquationSupported(
2157 AddrResourceType rsrcType,
2158 AddrSwizzleMode swMode,
2159 UINT_32 elementBytesLog2) const
2160 {
2161 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2162 (IsLinear(swMode) == FALSE) &&
2163 (((IsTex2d(rsrcType) == TRUE) &&
2164 ((elementBytesLog2 < 4) ||
2165 ((IsRotateSwizzle(swMode) == FALSE) &&
2166 (IsZOrderSwizzle(swMode) == FALSE)))) ||
2167 ((IsTex3d(rsrcType) == TRUE) &&
2168 (IsRotateSwizzle(swMode) == FALSE) &&
2169 (IsBlock256b(swMode) == FALSE)));
2170
2171 return supported;
2172 }
2173
2174 /**
2175 ************************************************************************************************************************
2176 * Gfx9Lib::InitEquationTable
2177 *
2178 * @brief
2179 * Initialize Equation table.
2180 *
2181 * @return
2182 * N/A
2183 ************************************************************************************************************************
2184 */
2185 VOID Gfx9Lib::InitEquationTable()
2186 {
2187 memset(m_equationTable, 0, sizeof(m_equationTable));
2188
2189 // Loop all possible resource type (2D/3D)
2190 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2191 {
2192 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2193
2194 // Loop all possible swizzle mode
2195 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
2196 {
2197 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2198
2199 // Loop all possible bpp
2200 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2201 {
2202 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2203
2204 // Check if the input is supported
2205 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2206 {
2207 ADDR_EQUATION equation;
2208 ADDR_E_RETURNCODE retCode;
2209
2210 memset(&equation, 0, sizeof(ADDR_EQUATION));
2211
2212 // Generate the equation
2213 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2214 {
2215 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2216 }
2217 else if (IsThin(rsrcType, swMode))
2218 {
2219 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2220 }
2221 else
2222 {
2223 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2224 }
2225
2226 // Only fill the equation into the table if the return code is ADDR_OK,
2227 // otherwise if the return code is not ADDR_OK, it indicates this is not
2228 // a valid input, we do nothing but just fill invalid equation index
2229 // into the lookup table.
2230 if (retCode == ADDR_OK)
2231 {
2232 equationIndex = m_numEquations;
2233 ADDR_ASSERT(equationIndex < EquationTableSize);
2234
2235 m_equationTable[equationIndex] = equation;
2236
2237 m_numEquations++;
2238 }
2239 else
2240 {
2241 ADDR_ASSERT_ALWAYS();
2242 }
2243 }
2244
2245 // Fill the index into the lookup table, if the combination is not supported
2246 // fill the invalid equation index
2247 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2248 }
2249 }
2250 }
2251 }
2252
2253 /**
2254 ************************************************************************************************************************
2255 * Gfx9Lib::HwlGetEquationIndex
2256 *
2257 * @brief
2258 * Interface function stub of GetEquationIndex
2259 *
2260 * @return
2261 * ADDR_E_RETURNCODE
2262 ************************************************************************************************************************
2263 */
2264 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2265 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2266 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2267 ) const
2268 {
2269 AddrResourceType rsrcType = pIn->resourceType;
2270 AddrSwizzleMode swMode = pIn->swizzleMode;
2271 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2272 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2273
2274 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2275 {
2276 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2277 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2278
2279 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2280 }
2281
2282 if (pOut->pMipInfo != NULL)
2283 {
2284 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2285 {
2286 pOut->pMipInfo[i].equationIndex = index;
2287 }
2288 }
2289
2290 return index;
2291 }
2292
2293 /**
2294 ************************************************************************************************************************
2295 * Gfx9Lib::HwlComputeBlock256Equation
2296 *
2297 * @brief
2298 * Interface function stub of ComputeBlock256Equation
2299 *
2300 * @return
2301 * ADDR_E_RETURNCODE
2302 ************************************************************************************************************************
2303 */
2304 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2305 AddrResourceType rsrcType,
2306 AddrSwizzleMode swMode,
2307 UINT_32 elementBytesLog2,
2308 ADDR_EQUATION* pEquation) const
2309 {
2310 ADDR_E_RETURNCODE ret = ADDR_OK;
2311
2312 pEquation->numBits = 8;
2313
2314 UINT_32 i = 0;
2315 for (; i < elementBytesLog2; i++)
2316 {
2317 InitChannel(1, 0 , i, &pEquation->addr[i]);
2318 }
2319
2320 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2321
2322 const UINT_32 maxBitsUsed = 4;
2323 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2324 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2325
2326 for (i = 0; i < maxBitsUsed; i++)
2327 {
2328 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2329 InitChannel(1, 1, i, &y[i]);
2330 }
2331
2332 if (IsStandardSwizzle(rsrcType, swMode))
2333 {
2334 switch (elementBytesLog2)
2335 {
2336 case 0:
2337 pixelBit[0] = x[0];
2338 pixelBit[1] = x[1];
2339 pixelBit[2] = x[2];
2340 pixelBit[3] = x[3];
2341 pixelBit[4] = y[0];
2342 pixelBit[5] = y[1];
2343 pixelBit[6] = y[2];
2344 pixelBit[7] = y[3];
2345 break;
2346 case 1:
2347 pixelBit[0] = x[0];
2348 pixelBit[1] = x[1];
2349 pixelBit[2] = x[2];
2350 pixelBit[3] = y[0];
2351 pixelBit[4] = y[1];
2352 pixelBit[5] = y[2];
2353 pixelBit[6] = x[3];
2354 break;
2355 case 2:
2356 pixelBit[0] = x[0];
2357 pixelBit[1] = x[1];
2358 pixelBit[2] = y[0];
2359 pixelBit[3] = y[1];
2360 pixelBit[4] = y[2];
2361 pixelBit[5] = x[2];
2362 break;
2363 case 3:
2364 pixelBit[0] = x[0];
2365 pixelBit[1] = y[0];
2366 pixelBit[2] = y[1];
2367 pixelBit[3] = x[1];
2368 pixelBit[4] = x[2];
2369 break;
2370 case 4:
2371 pixelBit[0] = y[0];
2372 pixelBit[1] = y[1];
2373 pixelBit[2] = x[0];
2374 pixelBit[3] = x[1];
2375 break;
2376 default:
2377 ADDR_ASSERT_ALWAYS();
2378 ret = ADDR_INVALIDPARAMS;
2379 break;
2380 }
2381 }
2382 else if (IsDisplaySwizzle(rsrcType, swMode))
2383 {
2384 switch (elementBytesLog2)
2385 {
2386 case 0:
2387 pixelBit[0] = x[0];
2388 pixelBit[1] = x[1];
2389 pixelBit[2] = x[2];
2390 pixelBit[3] = y[1];
2391 pixelBit[4] = y[0];
2392 pixelBit[5] = y[2];
2393 pixelBit[6] = x[3];
2394 pixelBit[7] = y[3];
2395 break;
2396 case 1:
2397 pixelBit[0] = x[0];
2398 pixelBit[1] = x[1];
2399 pixelBit[2] = x[2];
2400 pixelBit[3] = y[0];
2401 pixelBit[4] = y[1];
2402 pixelBit[5] = y[2];
2403 pixelBit[6] = x[3];
2404 break;
2405 case 2:
2406 pixelBit[0] = x[0];
2407 pixelBit[1] = x[1];
2408 pixelBit[2] = y[0];
2409 pixelBit[3] = x[2];
2410 pixelBit[4] = y[1];
2411 pixelBit[5] = y[2];
2412 break;
2413 case 3:
2414 pixelBit[0] = x[0];
2415 pixelBit[1] = y[0];
2416 pixelBit[2] = x[1];
2417 pixelBit[3] = x[2];
2418 pixelBit[4] = y[1];
2419 break;
2420 case 4:
2421 pixelBit[0] = x[0];
2422 pixelBit[1] = y[0];
2423 pixelBit[2] = x[1];
2424 pixelBit[3] = y[1];
2425 break;
2426 default:
2427 ADDR_ASSERT_ALWAYS();
2428 ret = ADDR_INVALIDPARAMS;
2429 break;
2430 }
2431 }
2432 else if (IsRotateSwizzle(swMode))
2433 {
2434 switch (elementBytesLog2)
2435 {
2436 case 0:
2437 pixelBit[0] = y[0];
2438 pixelBit[1] = y[1];
2439 pixelBit[2] = y[2];
2440 pixelBit[3] = x[1];
2441 pixelBit[4] = x[0];
2442 pixelBit[5] = x[2];
2443 pixelBit[6] = x[3];
2444 pixelBit[7] = y[3];
2445 break;
2446 case 1:
2447 pixelBit[0] = y[0];
2448 pixelBit[1] = y[1];
2449 pixelBit[2] = y[2];
2450 pixelBit[3] = x[0];
2451 pixelBit[4] = x[1];
2452 pixelBit[5] = x[2];
2453 pixelBit[6] = x[3];
2454 break;
2455 case 2:
2456 pixelBit[0] = y[0];
2457 pixelBit[1] = y[1];
2458 pixelBit[2] = x[0];
2459 pixelBit[3] = y[2];
2460 pixelBit[4] = x[1];
2461 pixelBit[5] = x[2];
2462 break;
2463 case 3:
2464 pixelBit[0] = y[0];
2465 pixelBit[1] = x[0];
2466 pixelBit[2] = y[1];
2467 pixelBit[3] = x[1];
2468 pixelBit[4] = x[2];
2469 break;
2470 default:
2471 ADDR_ASSERT_ALWAYS();
2472 case 4:
2473 ret = ADDR_INVALIDPARAMS;
2474 break;
2475 }
2476 }
2477 else
2478 {
2479 ADDR_ASSERT_ALWAYS();
2480 ret = ADDR_INVALIDPARAMS;
2481 }
2482
2483 // Post validation
2484 if (ret == ADDR_OK)
2485 {
2486 MAYBE_UNUSED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2487 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2488 (microBlockDim.w * (1 << elementBytesLog2)));
2489 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2490 }
2491
2492 return ret;
2493 }
2494
2495 /**
2496 ************************************************************************************************************************
2497 * Gfx9Lib::HwlComputeThinEquation
2498 *
2499 * @brief
2500 * Interface function stub of ComputeThinEquation
2501 *
2502 * @return
2503 * ADDR_E_RETURNCODE
2504 ************************************************************************************************************************
2505 */
2506 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2507 AddrResourceType rsrcType,
2508 AddrSwizzleMode swMode,
2509 UINT_32 elementBytesLog2,
2510 ADDR_EQUATION* pEquation) const
2511 {
2512 ADDR_E_RETURNCODE ret = ADDR_OK;
2513
2514 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2515
2516 UINT_32 maxXorBits = blockSizeLog2;
2517 if (IsNonPrtXor(swMode))
2518 {
2519 // For non-prt-xor, maybe need to initialize some more bits for xor
2520 // The highest xor bit used in equation will be max the following 3 items:
2521 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2522 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2523 // 3. blockSizeLog2
2524
2525 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2526 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2527 GetPipeXorBits(blockSizeLog2) +
2528 2 * GetBankXorBits(blockSizeLog2));
2529 }
2530
2531 const UINT_32 maxBitsUsed = 14;
2532 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2533 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2534 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2535
2536 const UINT_32 extraXorBits = 16;
2537 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2538 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2539
2540 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2541 {
2542 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2543 InitChannel(1, 1, i, &y[i]);
2544 }
2545
2546 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2547
2548 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2549 {
2550 InitChannel(1, 0 , i, &pixelBit[i]);
2551 }
2552
2553 UINT_32 xIdx = 0;
2554 UINT_32 yIdx = 0;
2555 UINT_32 lowBits = 0;
2556
2557 if (IsZOrderSwizzle(swMode))
2558 {
2559 if (elementBytesLog2 <= 3)
2560 {
2561 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2562 {
2563 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2564 }
2565
2566 lowBits = 6;
2567 }
2568 else
2569 {
2570 ret = ADDR_INVALIDPARAMS;
2571 }
2572 }
2573 else
2574 {
2575 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2576
2577 if (ret == ADDR_OK)
2578 {
2579 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2580 xIdx = Log2(microBlockDim.w);
2581 yIdx = Log2(microBlockDim.h);
2582 lowBits = 8;
2583 }
2584 }
2585
2586 if (ret == ADDR_OK)
2587 {
2588 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2589 {
2590 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2591 }
2592
2593 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2594 {
2595 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2596 }
2597
2598 if (IsXor(swMode))
2599 {
2600 // Fill XOR bits
2601 UINT_32 pipeStart = m_pipeInterleaveLog2;
2602 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2603
2604 UINT_32 bankStart = pipeStart + pipeXorBits;
2605 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2606
2607 for (UINT_32 i = 0; i < pipeXorBits; i++)
2608 {
2609 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2610 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2611 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2612
2613 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2614 }
2615
2616 for (UINT_32 i = 0; i < bankXorBits; i++)
2617 {
2618 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2619 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2620 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2621
2622 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2623 }
2624
2625 if (IsPrt(swMode) == FALSE)
2626 {
2627 for (UINT_32 i = 0; i < pipeXorBits; i++)
2628 {
2629 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2630 }
2631
2632 for (UINT_32 i = 0; i < bankXorBits; i++)
2633 {
2634 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2635 }
2636 }
2637 }
2638
2639 pEquation->numBits = blockSizeLog2;
2640 }
2641
2642 return ret;
2643 }
2644
2645 /**
2646 ************************************************************************************************************************
2647 * Gfx9Lib::HwlComputeThickEquation
2648 *
2649 * @brief
2650 * Interface function stub of ComputeThickEquation
2651 *
2652 * @return
2653 * ADDR_E_RETURNCODE
2654 ************************************************************************************************************************
2655 */
2656 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2657 AddrResourceType rsrcType,
2658 AddrSwizzleMode swMode,
2659 UINT_32 elementBytesLog2,
2660 ADDR_EQUATION* pEquation) const
2661 {
2662 ADDR_E_RETURNCODE ret = ADDR_OK;
2663
2664 ADDR_ASSERT(IsTex3d(rsrcType));
2665
2666 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2667
2668 UINT_32 maxXorBits = blockSizeLog2;
2669 if (IsNonPrtXor(swMode))
2670 {
2671 // For non-prt-xor, maybe need to initialize some more bits for xor
2672 // The highest xor bit used in equation will be max the following 3:
2673 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2674 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2675 // 3. blockSizeLog2
2676
2677 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2678 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2679 GetPipeXorBits(blockSizeLog2) +
2680 3 * GetBankXorBits(blockSizeLog2));
2681 }
2682
2683 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2684 {
2685 InitChannel(1, 0 , i, &pEquation->addr[i]);
2686 }
2687
2688 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2689
2690 const UINT_32 maxBitsUsed = 12;
2691 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2692 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2693 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2694 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2695
2696 const UINT_32 extraXorBits = 24;
2697 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2698 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2699
2700 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2701 {
2702 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2703 InitChannel(1, 1, i, &y[i]);
2704 InitChannel(1, 2, i, &z[i]);
2705 }
2706
2707 if (IsZOrderSwizzle(swMode))
2708 {
2709 switch (elementBytesLog2)
2710 {
2711 case 0:
2712 pixelBit[0] = x[0];
2713 pixelBit[1] = y[0];
2714 pixelBit[2] = x[1];
2715 pixelBit[3] = y[1];
2716 pixelBit[4] = z[0];
2717 pixelBit[5] = z[1];
2718 pixelBit[6] = x[2];
2719 pixelBit[7] = z[2];
2720 pixelBit[8] = y[2];
2721 pixelBit[9] = x[3];
2722 break;
2723 case 1:
2724 pixelBit[0] = x[0];
2725 pixelBit[1] = y[0];
2726 pixelBit[2] = x[1];
2727 pixelBit[3] = y[1];
2728 pixelBit[4] = z[0];
2729 pixelBit[5] = z[1];
2730 pixelBit[6] = z[2];
2731 pixelBit[7] = y[2];
2732 pixelBit[8] = x[2];
2733 break;
2734 case 2:
2735 pixelBit[0] = x[0];
2736 pixelBit[1] = y[0];
2737 pixelBit[2] = x[1];
2738 pixelBit[3] = z[0];
2739 pixelBit[4] = y[1];
2740 pixelBit[5] = z[1];
2741 pixelBit[6] = y[2];
2742 pixelBit[7] = x[2];
2743 break;
2744 case 3:
2745 pixelBit[0] = x[0];
2746 pixelBit[1] = y[0];
2747 pixelBit[2] = z[0];
2748 pixelBit[3] = x[1];
2749 pixelBit[4] = z[1];
2750 pixelBit[5] = y[1];
2751 pixelBit[6] = x[2];
2752 break;
2753 case 4:
2754 pixelBit[0] = x[0];
2755 pixelBit[1] = y[0];
2756 pixelBit[2] = z[0];
2757 pixelBit[3] = z[1];
2758 pixelBit[4] = y[1];
2759 pixelBit[5] = x[1];
2760 break;
2761 default:
2762 ADDR_ASSERT_ALWAYS();
2763 ret = ADDR_INVALIDPARAMS;
2764 break;
2765 }
2766 }
2767 else if (IsStandardSwizzle(rsrcType, swMode))
2768 {
2769 switch (elementBytesLog2)
2770 {
2771 case 0:
2772 pixelBit[0] = x[0];
2773 pixelBit[1] = x[1];
2774 pixelBit[2] = x[2];
2775 pixelBit[3] = x[3];
2776 pixelBit[4] = y[0];
2777 pixelBit[5] = y[1];
2778 pixelBit[6] = z[0];
2779 pixelBit[7] = z[1];
2780 pixelBit[8] = z[2];
2781 pixelBit[9] = y[2];
2782 break;
2783 case 1:
2784 pixelBit[0] = x[0];
2785 pixelBit[1] = x[1];
2786 pixelBit[2] = x[2];
2787 pixelBit[3] = y[0];
2788 pixelBit[4] = y[1];
2789 pixelBit[5] = z[0];
2790 pixelBit[6] = z[1];
2791 pixelBit[7] = z[2];
2792 pixelBit[8] = y[2];
2793 break;
2794 case 2:
2795 pixelBit[0] = x[0];
2796 pixelBit[1] = x[1];
2797 pixelBit[2] = y[0];
2798 pixelBit[3] = y[1];
2799 pixelBit[4] = z[0];
2800 pixelBit[5] = z[1];
2801 pixelBit[6] = y[2];
2802 pixelBit[7] = x[2];
2803 break;
2804 case 3:
2805 pixelBit[0] = x[0];
2806 pixelBit[1] = y[0];
2807 pixelBit[2] = y[1];
2808 pixelBit[3] = z[0];
2809 pixelBit[4] = z[1];
2810 pixelBit[5] = x[1];
2811 pixelBit[6] = x[2];
2812 break;
2813 case 4:
2814 pixelBit[0] = y[0];
2815 pixelBit[1] = y[1];
2816 pixelBit[2] = z[0];
2817 pixelBit[3] = z[1];
2818 pixelBit[4] = x[0];
2819 pixelBit[5] = x[1];
2820 break;
2821 default:
2822 ADDR_ASSERT_ALWAYS();
2823 ret = ADDR_INVALIDPARAMS;
2824 break;
2825 }
2826 }
2827 else
2828 {
2829 ADDR_ASSERT_ALWAYS();
2830 ret = ADDR_INVALIDPARAMS;
2831 }
2832
2833 if (ret == ADDR_OK)
2834 {
2835 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2836 UINT_32 xIdx = Log2(microBlockDim.w);
2837 UINT_32 yIdx = Log2(microBlockDim.h);
2838 UINT_32 zIdx = Log2(microBlockDim.d);
2839
2840 pixelBit = pEquation->addr;
2841
2842 const UINT_32 lowBits = 10;
2843 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2844 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2845
2846 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2847 {
2848 if ((i % 3) == 0)
2849 {
2850 pixelBit[i] = x[xIdx++];
2851 }
2852 else if ((i % 3) == 1)
2853 {
2854 pixelBit[i] = z[zIdx++];
2855 }
2856 else
2857 {
2858 pixelBit[i] = y[yIdx++];
2859 }
2860 }
2861
2862 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2863 {
2864 if ((i % 3) == 0)
2865 {
2866 xorExtra[i - blockSizeLog2] = x[xIdx++];
2867 }
2868 else if ((i % 3) == 1)
2869 {
2870 xorExtra[i - blockSizeLog2] = z[zIdx++];
2871 }
2872 else
2873 {
2874 xorExtra[i - blockSizeLog2] = y[yIdx++];
2875 }
2876 }
2877
2878 if (IsXor(swMode))
2879 {
2880 // Fill XOR bits
2881 UINT_32 pipeStart = m_pipeInterleaveLog2;
2882 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2883 for (UINT_32 i = 0; i < pipeXorBits; i++)
2884 {
2885 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2886 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2887 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2888
2889 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2890
2891 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2892 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2893 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2894
2895 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2896 }
2897
2898 UINT_32 bankStart = pipeStart + pipeXorBits;
2899 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2900 for (UINT_32 i = 0; i < bankXorBits; i++)
2901 {
2902 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2903 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2904 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2905
2906 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2907
2908 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2909 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2910 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2911
2912 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2913 }
2914 }
2915
2916 pEquation->numBits = blockSizeLog2;
2917 }
2918
2919 return ret;
2920 }
2921
2922 /**
2923 ************************************************************************************************************************
2924 * Gfx9Lib::IsValidDisplaySwizzleMode
2925 *
2926 * @brief
2927 * Check if a swizzle mode is supported by display engine
2928 *
2929 * @return
2930 * TRUE is swizzle mode is supported by display engine
2931 ************************************************************************************************************************
2932 */
2933 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2934 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2935 {
2936 BOOL_32 support = FALSE;
2937
2938 if (m_settings.isDce12)
2939 {
2940 switch (pIn->swizzleMode)
2941 {
2942 case ADDR_SW_256B_D:
2943 case ADDR_SW_256B_R:
2944 support = (pIn->bpp == 32);
2945 break;
2946
2947 case ADDR_SW_LINEAR:
2948 case ADDR_SW_4KB_D:
2949 case ADDR_SW_4KB_R:
2950 case ADDR_SW_64KB_D:
2951 case ADDR_SW_64KB_R:
2952 case ADDR_SW_VAR_D:
2953 case ADDR_SW_VAR_R:
2954 case ADDR_SW_4KB_D_X:
2955 case ADDR_SW_4KB_R_X:
2956 case ADDR_SW_64KB_D_X:
2957 case ADDR_SW_64KB_R_X:
2958 case ADDR_SW_VAR_D_X:
2959 case ADDR_SW_VAR_R_X:
2960 support = (pIn->bpp <= 64);
2961 break;
2962
2963 default:
2964 break;
2965 }
2966 }
2967 else if (m_settings.isDcn1)
2968 {
2969 switch (pIn->swizzleMode)
2970 {
2971 case ADDR_SW_4KB_D:
2972 case ADDR_SW_64KB_D:
2973 case ADDR_SW_VAR_D:
2974 case ADDR_SW_64KB_D_T:
2975 case ADDR_SW_4KB_D_X:
2976 case ADDR_SW_64KB_D_X:
2977 case ADDR_SW_VAR_D_X:
2978 support = (pIn->bpp == 64);
2979 break;
2980
2981 case ADDR_SW_LINEAR:
2982 case ADDR_SW_4KB_S:
2983 case ADDR_SW_64KB_S:
2984 case ADDR_SW_VAR_S:
2985 case ADDR_SW_64KB_S_T:
2986 case ADDR_SW_4KB_S_X:
2987 case ADDR_SW_64KB_S_X:
2988 case ADDR_SW_VAR_S_X:
2989 support = (pIn->bpp <= 64);
2990 break;
2991
2992 default:
2993 break;
2994 }
2995 }
2996 else
2997 {
2998 ADDR_NOT_IMPLEMENTED();
2999 }
3000
3001 return support;
3002 }
3003
3004 /**
3005 ************************************************************************************************************************
3006 * Gfx9Lib::HwlComputePipeBankXor
3007 *
3008 * @brief
3009 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3010 *
3011 * @return
3012 * PipeBankXor value
3013 ************************************************************************************************************************
3014 */
3015 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3016 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3017 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
3018 {
3019 if (IsXor(pIn->swizzleMode))
3020 {
3021 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3022 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3023 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3024
3025 UINT_32 pipeXor = 0;
3026 UINT_32 bankXor = 0;
3027
3028 const UINT_32 bankMask = (1 << bankBits) - 1;
3029 const UINT_32 index = pIn->surfIndex & bankMask;
3030
3031 const UINT_32 bpp = pIn->flags.fmask ?
3032 GetFmaskBpp(pIn->numSamples, pIn->nu