9be775f35f9b2936d8c7ff15b8a4d5eeaa385653
[mesa.git] / src / amd / addrlib / src / gfx9 / gfx9addrlib.cpp
1 /*
2 * Copyright © 2007-2018 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx9addrlib.h"
35
36 #include "gfx9_gb_reg.h"
37
38 #include "amdgpu_asic_addr.h"
39
40 #include "util/macros.h"
41
42 ////////////////////////////////////////////////////////////////////////////////////////////////////
43 ////////////////////////////////////////////////////////////////////////////////////////////////////
44
45 namespace Addr
46 {
47
48 /**
49 ************************************************************************************************************************
50 * Gfx9HwlInit
51 *
52 * @brief
53 * Creates a Gfx9Lib object.
54 *
55 * @return
56 * Returns a Gfx9Lib object pointer.
57 ************************************************************************************************************************
58 */
59 Addr::Lib* Gfx9HwlInit(const Client* pClient)
60 {
61 return V2::Gfx9Lib::CreateObj(pClient);
62 }
63
64 namespace V2
65 {
66
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68 // Static Const Member
69 ////////////////////////////////////////////////////////////////////////////////////////////////////
70
// Static table of per-swizzle-mode flags, declared with ADDR_SW_MAX_TYPE entries. The trailing
// comment on each row names the swizzle mode that row describes. The Gfx9Lib constructor copies
// this table into m_swizzleModeTable. Every row holds twelve 0/1 values in the column order given
// by the header comment on the opening brace:
//   Linear, 256B, 4KB, 64KB, Var, Z, Std, Disp, Rot, XOR, T, RtOpt
// In this table each row sets exactly one of the first five columns, and the RtOpt column is 0
// for every row.
71 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
72 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt
73 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
74 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
75 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D
76 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R
77 // 4KB modes (no XOR)
78 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
79 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
80 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D
81 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R
82 // 64KB modes (no XOR)
83 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
84 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
85 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D
86 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_R
87 // Var modes (no XOR)
88 {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z
89 {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S
90 {0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D
91 {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R
92 // 64KB modes with both the XOR and T columns set
93 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T
94 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T
95 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T
96 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T
97 // 4KB modes with the XOR column set
98 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_x
99 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_x
100 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_x
101 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_x
102 // 64KB modes with the XOR column set
103 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X
104 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X
105 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X
106 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X
107 // Var modes with the XOR column set
108 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X
109 {0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X
110 {0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X
111 {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X
112 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL (same flags as ADDR_SW_LINEAR)
113 };
114
// Static table of 16 offsets: powers of two from 2048 down to 8, followed by 6, 5, 4, 3, 2, 1, 0.
// NOTE(review): judging by the name these are mip-tail offsets expressed in 256-byte units, indexed
// by position within the mip tail -- the consumer is outside this view, so confirm at the use site.
115 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
116 8, 6, 5, 4, 3, 2, 1, 0};
117
// Five 3D block dimension triples whose element counts are 256, 128, 64, 32 and 16 respectively.
// NOTE(review): presumably the dimensions of a 256-byte 3D "S" block for 1/2/4/8/16-byte elements,
// indexed by log2(bytes per element) -- the use site is outside this view; confirm there.
118 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
119
// Five 3D block dimension triples whose element counts are 256, 128, 64, 32 and 16 respectively.
// NOTE(review): presumably the dimensions of a 256-byte 3D "Z" block for 1/2/4/8/16-byte elements,
// indexed by log2(bytes per element) -- the use site is outside this view; confirm there.
120 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
121
122 /**
123 ************************************************************************************************************************
124 * Gfx9Lib::Gfx9Lib
125 *
126 * @brief
127 * Constructor
128 *
129 ************************************************************************************************************************
130 */
131 Gfx9Lib::Gfx9Lib(const Client* pClient)
132 :
133 Lib(pClient),
134 m_numEquations(0)
135 {
136 m_class = AI_ADDRLIB;
137 memset(&m_settings, 0, sizeof(m_settings));
138 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
139 m_metaEqOverrideIndex = 0;
140 memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
141 }
142
143 /**
144 ************************************************************************************************************************
145 * Gfx9Lib::~Gfx9Lib
146 *
147 * @brief
148 * Destructor
149 ************************************************************************************************************************
150 */
151 Gfx9Lib::~Gfx9Lib()
152 {
// Empty body: no explicit cleanup is performed by this destructor.
153 }
154
155 /**
156 ************************************************************************************************************************
157 * Gfx9Lib::HwlComputeHtileInfo
158 *
159 * @brief
160 * Interface function stub of AddrComputeHtileInfo
161 *
162 * @return
163 * ADDR_E_RETURNCODE
164 ************************************************************************************************************************
165 */
166 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
167 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
168 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
169 ) const
170 {
171 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
172 pIn->swizzleMode);
173
174 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
175
176 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
177
178 if ((numPipeTotal == 1) && (numRbTotal == 1))
179 {
180 numCompressBlkPerMetaBlkLog2 = 10;
181 }
182 else
183 {
184 if (m_settings.applyAliasFix)
185 {
186 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
187 }
188 else
189 {
190 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
191 }
192 }
193
194 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
195
196 Dim3d metaBlkDim = {8, 8, 1};
197 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
198 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
199 UINT_32 heightAmp = totalAmpBits - widthAmp;
200 metaBlkDim.w <<= widthAmp;
201 metaBlkDim.h <<= heightAmp;
202
203 #if DEBUG
204 Dim3d metaBlkDimDbg = {8, 8, 1};
205 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
206 {
207 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
208 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
209 {
210 metaBlkDimDbg.h <<= 1;
211 }
212 else
213 {
214 metaBlkDimDbg.w <<= 1;
215 }
216 }
217 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
218 #endif
219
220 UINT_32 numMetaBlkX;
221 UINT_32 numMetaBlkY;
222 UINT_32 numMetaBlkZ;
223
224 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
225 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
226 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
227
228 const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
229 UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
230
231 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
232 {
233 align *= (numPipeTotal >> 1);
234 }
235
236 align = Max(align, metaBlkSize);
237
238 if (m_settings.metaBaseAlignFix)
239 {
240 align = Max(align, GetBlockSize(pIn->swizzleMode));
241 }
242
243 if (m_settings.htileAlignFix)
244 {
245 const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
246 const INT_32 htileCachelineSizeLog2 = 11;
247 const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
248
249 INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
250
251 align <<= rbMaskPadding;
252 }
253
254 pOut->pitch = numMetaBlkX * metaBlkDim.w;
255 pOut->height = numMetaBlkY * metaBlkDim.h;
256 pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
257
258 pOut->metaBlkWidth = metaBlkDim.w;
259 pOut->metaBlkHeight = metaBlkDim.h;
260 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
261
262 pOut->baseAlign = align;
263 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
264
265 return ADDR_OK;
266 }
267
268 /**
269 ************************************************************************************************************************
270 * Gfx9Lib::HwlComputeCmaskInfo
271 *
272 * @brief
273 * Interface function stub of AddrComputeCmaskInfo
274 *
275 * @return
276 * ADDR_E_RETURNCODE
277 ************************************************************************************************************************
278 */
279 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
280 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
281 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
282 ) const
283 {
284 // TODO: Clarify with AddrLib team
285 // ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
286
287 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
288 pIn->swizzleMode);
289
290 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
291
292 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
293
294 if ((numPipeTotal == 1) && (numRbTotal == 1))
295 {
296 numCompressBlkPerMetaBlkLog2 = 13;
297 }
298 else
299 {
300 if (m_settings.applyAliasFix)
301 {
302 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
303 }
304 else
305 {
306 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
307 }
308
309 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
310 }
311
312 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
313
314 Dim2d metaBlkDim = {8, 8};
315 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
316 UINT_32 heightAmp = totalAmpBits >> 1;
317 UINT_32 widthAmp = totalAmpBits - heightAmp;
318 metaBlkDim.w <<= widthAmp;
319 metaBlkDim.h <<= heightAmp;
320
321 #if DEBUG
322 Dim2d metaBlkDimDbg = {8, 8};
323 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
324 {
325 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
326 {
327 metaBlkDimDbg.h <<= 1;
328 }
329 else
330 {
331 metaBlkDimDbg.w <<= 1;
332 }
333 }
334 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
335 #endif
336
337 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
338 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
339 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
340
341 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
342
343 if (m_settings.metaBaseAlignFix)
344 {
345 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
346 }
347
348 pOut->pitch = numMetaBlkX * metaBlkDim.w;
349 pOut->height = numMetaBlkY * metaBlkDim.h;
350 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
351 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
352 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
353
354 pOut->metaBlkWidth = metaBlkDim.w;
355 pOut->metaBlkHeight = metaBlkDim.h;
356
357 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
358
359 return ADDR_OK;
360 }
361
362 /**
363 ************************************************************************************************************************
364 * Gfx9Lib::GetMetaMipInfo
365 *
366 * @brief
367 * Get meta mip info
368 *
369 * @return
370 * N/A
371 ************************************************************************************************************************
372 */
// Computes how many meta blocks are needed in X, Y and Z to hold a whole mip chain and, when pInfo
// is not NULL, fills one ADDR2_META_MIP_INFO entry per mip level with its start coordinate and
// block-aligned size. Levels that fit the tail dimensions (block width x half block height, plus
// block depth for thick data) are delegated to GetMetaMiptailInfo().
373 VOID Gfx9Lib::GetMetaMipInfo(
374 UINT_32 numMipLevels, ///< [in] number of mip levels
375 Dim3d* pMetaBlkDim, ///< [in] meta block dimension (only read here)
376 BOOL_32 dataThick, ///< [in] data surface is thick
377 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info array; NULL skips the per-mip output
378 UINT_32 mip0Width, ///< [in] mip0 width
379 UINT_32 mip0Height, ///< [in] mip0 height
380 UINT_32 mip0Depth, ///< [in] mip0 depth
381 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
382 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
383 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
384 const
385 {
386 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w; // ceil(mip0Width / block width)
387 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h; // ceil(mip0Height / block height)
388 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d; // ceil(mip0Depth / block depth)
389 UINT_32 tailWidth = pMetaBlkDim->w; // tail limit: full block width ...
390 UINT_32 tailHeight = pMetaBlkDim->h >> 1; // ... but only half the block height
391 UINT_32 tailDepth = pMetaBlkDim->d;
392 BOOL_32 inTail = FALSE;
393 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE; // stays at this value for single-mip surfaces
394 // The block counts are only enlarged, and a major axis only chosen, when there is a mip chain.
395 if (numMipLevels > 1)
396 {
397 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
398 {
399 // Z major
400 major = ADDR_MAJOR_Z;
401 }
402 else if (numMetaBlkX >= numMetaBlkY)
403 {
404 // X major
405 major = ADDR_MAJOR_X;
406 }
407 else
408 {
409 // Y major
410 major = ADDR_MAJOR_Y;
411 }
412 // The whole chain is "in tail" when mip0 already fits the tail limits (depth only counts for thick data).
413 inTail = ((mip0Width <= tailWidth) &&
414 (mip0Height <= tailHeight) &&
415 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
416
417 if (inTail == FALSE)
418 {
419 UINT_32 orderLimit; // threshold on the major-axis block count
420 UINT_32 *pMipDim; // block count that is enlarged to make room for the sub-levels
421 UINT_32 *pOrderDim; // block count along the major axis
422
423 if (major == ADDR_MAJOR_Z)
424 {
425 // Z major
426 pMipDim = &numMetaBlkY;
427 pOrderDim = &numMetaBlkZ;
428 orderLimit = 4;
429 }
430 else if (major == ADDR_MAJOR_X)
431 {
432 // X major
433 pMipDim = &numMetaBlkY;
434 pOrderDim = &numMetaBlkX;
435 orderLimit = 4;
436 }
437 else
438 {
439 // Y major
440 pMipDim = &numMetaBlkX;
441 pOrderDim = &numMetaBlkY;
442 orderLimit = 2;
443 }
444 // Fewer than 3 blocks, a major axis above orderLimit and more than 3 mips: add 2 blocks. Otherwise add ceil(count / 2).
445 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
446 {
447 *pMipDim += 2;
448 }
449 else
450 {
451 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
452 }
453 }
454 }
455 // Optional per-mip output: walk the chain, recording each level's start coordinate and aligned size.
456 if (pInfo != NULL)
457 {
458 UINT_32 mipWidth = mip0Width;
459 UINT_32 mipHeight = mip0Height;
460 UINT_32 mipDepth = mip0Depth;
461 Dim3d mipCoord = {0}; // start coordinate of the current level
462
463 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
464 {
465 if (inTail)
466 {
// The helper receives the current entry, the current coordinate and the count of remaining
// levels (numMipLevels - mip); the loop ends right after this call.
467 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
468 pMetaBlkDim);
469 break;
470 }
471 else
472 {
473 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w); // round the level up to whole meta blocks
474 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
475 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
476
477 pInfo[mip].inMiptail = FALSE;
478 pInfo[mip].startX = mipCoord.w;
479 pInfo[mip].startY = mipCoord.h;
480 pInfo[mip].startZ = mipCoord.d;
481 pInfo[mip].width = mipWidth;
482 pInfo[mip].height = mipHeight;
483 pInfo[mip].depth = dataThick ? mipDepth : 1; // depth is reported as 1 for non-thick data
484 // Advance the start coordinate for the next level: odd levels and levels >= 3 step along the major axis.
485 if ((mip >= 3) || (mip & 1))
486 {
487 switch (major)
488 {
489 case ADDR_MAJOR_X:
490 mipCoord.w += mipWidth;
491 break;
492 case ADDR_MAJOR_Y:
493 mipCoord.h += mipHeight;
494 break;
495 case ADDR_MAJOR_Z:
496 mipCoord.d += mipDepth;
497 break;
498 default:
499 break;
500 }
501 }
502 else // levels 0 and 2 step along the other axis instead
503 {
504 switch (major)
505 {
506 case ADDR_MAJOR_X:
507 mipCoord.h += mipHeight;
508 break;
509 case ADDR_MAJOR_Y:
510 mipCoord.w += mipWidth;
511 break;
512 case ADDR_MAJOR_Z:
513 mipCoord.h += mipHeight;
514 break;
515 default:
516 break;
517 }
518 }
519 // The next level is half the (already block-aligned) current level, never below 1.
520 mipWidth = Max(mipWidth >> 1, 1u);
521 mipHeight = Max(mipHeight >> 1, 1u);
522 mipDepth = Max(mipDepth >> 1, 1u);
523 // Re-evaluate the tail test for the next level.
524 inTail = ((mipWidth <= tailWidth) &&
525 (mipHeight <= tailHeight) &&
526 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
527 }
528 }
529 }
530 // The block counts are always written, whether or not pInfo was supplied.
531 *pNumMetaBlkX = numMetaBlkX;
532 *pNumMetaBlkY = numMetaBlkY;
533 *pNumMetaBlkZ = numMetaBlkZ;
534 }
535
536 /**
537 ************************************************************************************************************************
538 * Gfx9Lib::HwlComputeDccInfo
539 *
540 * @brief
541 * Interface function to compute DCC key info
542 *
543 * @return
544 * ADDR_E_RETURNCODE
545 ************************************************************************************************************************
546 */
// Computes DCC key size, base alignment and block geometry for a surface. Always returns ADDR_OK.
// The linear-metadata path asserts unconditionally but still fills dccRamBaseAlign / dccRamSize;
// the regular path derives the compress block and meta block dimensions and fills the remaining
// output fields.
547 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
548 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
549 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
550 ) const
551 {
552 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
553 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
554 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
555 // Linear data forces linear metadata; linear metadata on non-linear data drops pipe alignment.
556 if (dataLinear)
557 {
558 metaLinear = TRUE;
559 }
560 else if (metaLinear == TRUE)
561 {
562 pipeAligned = FALSE;
563 }
564
565 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
566
567 if (metaLinear)
568 {
569 // Linear metadata support was removed for GFX9; no one can use this feature on GFX9.
570 ADDR_ASSERT_ALWAYS();
571 // Outputs are still produced after the assert: size is dataSurfaceSize / 256, aligned to the base alignment.
572 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
573 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
574 }
575 else
576 {
577 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
578
579 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
580 // Treat a fragment or slice count of 0 as 1.
581 UINT_32 numFrags = Max(pIn->numFrags, 1u);
582 UINT_32 numSlices = Max(pIn->numSlices, 1u);
583
584 minMetaBlkSize /= numFrags;
585
586 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
587
588 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
589 // With more than one pipe or RB the count is raised to at least SEs * RBs-per-SE * block size, then capped at 65536 * bpp.
590 if ((numPipeTotal > 1) || (numRbTotal > 1))
591 {
592 const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
593
594 numCompressBlkPerMetaBlk =
595 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
596
597 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
598 {
599 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
600 }
601 }
602
603 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
604 Dim3d metaBlkDim = compressBlkDim;
605 // Start from one compress block and double one dimension per iteration; index doubles from 1 until it reaches numCompressBlkPerMetaBlk.
606 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
607 {
608 if ((metaBlkDim.h < metaBlkDim.w) ||
609 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
610 {
// Height is the candidate; for thick data depth is doubled instead when height already exceeds it.
611 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
612 {
613 metaBlkDim.h <<= 1;
614 }
615 else
616 {
617 metaBlkDim.d <<= 1;
618 }
619 }
620 else
621 {
// Width is the candidate; for thick data depth is doubled instead when width already exceeds it.
622 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
623 {
624 metaBlkDim.w <<= 1;
625 }
626 else
627 {
628 metaBlkDim.d <<= 1;
629 }
630 }
631 }
632 // Meta block counts over the whole mip chain; also fills pOut->pMipInfo when it is not NULL.
633 UINT_32 numMetaBlkX;
634 UINT_32 numMetaBlkY;
635 UINT_32 numMetaBlkZ;
636
637 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
638 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
639 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
640 // Size alignment: pipes * RBs * pipe interleave size, scaled when there are more fragments than m_maxCompFrag.
641 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
642
643 if (numFrags > m_maxCompFrag)
644 {
645 sizeAlign *= (numFrags / m_maxCompFrag);
646 }
647
648 if (m_settings.metaBaseAlignFix)
649 {
650 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
651 }
652 // Total size over all meta blocks and fragments, rounded up to sizeAlign.
653 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
654 numCompressBlkPerMetaBlk * numFrags;
655 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
656 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
657 // Dimensions padded out to whole meta blocks.
658 pOut->pitch = numMetaBlkX * metaBlkDim.w;
659 pOut->height = numMetaBlkY * metaBlkDim.h;
660 pOut->depth = numMetaBlkZ * metaBlkDim.d;
661
662 pOut->compressBlkWidth = compressBlkDim.w;
663 pOut->compressBlkHeight = compressBlkDim.h;
664 pOut->compressBlkDepth = compressBlkDim.d;
665
666 pOut->metaBlkWidth = metaBlkDim.w;
667 pOut->metaBlkHeight = metaBlkDim.h;
668 pOut->metaBlkDepth = metaBlkDim.d;
669
670 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
// The fast-clear size counts at most m_maxCompFrag fragments.
671 pOut->fastClearSizePerSlice =
672 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
673 }
674
675 return ADDR_OK;
676 }
677
678 /**
679 ************************************************************************************************************************
680 * Gfx9Lib::HwlComputeMaxBaseAlignments
681 *
682 * @brief
683 * Gets maximum alignments
684 * @return
685 * maximum alignments
686 ************************************************************************************************************************
687 */
688 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
689 {
690 return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB);
691 }
692
693 /**
694 ************************************************************************************************************************
695 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
696 *
697 * @brief
698 * Gets maximum alignments for metadata
699 * @return
700 * maximum alignments for metadata
701 ************************************************************************************************************************
702 */
703 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
704 {
705 // Max base alignment for Htile
706 const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
707 const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
708
709 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
710 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
711 ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
712 const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
713
714 UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
715
716 if (maxNumPipeTotal > 2)
717 {
718 maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
719 }
720
721 maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
722
723 if (m_settings.metaBaseAlignFix)
724 {
725 maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB));
726 }
727
728 if (m_settings.htileAlignFix)
729 {
730 maxBaseAlignHtile *= maxNumPipeTotal;
731 }
732
733 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
734
735 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
736 UINT_32 maxBaseAlignDcc3D = 65536;
737
738 if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
739 {
740 maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
741 }
742
743 // Max base alignment for Msaa Dcc
744 UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
745
746 if (m_settings.metaBaseAlignFix)
747 {
748 maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB));
749 }
750
751 return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
752 }
753
754 /**
755 ************************************************************************************************************************
756 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
757 *
758 * @brief
759 * Interface function stub of AddrComputeCmaskAddrFromCoord
760 *
761 * @return
762 * ADDR_E_RETURNCODE
763 ************************************************************************************************************************
764 */
765 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
766 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
767 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
768 {
769 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
770 input.size = sizeof(input);
771 input.cMaskFlags = pIn->cMaskFlags;
772 input.colorFlags = pIn->colorFlags;
773 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
774 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
775 input.numSlices = Max(pIn->numSlices, 1u);
776 input.swizzleMode = pIn->swizzleMode;
777 input.resourceType = pIn->resourceType;
778
779 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
780 output.size = sizeof(output);
781
782 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
783
784 if (returnCode == ADDR_OK)
785 {
786 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
787 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
788 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
789 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
790
791 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
792 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
793 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
794
795 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
796
797 UINT_32 xb = pIn->x / output.metaBlkWidth;
798 UINT_32 yb = pIn->y / output.metaBlkHeight;
799 UINT_32 zb = pIn->slice;
800
801 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
802 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
803 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
804
805 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
806
807 pOut->addr = address >> 1;
808 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
809
810 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
811 pIn->swizzleMode);
812
813 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
814
815 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
816 }
817
818 return returnCode;
819 }
820
821 /**
822 ************************************************************************************************************************
823 * Gfx9Lib::HwlComputeHtileAddrFromCoord
824 *
825 * @brief
826 * Interface function stub of AddrComputeHtileAddrFromCoord
827 *
828 * @return
829 * ADDR_E_RETURNCODE
830 ************************************************************************************************************************
831 */
832 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
833 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
834 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
835 {
836 ADDR_E_RETURNCODE returnCode = ADDR_OK;
837
838 if (pIn->numMipLevels > 1)
839 {
840 returnCode = ADDR_NOTIMPLEMENTED;
841 }
842 else
843 {
844 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
845 input.size = sizeof(input);
846 input.hTileFlags = pIn->hTileFlags;
847 input.depthFlags = pIn->depthflags;
848 input.swizzleMode = pIn->swizzleMode;
849 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
850 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
851 input.numSlices = Max(pIn->numSlices, 1u);
852 input.numMipLevels = Max(pIn->numMipLevels, 1u);
853
854 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
855 output.size = sizeof(output);
856
857 returnCode = ComputeHtileInfo(&input, &output);
858
859 if (returnCode == ADDR_OK)
860 {
861 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
862 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
863 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
864 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
865
866 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
867 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
868 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
869
870 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
871
872 UINT_32 xb = pIn->x / output.metaBlkWidth;
873 UINT_32 yb = pIn->y / output.metaBlkHeight;
874 UINT_32 zb = pIn->slice;
875
876 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
877 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
878 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
879
880 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
881
882 pOut->addr = address >> 1;
883
884 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
885 pIn->swizzleMode);
886
887 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
888
889 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
890 }
891 }
892
893 return returnCode;
894 }
895
896 /**
897 ************************************************************************************************************************
898 * Gfx9Lib::HwlComputeHtileCoordFromAddr
899 *
900 * @brief
901 * Interface function stub of AddrComputeHtileCoordFromAddr
902 *
903 * @return
904 * ADDR_E_RETURNCODE
905 ************************************************************************************************************************
906 */
907 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
908 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
909 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
910 {
911 ADDR_E_RETURNCODE returnCode = ADDR_OK;
912
913 if (pIn->numMipLevels > 1)
914 {
915 returnCode = ADDR_NOTIMPLEMENTED;
916 }
917 else
918 {
919 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
920 input.size = sizeof(input);
921 input.hTileFlags = pIn->hTileFlags;
922 input.swizzleMode = pIn->swizzleMode;
923 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
924 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
925 input.numSlices = Max(pIn->numSlices, 1u);
926 input.numMipLevels = Max(pIn->numMipLevels, 1u);
927
928 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
929 output.size = sizeof(output);
930
931 returnCode = ComputeHtileInfo(&input, &output);
932
933 if (returnCode == ADDR_OK)
934 {
935 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
936 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
937 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
938 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
939
940 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
941 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
942 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
943
944 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
945
946 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
947 pIn->swizzleMode);
948
949 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
950
951 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
952
953 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
954 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
955
956 UINT_32 x, y, z, s, m;
957 pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
958
959 pOut->slice = m / sliceSizeInBlock;
960 pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
961 pOut->x = (m % pitchInBlock) * output.metaBlkWidth + x;
962 }
963 }
964
965 return returnCode;
966 }
967
968 /**
969 ************************************************************************************************************************
970 * Gfx9Lib::HwlComputeDccAddrFromCoord
971 *
972 * @brief
973 * Interface function stub of AddrComputeDccAddrFromCoord
974 *
975 * @return
976 * ADDR_E_RETURNCODE
977 ************************************************************************************************************************
978 */
979 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
980 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
981 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
982 {
983 ADDR_E_RETURNCODE returnCode = ADDR_OK;
984
985 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
986 {
987 returnCode = ADDR_NOTIMPLEMENTED;
988 }
989 else
990 {
991 ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
992 input.size = sizeof(input);
993 input.dccKeyFlags = pIn->dccKeyFlags;
994 input.colorFlags = pIn->colorFlags;
995 input.swizzleMode = pIn->swizzleMode;
996 input.resourceType = pIn->resourceType;
997 input.bpp = pIn->bpp;
998 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
999 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
1000 input.numSlices = Max(pIn->numSlices, 1u);
1001 input.numFrags = Max(pIn->numFrags, 1u);
1002 input.numMipLevels = Max(pIn->numMipLevels, 1u);
1003
1004 ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
1005 output.size = sizeof(output);
1006
1007 returnCode = ComputeDccInfo(&input, &output);
1008
1009 if (returnCode == ADDR_OK)
1010 {
1011 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
1012 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
1013 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
1014 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1015 UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth);
1016 UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth);
1017 UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
1018 UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
1019
1020 MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1021 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1022 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1023 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1024
1025 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1026
1027 UINT_32 xb = pIn->x / output.metaBlkWidth;
1028 UINT_32 yb = pIn->y / output.metaBlkHeight;
1029 UINT_32 zb = pIn->slice / output.metaBlkDepth;
1030
1031 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
1032 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1033 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1034
1035 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
1036
1037 pOut->addr = address >> 1;
1038
1039 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1040 pIn->swizzleMode);
1041
1042 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1043
1044 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1045 }
1046 }
1047
1048 return returnCode;
1049 }
1050
1051 /**
1052 ************************************************************************************************************************
1053 * Gfx9Lib::HwlInitGlobalParams
1054 *
1055 * @brief
1056 * Initializes global parameters
1057 *
1058 * @return
1059 * TRUE if all settings are valid
1060 *
1061 ************************************************************************************************************************
1062 */
1063 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1064 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1065 {
1066 BOOL_32 valid = TRUE;
1067
1068 if (m_settings.isArcticIsland)
1069 {
1070 GB_ADDR_CONFIG gbAddrConfig;
1071
1072 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1073
1074 // These values are copied from CModel code
1075 switch (gbAddrConfig.bits.NUM_PIPES)
1076 {
1077 case ADDR_CONFIG_1_PIPE:
1078 m_pipes = 1;
1079 m_pipesLog2 = 0;
1080 break;
1081 case ADDR_CONFIG_2_PIPE:
1082 m_pipes = 2;
1083 m_pipesLog2 = 1;
1084 break;
1085 case ADDR_CONFIG_4_PIPE:
1086 m_pipes = 4;
1087 m_pipesLog2 = 2;
1088 break;
1089 case ADDR_CONFIG_8_PIPE:
1090 m_pipes = 8;
1091 m_pipesLog2 = 3;
1092 break;
1093 case ADDR_CONFIG_16_PIPE:
1094 m_pipes = 16;
1095 m_pipesLog2 = 4;
1096 break;
1097 case ADDR_CONFIG_32_PIPE:
1098 m_pipes = 32;
1099 m_pipesLog2 = 5;
1100 break;
1101 default:
1102 ADDR_ASSERT_ALWAYS();
1103 break;
1104 }
1105
1106 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1107 {
1108 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1109 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1110 m_pipeInterleaveLog2 = 8;
1111 break;
1112 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1113 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1114 m_pipeInterleaveLog2 = 9;
1115 break;
1116 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1117 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1118 m_pipeInterleaveLog2 = 10;
1119 break;
1120 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1121 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1122 m_pipeInterleaveLog2 = 11;
1123 break;
1124 default:
1125 ADDR_ASSERT_ALWAYS();
1126 break;
1127 }
1128
1129 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1130 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1131 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1132
1133 switch (gbAddrConfig.bits.NUM_BANKS)
1134 {
1135 case ADDR_CONFIG_1_BANK:
1136 m_banks = 1;
1137 m_banksLog2 = 0;
1138 break;
1139 case ADDR_CONFIG_2_BANK:
1140 m_banks = 2;
1141 m_banksLog2 = 1;
1142 break;
1143 case ADDR_CONFIG_4_BANK:
1144 m_banks = 4;
1145 m_banksLog2 = 2;
1146 break;
1147 case ADDR_CONFIG_8_BANK:
1148 m_banks = 8;
1149 m_banksLog2 = 3;
1150 break;
1151 case ADDR_CONFIG_16_BANK:
1152 m_banks = 16;
1153 m_banksLog2 = 4;
1154 break;
1155 default:
1156 ADDR_ASSERT_ALWAYS();
1157 break;
1158 }
1159
1160 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1161 {
1162 case ADDR_CONFIG_1_SHADER_ENGINE:
1163 m_se = 1;
1164 m_seLog2 = 0;
1165 break;
1166 case ADDR_CONFIG_2_SHADER_ENGINE:
1167 m_se = 2;
1168 m_seLog2 = 1;
1169 break;
1170 case ADDR_CONFIG_4_SHADER_ENGINE:
1171 m_se = 4;
1172 m_seLog2 = 2;
1173 break;
1174 case ADDR_CONFIG_8_SHADER_ENGINE:
1175 m_se = 8;
1176 m_seLog2 = 3;
1177 break;
1178 default:
1179 ADDR_ASSERT_ALWAYS();
1180 break;
1181 }
1182
1183 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1184 {
1185 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1186 m_rbPerSe = 1;
1187 m_rbPerSeLog2 = 0;
1188 break;
1189 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1190 m_rbPerSe = 2;
1191 m_rbPerSeLog2 = 1;
1192 break;
1193 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1194 m_rbPerSe = 4;
1195 m_rbPerSeLog2 = 2;
1196 break;
1197 default:
1198 ADDR_ASSERT_ALWAYS();
1199 break;
1200 }
1201
1202 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1203 {
1204 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1205 m_maxCompFrag = 1;
1206 m_maxCompFragLog2 = 0;
1207 break;
1208 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1209 m_maxCompFrag = 2;
1210 m_maxCompFragLog2 = 1;
1211 break;
1212 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1213 m_maxCompFrag = 4;
1214 m_maxCompFragLog2 = 2;
1215 break;
1216 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1217 m_maxCompFrag = 8;
1218 m_maxCompFragLog2 = 3;
1219 break;
1220 default:
1221 ADDR_ASSERT_ALWAYS();
1222 break;
1223 }
1224
1225 m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
1226 ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
1227 ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
1228 m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
1229
1230 if ((m_rbPerSeLog2 == 1) &&
1231 (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1232 ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1233 {
1234 ADDR_ASSERT(m_settings.isVega10 == FALSE);
1235 ADDR_ASSERT(m_settings.isRaven == FALSE);
1236 ADDR_ASSERT(m_settings.isVega20 == FALSE);
1237
1238 if (m_settings.isVega12)
1239 {
1240 m_settings.htileCacheRbConflict = 1;
1241 }
1242 }
1243 }
1244 else
1245 {
1246 valid = FALSE;
1247 ADDR_NOT_IMPLEMENTED();
1248 }
1249
1250 if (valid)
1251 {
1252 InitEquationTable();
1253 }
1254
1255 return valid;
1256 }
1257
1258 /**
1259 ************************************************************************************************************************
1260 * Gfx9Lib::HwlConvertChipFamily
1261 *
1262 * @brief
1263 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1264 * @return
1265 * ChipFamily
1266 ************************************************************************************************************************
1267 */
1268 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1269 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1270 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1271 {
1272 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1273
1274 switch (uChipFamily)
1275 {
1276 case FAMILY_AI:
1277 m_settings.isArcticIsland = 1;
1278 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1279 m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1280 m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1281 m_settings.isDce12 = 1;
1282
1283 if (m_settings.isVega10 == 0)
1284 {
1285 m_settings.htileAlignFix = 1;
1286 m_settings.applyAliasFix = 1;
1287 }
1288
1289 m_settings.metaBaseAlignFix = 1;
1290
1291 m_settings.depthPipeXorDisable = 1;
1292 break;
1293 case FAMILY_RV:
1294 m_settings.isArcticIsland = 1;
1295
1296 if (ASICREV_IS_RAVEN(uChipRevision))
1297 {
1298 m_settings.isRaven = 1;
1299
1300 m_settings.depthPipeXorDisable = 1;
1301 }
1302
1303 if (ASICREV_IS_RAVEN2(uChipRevision))
1304 {
1305 m_settings.isRaven = 1;
1306 }
1307
1308 if (m_settings.isRaven == 0)
1309 {
1310 m_settings.htileAlignFix = 1;
1311 m_settings.applyAliasFix = 1;
1312 }
1313
1314 m_settings.isDcn1 = m_settings.isRaven;
1315
1316 m_settings.metaBaseAlignFix = 1;
1317 break;
1318
1319 default:
1320 ADDR_ASSERT(!"This should be a Fusion");
1321 break;
1322 }
1323
1324 return family;
1325 }
1326
1327 /**
1328 ************************************************************************************************************************
1329 * Gfx9Lib::InitRbEquation
1330 *
1331 * @brief
1332 * Init RB equation
1333 * @return
1334 * N/A
1335 ************************************************************************************************************************
1336 */
1337 VOID Gfx9Lib::GetRbEquation(
1338 CoordEq* pRbEq, ///< [out] rb equation
1339 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1340 UINT_32 numSeLog2) ///< [in] number of shader engine
1341 const
1342 {
1343 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1344 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1345 Coordinate cx('x', rbRegion);
1346 Coordinate cy('y', rbRegion);
1347
1348 UINT_32 start = 0;
1349 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1350
1351 // Clear the rb equation
1352 pRbEq->resize(0);
1353 pRbEq->resize(numRbTotalLog2);
1354
1355 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1356 {
1357 // Special case when more than 1 SE, and 2 RB per SE
1358 (*pRbEq)[0].add(cx);
1359 (*pRbEq)[0].add(cy);
1360 cx++;
1361 cy++;
1362
1363 if (m_settings.applyAliasFix == false)
1364 {
1365 (*pRbEq)[0].add(cy);
1366 }
1367
1368 (*pRbEq)[0].add(cy);
1369 start++;
1370 }
1371
1372 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1373
1374 for (UINT_32 i = 0; i < numBits; i++)
1375 {
1376 UINT_32 idx =
1377 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1378
1379 if ((i % 2) == 1)
1380 {
1381 (*pRbEq)[idx].add(cx);
1382 cx++;
1383 }
1384 else
1385 {
1386 (*pRbEq)[idx].add(cy);
1387 cy++;
1388 }
1389 }
1390 }
1391
1392 /**
1393 ************************************************************************************************************************
1394 * Gfx9Lib::GetDataEquation
1395 *
1396 * @brief
1397 * Get data equation for fmask and Z
1398 * @return
1399 * N/A
1400 ************************************************************************************************************************
1401 */
1402 VOID Gfx9Lib::GetDataEquation(
1403 CoordEq* pDataEq, ///< [out] data surface equation
1404 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1405 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1406 AddrResourceType resourceType, ///< [in] data surface resource type
1407 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1408 UINT_32 numSamplesLog2) ///< [in] data surface sample count
1409 const
1410 {
1411 Coordinate cx('x', 0);
1412 Coordinate cy('y', 0);
1413 Coordinate cz('z', 0);
1414 Coordinate cs('s', 0);
1415
1416 // Clear the equation
1417 pDataEq->resize(0);
1418 pDataEq->resize(27);
1419
1420 if (dataSurfaceType == Gfx9DataColor)
1421 {
1422 if (IsLinear(swizzleMode))
1423 {
1424 Coordinate cm('m', 0);
1425
1426 pDataEq->resize(49);
1427
1428 for (UINT_32 i = 0; i < 49; i++)
1429 {
1430 (*pDataEq)[i].add(cm);
1431 cm++;
1432 }
1433 }
1434 else if (IsThick(resourceType, swizzleMode))
1435 {
1436 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1437 UINT_32 i;
1438 if (IsStandardSwizzle(resourceType, swizzleMode))
1439 {
1440 // Standard 3d swizzle
1441 // Fill in bottom x bits
1442 for (i = elementBytesLog2; i < 4; i++)
1443 {
1444 (*pDataEq)[i].add(cx);
1445 cx++;
1446 }
1447 // Fill in 2 bits of y and then z
1448 for (i = 4; i < 6; i++)
1449 {
1450 (*pDataEq)[i].add(cy);
1451 cy++;
1452 }
1453 for (i = 6; i < 8; i++)
1454 {
1455 (*pDataEq)[i].add(cz);
1456 cz++;
1457 }
1458 if (elementBytesLog2 < 2)
1459 {
1460 // fill in z & y bit
1461 (*pDataEq)[8].add(cz);
1462 (*pDataEq)[9].add(cy);
1463 cz++;
1464 cy++;
1465 }
1466 else if (elementBytesLog2 == 2)
1467 {
1468 // fill in y and x bit
1469 (*pDataEq)[8].add(cy);
1470 (*pDataEq)[9].add(cx);
1471 cy++;
1472 cx++;
1473 }
1474 else
1475 {
1476 // fill in 2 x bits
1477 (*pDataEq)[8].add(cx);
1478 cx++;
1479 (*pDataEq)[9].add(cx);
1480 cx++;
1481 }
1482 }
1483 else
1484 {
1485 // Z 3d swizzle
1486 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1487 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1488 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1489 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1490 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1491 {
1492 (*pDataEq)[i].add(cz);
1493 cz++;
1494 }
1495 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1496 {
1497 // add an x and z
1498 (*pDataEq)[6].add(cx);
1499 (*pDataEq)[7].add(cz);
1500 cx++;
1501 cz++;
1502 }
1503 else if (elementBytesLog2 == 2)
1504 {
1505 // add a y and z
1506 (*pDataEq)[6].add(cy);
1507 (*pDataEq)[7].add(cz);
1508 cy++;
1509 cz++;
1510 }
1511 // add y and x
1512 (*pDataEq)[8].add(cy);
1513 (*pDataEq)[9].add(cx);
1514 cy++;
1515 cx++;
1516 }
1517 // Fill in bit 10 and up
1518 pDataEq->mort3d( cz, cy, cx, 10 );
1519 }
1520 else if (IsThin(resourceType, swizzleMode))
1521 {
1522 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1523 // Color 2D
1524 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1525 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1526 UINT_32 i;
1527 // Fill in bottom x bits
1528 for (i = elementBytesLog2; i < 4; i++)
1529 {
1530 (*pDataEq)[i].add(cx);
1531 cx++;
1532 }
1533 // Fill in bottom y bits
1534 for (i = 4; i < 4 + microYBits; i++)
1535 {
1536 (*pDataEq)[i].add(cy);
1537 cy++;
1538 }
1539 // Fill in last of the micro_x bits
1540 for (i = 4 + microYBits; i < 8; i++)
1541 {
1542 (*pDataEq)[i].add(cx);
1543 cx++;
1544 }
1545 // Fill in x/y bits below sample split
1546 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1547 // Fill in sample bits
1548 for (i = 0; i < numSamplesLog2; i++)
1549 {
1550 cs.set('s', i);
1551 (*pDataEq)[tileSplitStart + i].add(cs);
1552 }
1553 // Fill in x/y bits above sample split
1554 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1555 {
1556 pDataEq->mort2d(cx, cy, blockSizeLog2);
1557 }
1558 else
1559 {
1560 pDataEq->mort2d(cy, cx, blockSizeLog2);
1561 }
1562 }
1563 else
1564 {
1565 ADDR_ASSERT_ALWAYS();
1566 }
1567 }
1568 else
1569 {
1570 // Fmask or depth
1571 UINT_32 sampleStart = elementBytesLog2;
1572 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1573 UINT_32 ymajStart = 6 + numSamplesLog2;
1574
1575 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1576 {
1577 cs.set('s', s);
1578 (*pDataEq)[sampleStart + s].add(cs);
1579 }
1580
1581 // Put in the x-major order pixel bits
1582 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1583 // Put in the y-major order pixel bits
1584 pDataEq->mort2d(cy, cx, ymajStart);
1585 }
1586 }
1587
1588 /**
1589 ************************************************************************************************************************
1590 * Gfx9Lib::GetPipeEquation
1591 *
1592 * @brief
1593 * Get pipe equation
1594 * @return
1595 * N/A
1596 ************************************************************************************************************************
1597 */
1598 VOID Gfx9Lib::GetPipeEquation(
1599 CoordEq* pPipeEq, ///< [out] pipe equation
1600 CoordEq* pDataEq, ///< [in] data equation
1601 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1602 UINT_32 numPipeLog2, ///< [in] number of pipes
1603 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1604 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1605 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1606 AddrResourceType resourceType ///< [in] data surface resource type
1607 ) const
1608 {
1609 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1610 CoordEq dataEq;
1611
1612 pDataEq->copy(dataEq);
1613
1614 if (dataSurfaceType == Gfx9DataColor)
1615 {
1616 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1617 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1618 }
1619
1620 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1621
1622 // This section should only apply to z/stencil, maybe fmask
1623 // If the pipe bit is below the comp block size,
1624 // then keep moving up the address until we find a bit that is above
1625 UINT_32 pipeStart = 0;
1626
1627 if (dataSurfaceType != Gfx9DataColor)
1628 {
1629 Coordinate tileMin('x', 3);
1630
1631 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1632 {
1633 pipeStart++;
1634 }
1635
1636 // if pipe is 0, then the first pipe bit is above the comp block size,
1637 // so we don't need to do anything
1638 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1639 // we will get the same pipe equation
1640 if (pipeStart != 0)
1641 {
1642 for (UINT_32 i = 0; i < numPipeLog2; i++)
1643 {
1644 // Copy the jth bit above pipe interleave to the current pipe equation bit
1645 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1646 }
1647 }
1648 }
1649
1650 if (IsPrt(swizzleMode))
1651 {
1652 // Clear out bits above the block size if prt's are enabled
1653 dataEq.resize(blockSizeLog2);
1654 dataEq.resize(48);
1655 }
1656
1657 if (IsXor(swizzleMode))
1658 {
1659 CoordEq xorMask;
1660
1661 if (IsThick(resourceType, swizzleMode))
1662 {
1663 CoordEq xorMask2;
1664
1665 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1666
1667 xorMask.resize(numPipeLog2);
1668
1669 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1670 {
1671 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1672 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1673 }
1674 }
1675 else
1676 {
1677 // Xor in the bits above the pipe+gpu bits
1678 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1679
1680 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1681 {
1682 Coordinate co;
1683 CoordEq xorMask2;
1684 // if 1xaa and not prt, then xor in the z bits
1685 xorMask2.resize(0);
1686 xorMask2.resize(numPipeLog2);
1687 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1688 {
1689 co.set('z', numPipeLog2 - 1 - pipeIdx);
1690 xorMask2[pipeIdx].add(co);
1691 }
1692
1693 pPipeEq->xorin(xorMask2);
1694 }
1695 }
1696
1697 xorMask.reverse();
1698 pPipeEq->xorin(xorMask);
1699 }
1700 }
1701 /**
1702 ************************************************************************************************************************
1703 * Gfx9Lib::GetMetaEquation
1704 *
1705 * @brief
1706 * Get meta equation for cmask/htile/DCC
1707 * @return
1708 * Pointer to a calculated meta equation
1709 ************************************************************************************************************************
1710 */
1711 const CoordEq* Gfx9Lib::GetMetaEquation(
1712 const MetaEqParams& metaEqParams)
1713 {
1714 UINT_32 cachedMetaEqIndex;
1715
1716 for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1717 {
1718 if (memcmp(&metaEqParams,
1719 &m_cachedMetaEqKey[cachedMetaEqIndex],
1720 static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1721 {
1722 break;
1723 }
1724 }
1725
1726 CoordEq* pMetaEq = NULL;
1727
1728 if (cachedMetaEqIndex < MaxCachedMetaEq)
1729 {
1730 pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1731 }
1732 else
1733 {
1734 m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1735
1736 pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1737
1738 m_metaEqOverrideIndex %= MaxCachedMetaEq;
1739
1740 GenMetaEquation(pMetaEq,
1741 metaEqParams.maxMip,
1742 metaEqParams.elementBytesLog2,
1743 metaEqParams.numSamplesLog2,
1744 metaEqParams.metaFlag,
1745 metaEqParams.dataSurfaceType,
1746 metaEqParams.swizzleMode,
1747 metaEqParams.resourceType,
1748 metaEqParams.metaBlkWidthLog2,
1749 metaEqParams.metaBlkHeightLog2,
1750 metaEqParams.metaBlkDepthLog2,
1751 metaEqParams.compBlkWidthLog2,
1752 metaEqParams.compBlkHeightLog2,
1753 metaEqParams.compBlkDepthLog2);
1754 }
1755
1756 return pMetaEq;
1757 }
1758
1759 /**
1760 ************************************************************************************************************************
1761 * Gfx9Lib::GenMetaEquation
1762 *
1763 * @brief
1764 * Get meta equation for cmask/htile/DCC
1765 * @return
1766 * N/A
1767 ************************************************************************************************************************
1768 */
1769 VOID Gfx9Lib::GenMetaEquation(
1770 CoordEq* pMetaEq, ///< [out] meta equation
1771 UINT_32 maxMip, ///< [in] max mip Id
1772 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1773 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1774 ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
1775 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1776 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1777 AddrResourceType resourceType, ///< [in] data surface resource type
1778 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1779 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1780 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1781 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1782 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1783 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1784 const
1785 {
1786 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1787 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1788
1789 // Get the correct data address and rb equation
1790 CoordEq dataEq;
1791 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1792 elementBytesLog2, numSamplesLog2);
1793
1794 // Get pipe and rb equations
1795 CoordEq pipeEquation;
1796 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1797 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1798 numPipeTotalLog2 = pipeEquation.getsize();
1799
1800 if (metaFlag.linear)
1801 {
1802 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1803 ADDR_ASSERT_ALWAYS();
1804
1805 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1806
1807 dataEq.copy(*pMetaEq);
1808
1809 if (IsLinear(swizzleMode))
1810 {
1811 if (metaFlag.pipeAligned)
1812 {
1813 // Remove the pipe bits
1814 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1815 pMetaEq->shift(-shift, pipeInterleaveLog2);
1816 }
1817 // Divide by comp block size, which for linear (which is always color) is 256 B
1818 pMetaEq->shift(-8);
1819
1820 if (metaFlag.pipeAligned)
1821 {
1822 // Put pipe bits back in
1823 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1824
1825 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1826 {
1827 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1828 }
1829 }
1830 }
1831
1832 pMetaEq->shift(1);
1833 }
1834 else
1835 {
1836 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1837 UINT_32 compFragLog2 =
1838 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1839 maxCompFragLog2 : numSamplesLog2;
1840
1841 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1842
1843 // Make sure the metaaddr is cleared
1844 pMetaEq->resize(0);
1845 pMetaEq->resize(27);
1846
1847 if (IsThick(resourceType, swizzleMode))
1848 {
1849 Coordinate cx('x', 0);
1850 Coordinate cy('y', 0);
1851 Coordinate cz('z', 0);
1852
1853 if (maxMip > 0)
1854 {
1855 pMetaEq->mort3d(cy, cx, cz);
1856 }
1857 else
1858 {
1859 pMetaEq->mort3d(cx, cy, cz);
1860 }
1861 }
1862 else
1863 {
1864 Coordinate cx('x', 0);
1865 Coordinate cy('y', 0);
1866 Coordinate cs;
1867
1868 if (maxMip > 0)
1869 {
1870 pMetaEq->mort2d(cy, cx, compFragLog2);
1871 }
1872 else
1873 {
1874 pMetaEq->mort2d(cx, cy, compFragLog2);
1875 }
1876
1877 //------------------------------------------------------------------------------------------------------------------------
1878 // Put the compressible fragments at the lsb
1879 // the uncompressible frags will be at the msb of the micro address
1880 //------------------------------------------------------------------------------------------------------------------------
1881 for (UINT_32 s = 0; s < compFragLog2; s++)
1882 {
1883 cs.set('s', s);
1884 (*pMetaEq)[s].add(cs);
1885 }
1886 }
1887
1888 // Keep a copy of the pipe equations
1889 CoordEq origPipeEquation;
1890 pipeEquation.copy(origPipeEquation);
1891
1892 Coordinate co;
1893 // filter out everything under the compressed block size
1894 co.set('x', compBlkWidthLog2);
1895 pMetaEq->Filter('<', co, 0, 'x');
1896 co.set('y', compBlkHeightLog2);
1897 pMetaEq->Filter('<', co, 0, 'y');
1898 co.set('z', compBlkDepthLog2);
1899 pMetaEq->Filter('<', co, 0, 'z');
1900
1901 // For non-color, filter out sample bits
1902 if (dataSurfaceType != Gfx9DataColor)
1903 {
1904 co.set('x', 0);
1905 pMetaEq->Filter('<', co, 0, 's');
1906 }
1907
1908 // filter out everything above the metablock size
1909 co.set('x', metaBlkWidthLog2 - 1);
1910 pMetaEq->Filter('>', co, 0, 'x');
1911 co.set('y', metaBlkHeightLog2 - 1);
1912 pMetaEq->Filter('>', co, 0, 'y');
1913 co.set('z', metaBlkDepthLog2 - 1);
1914 pMetaEq->Filter('>', co, 0, 'z');
1915
1916 // filter out everything above the metablock size for the channel bits
1917 co.set('x', metaBlkWidthLog2 - 1);
1918 pipeEquation.Filter('>', co, 0, 'x');
1919 co.set('y', metaBlkHeightLog2 - 1);
1920 pipeEquation.Filter('>', co, 0, 'y');
1921 co.set('z', metaBlkDepthLog2 - 1);
1922 pipeEquation.Filter('>', co, 0, 'z');
1923
1924 // Make sure we still have the same number of channel bits
1925 if (pipeEquation.getsize() != numPipeTotalLog2)
1926 {
1927 ADDR_ASSERT_ALWAYS();
1928 }
1929
1930 // Loop through all channel and rb bits,
1931 // and make sure these components exist in the metadata address
1932 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1933 {
1934 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1935 {
1936 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1937 {
1938 ADDR_ASSERT_ALWAYS();
1939 }
1940 }
1941 }
1942
1943 const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
1944 const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1945 const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1946 CoordEq origRbEquation;
1947
1948 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1949
1950 CoordEq rbEquation = origRbEquation;
1951
1952 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1953 {
1954 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1955 {
1956 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1957 {
1958 ADDR_ASSERT_ALWAYS();
1959 }
1960 }
1961 }
1962
1963 if (m_settings.applyAliasFix)
1964 {
1965 co.set('z', -1);
1966 }
1967
1968 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1969 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1970 {
1971 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1972 {
1973 BOOL_32 isRbEquationInPipeEquation = FALSE;
1974
1975 if (m_settings.applyAliasFix)
1976 {
1977 CoordTerm filteredPipeEq;
1978 filteredPipeEq = pipeEquation[j];
1979
1980 filteredPipeEq.Filter('>', co, 0, 'z');
1981
1982 isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1983 }
1984 else
1985 {
1986 isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1987 }
1988
1989 if (isRbEquationInPipeEquation)
1990 {
1991 rbEquation[i].Clear();
1992 }
1993 }
1994 }
1995
1996 bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
1997
1998 // Loop through each bit of the channel, get the smallest coordinate,
1999 // and remove it from the metaaddr, and rb_equation
2000 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2001 {
2002 pipeEquation[i].getsmallest(co);
2003
2004 UINT_32 old_size = pMetaEq->getsize();
2005 pMetaEq->Filter('=', co);
2006 UINT_32 new_size = pMetaEq->getsize();
2007 if (new_size != old_size-1)
2008 {
2009 ADDR_ASSERT_ALWAYS();
2010 }
2011 pipeEquation.remove(co);
2012 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2013 {
2014 if (rbEquation[j].remove(co))
2015 {
2016 // if we actually removed something from this bit, then add the remaining
2017 // channel bits, as these can be removed for this bit
2018 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2019 {
2020 if (pipeEquation[i][k] != co)
2021 {
2022 rbEquation[j].add(pipeEquation[i][k]);
2023 rbAppendedWithPipeBits[j] = true;
2024 }
2025 }
2026 }
2027 }
2028 }
2029
2030 // Loop through the rb bits and see what remain;
2031 // filter out the smallest coordinate if it remains
2032 UINT_32 rbBitsLeft = 0;
2033 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2034 {
2035 BOOL_32 isRbEqAppended = FALSE;
2036
2037 if (m_settings.applyAliasFix)
2038 {
2039 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2040 }
2041 else
2042 {
2043 isRbEqAppended = (rbEquation[i].getsize() > 0);
2044 }
2045
2046 if (isRbEqAppended)
2047 {
2048 rbBitsLeft++;
2049 rbEquation[i].getsmallest(co);
2050 UINT_32 old_size = pMetaEq->getsize();
2051 pMetaEq->Filter('=', co);
2052 UINT_32 new_size = pMetaEq->getsize();
2053 if (new_size != old_size - 1)
2054 {
2055 // assert warning
2056 }
2057 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2058 {
2059 if (rbEquation[j].remove(co))
2060 {
2061 // if we actually removed something from this bit, then add the remaining
2062 // rb bits, as these can be removed for this bit
2063 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2064 {
2065 if (rbEquation[i][k] != co)
2066 {
2067 rbEquation[j].add(rbEquation[i][k]);
2068 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2069 }
2070 }
2071 }
2072 }
2073 }
2074 }
2075
2076 // capture the size of the metaaddr
2077 UINT_32 metaSize = pMetaEq->getsize();
2078 // resize to 49 bits...make this a nibble address
2079 pMetaEq->resize(49);
2080 // Concatenate the macro address above the current address
2081 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2082 {
2083 co.set('m', j);
2084 (*pMetaEq)[i].add(co);
2085 }
2086
2087 // Multiply by meta element size (in nibbles)
2088 if (dataSurfaceType == Gfx9DataColor)
2089 {
2090 pMetaEq->shift(1);
2091 }
2092 else if (dataSurfaceType == Gfx9DataDepthStencil)
2093 {
2094 pMetaEq->shift(3);
2095 }
2096
2097 //------------------------------------------------------------------------------------------
2098 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2099 // Shift up from pipe interleave number of channel
2100 // and rb bits left, and uncompressed fragments
2101 //------------------------------------------------------------------------------------------
2102
2103 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2104
2105 // Put in the channel bits
2106 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2107 {
2108 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2109 }
2110
2111 // Put in remaining rb bits
2112 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2113 {
2114 BOOL_32 isRbEqAppended = FALSE;
2115
2116 if (m_settings.applyAliasFix)
2117 {
2118 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2119 }
2120 else
2121 {
2122 isRbEqAppended = (rbEquation[i].getsize() > 0);
2123 }
2124
2125 if (isRbEqAppended)
2126 {
2127 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2128 // Mark any rb bit we add in to the rb mask
2129 j++;
2130 }
2131 }
2132
2133 //------------------------------------------------------------------------------------------
2134 // Put in the uncompressed fragment bits
2135 //------------------------------------------------------------------------------------------
2136 for (UINT_32 i = 0; i < uncompFragLog2; i++)
2137 {
2138 co.set('s', compFragLog2 + i);
2139 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2140 }
2141 }
2142 }
2143
2144 /**
2145 ************************************************************************************************************************
2146 * Gfx9Lib::IsEquationSupported
2147 *
2148 * @brief
2149 * Check if equation is supported for given swizzle mode and resource type.
2150 *
2151 * @return
2152 * TRUE if supported
2153 ************************************************************************************************************************
2154 */
2155 BOOL_32 Gfx9Lib::IsEquationSupported(
2156 AddrResourceType rsrcType,
2157 AddrSwizzleMode swMode,
2158 UINT_32 elementBytesLog2) const
2159 {
2160 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2161 (IsLinear(swMode) == FALSE) &&
2162 (((IsTex2d(rsrcType) == TRUE) &&
2163 ((elementBytesLog2 < 4) ||
2164 ((IsRotateSwizzle(swMode) == FALSE) &&
2165 (IsZOrderSwizzle(swMode) == FALSE)))) ||
2166 ((IsTex3d(rsrcType) == TRUE) &&
2167 (IsRotateSwizzle(swMode) == FALSE) &&
2168 (IsBlock256b(swMode) == FALSE)));
2169
2170 return supported;
2171 }
2172
2173 /**
2174 ************************************************************************************************************************
2175 * Gfx9Lib::InitEquationTable
2176 *
2177 * @brief
2178 * Initialize Equation table.
2179 *
2180 * @return
2181 * N/A
2182 ************************************************************************************************************************
2183 */
2184 VOID Gfx9Lib::InitEquationTable()
2185 {
2186 memset(m_equationTable, 0, sizeof(m_equationTable));
2187
2188 // Loop all possible resource type (2D/3D)
2189 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2190 {
2191 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2192
2193 // Loop all possible swizzle mode
2194 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
2195 {
2196 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2197
2198 // Loop all possible bpp
2199 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2200 {
2201 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2202
2203 // Check if the input is supported
2204 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2205 {
2206 ADDR_EQUATION equation;
2207 ADDR_E_RETURNCODE retCode;
2208
2209 memset(&equation, 0, sizeof(ADDR_EQUATION));
2210
2211 // Generate the equation
2212 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2213 {
2214 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2215 }
2216 else if (IsThin(rsrcType, swMode))
2217 {
2218 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2219 }
2220 else
2221 {
2222 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2223 }
2224
2225 // Only fill the equation into the table if the return code is ADDR_OK,
2226 // otherwise if the return code is not ADDR_OK, it indicates this is not
2227 // a valid input, we do nothing but just fill invalid equation index
2228 // into the lookup table.
2229 if (retCode == ADDR_OK)
2230 {
2231 equationIndex = m_numEquations;
2232 ADDR_ASSERT(equationIndex < EquationTableSize);
2233
2234 m_equationTable[equationIndex] = equation;
2235
2236 m_numEquations++;
2237 }
2238 else
2239 {
2240 ADDR_ASSERT_ALWAYS();
2241 }
2242 }
2243
2244 // Fill the index into the lookup table, if the combination is not supported
2245 // fill the invalid equation index
2246 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2247 }
2248 }
2249 }
2250 }
2251
2252 /**
2253 ************************************************************************************************************************
2254 * Gfx9Lib::HwlGetEquationIndex
2255 *
2256 * @brief
2257 * Interface function stub of GetEquationIndex
2258 *
2259 * @return
2260 * ADDR_E_RETURNCODE
2261 ************************************************************************************************************************
2262 */
2263 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2264 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2265 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2266 ) const
2267 {
2268 AddrResourceType rsrcType = pIn->resourceType;
2269 AddrSwizzleMode swMode = pIn->swizzleMode;
2270 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2271 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2272
2273 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2274 {
2275 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2276 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2277
2278 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2279 }
2280
2281 if (pOut->pMipInfo != NULL)
2282 {
2283 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2284 {
2285 pOut->pMipInfo[i].equationIndex = index;
2286 }
2287 }
2288
2289 return index;
2290 }
2291
2292 /**
2293 ************************************************************************************************************************
2294 * Gfx9Lib::HwlComputeBlock256Equation
2295 *
2296 * @brief
2297 * Interface function stub of ComputeBlock256Equation
2298 *
2299 * @return
2300 * ADDR_E_RETURNCODE
2301 ************************************************************************************************************************
2302 */
2303 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2304 AddrResourceType rsrcType,
2305 AddrSwizzleMode swMode,
2306 UINT_32 elementBytesLog2,
2307 ADDR_EQUATION* pEquation) const
2308 {
2309 ADDR_E_RETURNCODE ret = ADDR_OK;
2310
2311 pEquation->numBits = 8;
2312
2313 UINT_32 i = 0;
2314 for (; i < elementBytesLog2; i++)
2315 {
2316 InitChannel(1, 0 , i, &pEquation->addr[i]);
2317 }
2318
2319 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2320
2321 const UINT_32 maxBitsUsed = 4;
2322 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2323 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2324
2325 for (i = 0; i < maxBitsUsed; i++)
2326 {
2327 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2328 InitChannel(1, 1, i, &y[i]);
2329 }
2330
2331 if (IsStandardSwizzle(rsrcType, swMode))
2332 {
2333 switch (elementBytesLog2)
2334 {
2335 case 0:
2336 pixelBit[0] = x[0];
2337 pixelBit[1] = x[1];
2338 pixelBit[2] = x[2];
2339 pixelBit[3] = x[3];
2340 pixelBit[4] = y[0];
2341 pixelBit[5] = y[1];
2342 pixelBit[6] = y[2];
2343 pixelBit[7] = y[3];
2344 break;
2345 case 1:
2346 pixelBit[0] = x[0];
2347 pixelBit[1] = x[1];
2348 pixelBit[2] = x[2];
2349 pixelBit[3] = y[0];
2350 pixelBit[4] = y[1];
2351 pixelBit[5] = y[2];
2352 pixelBit[6] = x[3];
2353 break;
2354 case 2:
2355 pixelBit[0] = x[0];
2356 pixelBit[1] = x[1];
2357 pixelBit[2] = y[0];
2358 pixelBit[3] = y[1];
2359 pixelBit[4] = y[2];
2360 pixelBit[5] = x[2];
2361 break;
2362 case 3:
2363 pixelBit[0] = x[0];
2364 pixelBit[1] = y[0];
2365 pixelBit[2] = y[1];
2366 pixelBit[3] = x[1];
2367 pixelBit[4] = x[2];
2368 break;
2369 case 4:
2370 pixelBit[0] = y[0];
2371 pixelBit[1] = y[1];
2372 pixelBit[2] = x[0];
2373 pixelBit[3] = x[1];
2374 break;
2375 default:
2376 ADDR_ASSERT_ALWAYS();
2377 ret = ADDR_INVALIDPARAMS;
2378 break;
2379 }
2380 }
2381 else if (IsDisplaySwizzle(rsrcType, swMode))
2382 {
2383 switch (elementBytesLog2)
2384 {
2385 case 0:
2386 pixelBit[0] = x[0];
2387 pixelBit[1] = x[1];
2388 pixelBit[2] = x[2];
2389 pixelBit[3] = y[1];
2390 pixelBit[4] = y[0];
2391 pixelBit[5] = y[2];
2392 pixelBit[6] = x[3];
2393 pixelBit[7] = y[3];
2394 break;
2395 case 1:
2396 pixelBit[0] = x[0];
2397 pixelBit[1] = x[1];
2398 pixelBit[2] = x[2];
2399 pixelBit[3] = y[0];
2400 pixelBit[4] = y[1];
2401 pixelBit[5] = y[2];
2402 pixelBit[6] = x[3];
2403 break;
2404 case 2:
2405 pixelBit[0] = x[0];
2406 pixelBit[1] = x[1];
2407 pixelBit[2] = y[0];
2408 pixelBit[3] = x[2];
2409 pixelBit[4] = y[1];
2410 pixelBit[5] = y[2];
2411 break;
2412 case 3:
2413 pixelBit[0] = x[0];
2414 pixelBit[1] = y[0];
2415 pixelBit[2] = x[1];
2416 pixelBit[3] = x[2];
2417 pixelBit[4] = y[1];
2418 break;
2419 case 4:
2420 pixelBit[0] = x[0];
2421 pixelBit[1] = y[0];
2422 pixelBit[2] = x[1];
2423 pixelBit[3] = y[1];
2424 break;
2425 default:
2426 ADDR_ASSERT_ALWAYS();
2427 ret = ADDR_INVALIDPARAMS;
2428 break;
2429 }
2430 }
2431 else if (IsRotateSwizzle(swMode))
2432 {
2433 switch (elementBytesLog2)
2434 {
2435 case 0:
2436 pixelBit[0] = y[0];
2437 pixelBit[1] = y[1];
2438 pixelBit[2] = y[2];
2439 pixelBit[3] = x[1];
2440 pixelBit[4] = x[0];
2441 pixelBit[5] = x[2];
2442 pixelBit[6] = x[3];
2443 pixelBit[7] = y[3];
2444 break;
2445 case 1:
2446 pixelBit[0] = y[0];
2447 pixelBit[1] = y[1];
2448 pixelBit[2] = y[2];
2449 pixelBit[3] = x[0];
2450 pixelBit[4] = x[1];
2451 pixelBit[5] = x[2];
2452 pixelBit[6] = x[3];
2453 break;
2454 case 2:
2455 pixelBit[0] = y[0];
2456 pixelBit[1] = y[1];
2457 pixelBit[2] = x[0];
2458 pixelBit[3] = y[2];
2459 pixelBit[4] = x[1];
2460 pixelBit[5] = x[2];
2461 break;
2462 case 3:
2463 pixelBit[0] = y[0];
2464 pixelBit[1] = x[0];
2465 pixelBit[2] = y[1];
2466 pixelBit[3] = x[1];
2467 pixelBit[4] = x[2];
2468 break;
2469 default:
2470 ADDR_ASSERT_ALWAYS();
2471 case 4:
2472 ret = ADDR_INVALIDPARAMS;
2473 break;
2474 }
2475 }
2476 else
2477 {
2478 ADDR_ASSERT_ALWAYS();
2479 ret = ADDR_INVALIDPARAMS;
2480 }
2481
2482 // Post validation
2483 if (ret == ADDR_OK)
2484 {
2485 MAYBE_UNUSED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2486 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2487 (microBlockDim.w * (1 << elementBytesLog2)));
2488 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2489 }
2490
2491 return ret;
2492 }
2493
2494 /**
2495 ************************************************************************************************************************
2496 * Gfx9Lib::HwlComputeThinEquation
2497 *
2498 * @brief
2499 * Interface function stub of ComputeThinEquation
2500 *
2501 * @return
2502 * ADDR_E_RETURNCODE
2503 ************************************************************************************************************************
2504 */
2505 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2506 AddrResourceType rsrcType,
2507 AddrSwizzleMode swMode,
2508 UINT_32 elementBytesLog2,
2509 ADDR_EQUATION* pEquation) const
2510 {
2511 ADDR_E_RETURNCODE ret = ADDR_OK;
2512
2513 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2514
2515 UINT_32 maxXorBits = blockSizeLog2;
2516 if (IsNonPrtXor(swMode))
2517 {
2518 // For non-prt-xor, maybe need to initialize some more bits for xor
2519 // The highest xor bit used in equation will be max the following 3 items:
2520 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2521 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2522 // 3. blockSizeLog2
2523
2524 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2525 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2526 GetPipeXorBits(blockSizeLog2) +
2527 2 * GetBankXorBits(blockSizeLog2));
2528 }
2529
2530 const UINT_32 maxBitsUsed = 14;
2531 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2532 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2533 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2534
2535 const UINT_32 extraXorBits = 16;
2536 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2537 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2538
2539 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2540 {
2541 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2542 InitChannel(1, 1, i, &y[i]);
2543 }
2544
2545 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2546
2547 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2548 {
2549 InitChannel(1, 0 , i, &pixelBit[i]);
2550 }
2551
2552 UINT_32 xIdx = 0;
2553 UINT_32 yIdx = 0;
2554 UINT_32 lowBits = 0;
2555
2556 if (IsZOrderSwizzle(swMode))
2557 {
2558 if (elementBytesLog2 <= 3)
2559 {
2560 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2561 {
2562 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2563 }
2564
2565 lowBits = 6;
2566 }
2567 else
2568 {
2569 ret = ADDR_INVALIDPARAMS;
2570 }
2571 }
2572 else
2573 {
2574 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2575
2576 if (ret == ADDR_OK)
2577 {
2578 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2579 xIdx = Log2(microBlockDim.w);
2580 yIdx = Log2(microBlockDim.h);
2581 lowBits = 8;
2582 }
2583 }
2584
2585 if (ret == ADDR_OK)
2586 {
2587 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2588 {
2589 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2590 }
2591
2592 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2593 {
2594 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2595 }
2596
2597 if (IsXor(swMode))
2598 {
2599 // Fill XOR bits
2600 UINT_32 pipeStart = m_pipeInterleaveLog2;
2601 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2602
2603 UINT_32 bankStart = pipeStart + pipeXorBits;
2604 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2605
2606 for (UINT_32 i = 0; i < pipeXorBits; i++)
2607 {
2608 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2609 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2610 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2611
2612 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2613 }
2614
2615 for (UINT_32 i = 0; i < bankXorBits; i++)
2616 {
2617 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2618 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2619 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2620
2621 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2622 }
2623
2624 if (IsPrt(swMode) == FALSE)
2625 {
2626 for (UINT_32 i = 0; i < pipeXorBits; i++)
2627 {
2628 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2629 }
2630
2631 for (UINT_32 i = 0; i < bankXorBits; i++)
2632 {
2633 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2634 }
2635 }
2636 }
2637
2638 pEquation->numBits = blockSizeLog2;
2639 }
2640
2641 return ret;
2642 }
2643
2644 /**
2645 ************************************************************************************************************************
2646 * Gfx9Lib::HwlComputeThickEquation
2647 *
2648 * @brief
2649 * Interface function stub of ComputeThickEquation
2650 *
2651 * @return
2652 * ADDR_E_RETURNCODE
2653 ************************************************************************************************************************
2654 */
2655 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2656 AddrResourceType rsrcType,
2657 AddrSwizzleMode swMode,
2658 UINT_32 elementBytesLog2,
2659 ADDR_EQUATION* pEquation) const
2660 {
2661 ADDR_E_RETURNCODE ret = ADDR_OK;
2662
2663 ADDR_ASSERT(IsTex3d(rsrcType));
2664
2665 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2666
2667 UINT_32 maxXorBits = blockSizeLog2;
2668 if (IsNonPrtXor(swMode))
2669 {
2670 // For non-prt-xor, maybe need to initialize some more bits for xor
2671 // The highest xor bit used in equation will be max the following 3:
2672 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2673 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2674 // 3. blockSizeLog2
2675
2676 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2677 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2678 GetPipeXorBits(blockSizeLog2) +
2679 3 * GetBankXorBits(blockSizeLog2));
2680 }
2681
2682 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2683 {
2684 InitChannel(1, 0 , i, &pEquation->addr[i]);
2685 }
2686
2687 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2688
2689 const UINT_32 maxBitsUsed = 12;
2690 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2691 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2692 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2693 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2694
2695 const UINT_32 extraXorBits = 24;
2696 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2697 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2698
2699 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2700 {
2701 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2702 InitChannel(1, 1, i, &y[i]);
2703 InitChannel(1, 2, i, &z[i]);
2704 }
2705
2706 if (IsZOrderSwizzle(swMode))
2707 {
2708 switch (elementBytesLog2)
2709 {
2710 case 0:
2711 pixelBit[0] = x[0];
2712 pixelBit[1] = y[0];
2713 pixelBit[2] = x[1];
2714 pixelBit[3] = y[1];
2715 pixelBit[4] = z[0];
2716 pixelBit[5] = z[1];
2717 pixelBit[6] = x[2];
2718 pixelBit[7] = z[2];
2719 pixelBit[8] = y[2];
2720 pixelBit[9] = x[3];
2721 break;
2722 case 1:
2723 pixelBit[0] = x[0];
2724 pixelBit[1] = y[0];
2725 pixelBit[2] = x[1];
2726 pixelBit[3] = y[1];
2727 pixelBit[4] = z[0];
2728 pixelBit[5] = z[1];
2729 pixelBit[6] = z[2];
2730 pixelBit[7] = y[2];
2731 pixelBit[8] = x[2];
2732 break;
2733 case 2:
2734 pixelBit[0] = x[0];
2735 pixelBit[1] = y[0];
2736 pixelBit[2] = x[1];
2737 pixelBit[3] = z[0];
2738 pixelBit[4] = y[1];
2739 pixelBit[5] = z[1];
2740 pixelBit[6] = y[2];
2741 pixelBit[7] = x[2];
2742 break;
2743 case 3:
2744 pixelBit[0] = x[0];
2745 pixelBit[1] = y[0];
2746 pixelBit[2] = z[0];
2747 pixelBit[3] = x[1];
2748 pixelBit[4] = z[1];
2749 pixelBit[5] = y[1];
2750 pixelBit[6] = x[2];
2751 break;
2752 case 4:
2753 pixelBit[0] = x[0];
2754 pixelBit[1] = y[0];
2755 pixelBit[2] = z[0];
2756 pixelBit[3] = z[1];
2757 pixelBit[4] = y[1];
2758 pixelBit[5] = x[1];
2759 break;
2760 default:
2761 ADDR_ASSERT_ALWAYS();
2762 ret = ADDR_INVALIDPARAMS;
2763 break;
2764 }
2765 }
2766 else if (IsStandardSwizzle(rsrcType, swMode))
2767 {
2768 switch (elementBytesLog2)
2769 {
2770 case 0:
2771 pixelBit[0] = x[0];
2772 pixelBit[1] = x[1];
2773 pixelBit[2] = x[2];
2774 pixelBit[3] = x[3];
2775 pixelBit[4] = y[0];
2776 pixelBit[5] = y[1];
2777 pixelBit[6] = z[0];
2778 pixelBit[7] = z[1];
2779 pixelBit[8] = z[2];
2780 pixelBit[9] = y[2];
2781 break;
2782 case 1:
2783 pixelBit[0] = x[0];
2784 pixelBit[1] = x[1];
2785 pixelBit[2] = x[2];
2786 pixelBit[3] = y[0];
2787 pixelBit[4] = y[1];
2788 pixelBit[5] = z[0];
2789 pixelBit[6] = z[1];
2790 pixelBit[7] = z[2];
2791 pixelBit[8] = y[2];
2792 break;
2793 case 2:
2794 pixelBit[0] = x[0];
2795 pixelBit[1] = x[1];
2796 pixelBit[2] = y[0];
2797 pixelBit[3] = y[1];
2798 pixelBit[4] = z[0];
2799 pixelBit[5] = z[1];
2800 pixelBit[6] = y[2];
2801 pixelBit[7] = x[2];
2802 break;
2803 case 3:
2804 pixelBit[0] = x[0];
2805 pixelBit[1] = y[0];
2806 pixelBit[2] = y[1];
2807 pixelBit[3] = z[0];
2808 pixelBit[4] = z[1];
2809 pixelBit[5] = x[1];
2810 pixelBit[6] = x[2];
2811 break;
2812 case 4:
2813 pixelBit[0] = y[0];
2814 pixelBit[1] = y[1];
2815 pixelBit[2] = z[0];
2816 pixelBit[3] = z[1];
2817 pixelBit[4] = x[0];
2818 pixelBit[5] = x[1];
2819 break;
2820 default:
2821 ADDR_ASSERT_ALWAYS();
2822 ret = ADDR_INVALIDPARAMS;
2823 break;
2824 }
2825 }
2826 else
2827 {
2828 ADDR_ASSERT_ALWAYS();
2829 ret = ADDR_INVALIDPARAMS;
2830 }
2831
2832 if (ret == ADDR_OK)
2833 {
2834 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2835 UINT_32 xIdx = Log2(microBlockDim.w);
2836 UINT_32 yIdx = Log2(microBlockDim.h);
2837 UINT_32 zIdx = Log2(microBlockDim.d);
2838
2839 pixelBit = pEquation->addr;
2840
2841 const UINT_32 lowBits = 10;
2842 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2843 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2844
2845 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2846 {
2847 if ((i % 3) == 0)
2848 {
2849 pixelBit[i] = x[xIdx++];
2850 }
2851 else if ((i % 3) == 1)
2852 {
2853 pixelBit[i] = z[zIdx++];
2854 }
2855 else
2856 {
2857 pixelBit[i] = y[yIdx++];
2858 }
2859 }
2860
2861 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2862 {
2863 if ((i % 3) == 0)
2864 {
2865 xorExtra[i - blockSizeLog2] = x[xIdx++];
2866 }
2867 else if ((i % 3) == 1)
2868 {
2869 xorExtra[i - blockSizeLog2] = z[zIdx++];
2870 }
2871 else
2872 {
2873 xorExtra[i - blockSizeLog2] = y[yIdx++];
2874 }
2875 }
2876
2877 if (IsXor(swMode))
2878 {
2879 // Fill XOR bits
2880 UINT_32 pipeStart = m_pipeInterleaveLog2;
2881 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2882 for (UINT_32 i = 0; i < pipeXorBits; i++)
2883 {
2884 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2885 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2886 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2887
2888 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2889
2890 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2891 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2892 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2893
2894 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2895 }
2896
2897 UINT_32 bankStart = pipeStart + pipeXorBits;
2898 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2899 for (UINT_32 i = 0; i < bankXorBits; i++)
2900 {
2901 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2902 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2903 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2904
2905 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2906
2907 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2908 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2909 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2910
2911 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2912 }
2913 }
2914
2915 pEquation->numBits = blockSizeLog2;
2916 }
2917
2918 return ret;
2919 }
2920
2921 /**
2922 ************************************************************************************************************************
2923 * Gfx9Lib::IsValidDisplaySwizzleMode
2924 *
2925 * @brief
2926 * Check if a swizzle mode is supported by display engine
2927 *
2928 * @return
2929 * TRUE is swizzle mode is supported by display engine
2930 ************************************************************************************************************************
2931 */
2932 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2933 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2934 {
2935 BOOL_32 support = FALSE;
2936
2937 const AddrResourceType resourceType = pIn->resourceType;
2938 (void)resourceType;
2939 const AddrSwizzleMode swizzleMode = pIn->swizzleMode;
2940
2941 if (m_settings.isDce12)
2942 {
2943 switch (swizzleMode)
2944 {
2945 case ADDR_SW_256B_D:
2946 case ADDR_SW_256B_R:
2947 support = (pIn->bpp == 32);
2948 break;
2949
2950 case ADDR_SW_LINEAR:
2951 case ADDR_SW_4KB_D:
2952 case ADDR_SW_4KB_R:
2953 case ADDR_SW_64KB_D:
2954 case ADDR_SW_64KB_R:
2955 case ADDR_SW_VAR_D:
2956 case ADDR_SW_VAR_R:
2957 case ADDR_SW_4KB_D_X:
2958 case ADDR_SW_4KB_R_X:
2959 case ADDR_SW_64KB_D_X:
2960 case ADDR_SW_64KB_R_X:
2961 case ADDR_SW_VAR_D_X:
2962 case ADDR_SW_VAR_R_X:
2963 support = (pIn->bpp <= 64);
2964 break;
2965
2966 default:
2967 break;
2968 }
2969 }
2970 else if (m_settings.isDcn1)
2971 {
2972 switch (swizzleMode)
2973 {
2974 case ADDR_SW_4KB_D:
2975 case ADDR_SW_64KB_D:
2976 case ADDR_SW_VAR_D:
2977 case ADDR_SW_64KB_D_T:
2978 case ADDR_SW_4KB_D_X:
2979 case ADDR_SW_64KB_D_X:
2980 case ADDR_SW_VAR_D_X:
2981 support = (pIn->bpp == 64);
2982 break;
2983
2984 case ADDR_SW_LINEAR:
2985 case ADDR_SW_4KB_S:
2986 case ADDR_SW_64KB_S:
2987 case ADDR_SW_VAR_S:
2988 case ADDR_SW_64KB_S_T:
2989 case ADDR_SW_4KB_S_X:
2990 case ADDR_SW_64KB_S_X:
2991 case ADDR_SW_VAR_S_X:
2992 support = (pIn->bpp <= 64);
2993 break;
2994
2995 default:
2996 break;
2997 }
2998 }
2999 else
3000 {
3001 ADDR_NOT_IMPLEMENTED();
3002 }
3003
3004 return support;
3005 }
3006
3007 /**
3008 ************************************************************************************************************************
3009 * Gfx9Lib::HwlComputePipeBankXor
3010 *
3011 * @brief
3012 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3013 *
3014 * @return
3015 * PipeBankXor value
3016 ************************************************************************************************************************
3017 */
3018 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3019 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3020 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
3021 {
3022 if (IsXor(pIn->swizzleMode))
3023 {
3024 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3025 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3026 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3027
3028 UINT_32 pipeXor = 0;
3029 UINT_32 bankXor = 0;
3030
3031 const UINT_32 bankMask = (1 << bankBits) - 1;
3032 const UINT_32 index = pIn->surfIndex & bankMask;
3033
3034 const UINT_32 bpp = pIn->flags.fmask ?
3035 GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3036 if (bankBits == 4)
3037 {
3038 static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3039 static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3040
3041 bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3042 }
3043 else if (bankBits > 0)
3044 {
3045 UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3046 bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3047 bankXor = (index * bankIncrease) & bankMask;
3048 }
3049
3050 pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3051 }
3052 else
3053 {
3054 pOut->pipeBankXor = 0;
3055 }
3056
3057 return ADDR_OK;
3058 }
3059
3060 /**
3061 ************************************************************************************************************************
3062 * Gfx9Lib::HwlComputeSlicePipeBankXor
3063 *
3064 * @brief
3065 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3066 *
3067 * @return
3068 * PipeBankXor value
3069 ************************************************************************************************************************
3070 */
3071 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3072 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3073 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const
3074 {
3075 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3076 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3077 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3078
3079 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3080 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3081
3082 pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3083
3084 return ADDR_OK;
3085 }
3086
3087 /**
3088 ************************************************************************************************************************
3089 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3090 *
3091 * @brief
3092 * Compute sub resource offset to support swizzle pattern
3093 *
3094 * @return
3095 * Offset
3096 ************************************************************************************************************************
3097 */
3098 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3099 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3100 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const
3101 {
3102 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3103
3104 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3105 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3106 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3107 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3108 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3109 UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3110
3111 pOut->offset = pIn->slice * pIn->sliceSize +
3112 pIn->macroBlockOffset +
3113 (pIn->mipTailOffset ^ pipeBankXor) -
3114 static_cast<UINT_64>(pipeBankXor);
3115 return ADDR_OK;
3116 }
3117
3118 /**
3119 ************************************************************************************************************************
3120 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3121 *
3122 * @brief
3123 * Compute surface info sanity check
3124 *
3125 * @return
3126 * Offset
3127 ************************************************************************************************************************
3128 */
3129 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3130 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3131 {
3132 BOOL_32 invalid = FALSE;
3133
3134 if ((pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3135 {
3136 invalid = TRUE;
3137 }
3138 else if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) ||
3139 (pIn->resourceType >= ADDR_RSRC_MAX_TYPE))
3140 {
3141 invalid = TRUE;
3142 }
3143
3144 BOOL_32 mipmap = (pIn->numMipLevels > 1);
3145 BOOL_32 msaa = (pIn->numFrags > 1);
3146
3147 ADDR2_SURFACE_FLAGS flags = pIn->flags;
3148 BOOL_32 zbuffer = (flags.depth || flags.stencil);
3149 BOOL_32 color = flags.color;
3150 BOOL_32 display = flags.display || flags.rotated;
3151
3152 AddrResourceType rsrcType = pIn->resourceType;
3153 BOOL_32 tex3d = IsTex3d(rsrcType);
3154 BOOL_32 thin3d = tex3d && flags.view3dAs2dArray;
3155 AddrSwizzleMode swizzle = pIn->swizzleMode;
3156 BOOL_32 linear = IsLinear(swizzle);
3157 BOOL_32 blk256B = IsBlock256b(swizzle);
3158 BOOL_32 blkVar = IsBlockVariable(swizzle);
3159 BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
3160 BOOL_32 prt = flags.prt;
3161 BOOL_32 stereo = flags.qbStereo;
3162
3163 if (invalid == FALSE)
3164 {
3165 if ((pIn->numFrags > 1) &&
3166 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3167 {
3168 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3169 invalid = TRUE;
3170 }
3171 }
3172
3173 if (invalid == FALSE)
3174 {
3175 switch (rsrcType)
3176 {
3177 case ADDR_RSRC_TEX_1D:
3178 invalid = msaa || zbuffer || display || (linear == FALSE) || stereo;
3179 break;
3180 case ADDR_RSRC_TEX_2D:
3181 invalid = (msaa && mipmap) || (stereo && msaa) || (stereo && mipmap);
3182 break;
3183 case ADDR_RSRC_TEX_3D:
3184 invalid = msaa || zbuffer || display || stereo;
3185 break;
3186 default:
3187 invalid = TRUE;
3188 break;
3189 }
3190 }
3191
3192 if (invalid == FALSE)
3193 {
3194 if (display)
3195 {
3196 invalid = (IsValidDisplaySwizzleMode(pIn) == FALSE);
3197 }
3198 }
3199
3200 if (invalid == FALSE)
3201 {
3202 if (linear)
3203 {
3204 invalid = ((ADDR_RSRC_TEX_1D != rsrcType) && prt) ||
3205 zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0);
3206 }
3207 else
3208 {
3209 if (blk256B || blkVar || isNonPrtXor)
3210 {
3211 invalid = prt;
3212 if (blk256B)
3213 {
3214 invalid = invalid || zbuffer || tex3d || mipmap || msaa;
3215 }
3216 }
3217
3218 if (invalid == FALSE)
3219 {
3220 if (IsZOrderSwizzle(swizzle))
3221 {
3222 invalid = (color && msaa) || thin3d;
3223 }
3224 else if (IsStandardSwizzle(swizzle))
3225 {
3226 invalid = zbuffer || thin3d;
3227 }
3228 else if (IsDisplaySwizzle(swizzle))
3229 {
3230 invalid = zbuffer || (prt && (ADDR_RSRC_TEX_3D == rsrcType));
3231 }
3232 else if (IsRotateSwizzle(swizzle))
3233 {
3234 invalid = zbuffer || (pIn->bpp > 64) || tex3d;
3235 }
3236 else
3237 {
3238 ADDR_ASSERT(!"invalid swizzle mode");
3239 invalid = TRUE;
3240 }
3241 }
3242 }
3243 }
3244
3245 ADDR_ASSERT(invalid == FALSE);
3246
3247 return invalid ? ADDR_INVALIDPARAMS : ADDR_OK;
3248 }
3249
3250 /**
3251 ************************************************************************************************************************
3252 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3253 *
3254 * @brief
3255 * Internal function to get the suggested surface setting for the client to use
3256 *
3257 * @return
3258 * ADDR_E_RETURNCODE
3259 ************************************************************************************************************************
3260 */
3261 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3262 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3263 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const
3264 {
3265 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3266 ElemLib* pElemLib = GetElemLib();
3267
3268 UINT_32 bpp = pIn->bpp;
3269 UINT_32 width = pIn->width;
3270 UINT_32 height = pIn->height;
3271 UINT_32 numSamples = Max(pIn->numSamples, 1u);
3272 UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3273
3274 if (pIn->flags.fmask)
3275 {
3276 bpp = GetFmaskBpp(numSamples, numFrags);
3277 numFrags = 1;
3278 numSamples = 1;
3279 pOut->resourceType = ADDR_RSRC_TEX_2D;
3280 }
3281 else
3282 {
3283 // Set format to INVALID will skip this conversion
3284 if (pIn->format != ADDR_FMT_INVALID)
3285 {
3286 UINT_32 expandX, expandY;
3287
3288 // Don't care for this case
3289 ElemMode elemMode = ADDR_UNCOMPRESSED;
3290
3291 // Get compression/expansion factors and element mode which indicates compression/expansion
3292 bpp = pElemLib->GetBitsPerPixel(pIn->format,
3293 &elemMode,
3294 &expandX,
3295 &expandY);
3296
3297 UINT_32 basePitch = 0;
3298 GetElemLib()->AdjustSurfaceInfo(elemMode,
3299 expandX,
3300 expandY,
3301 &bpp,
3302 &basePitch,
3303 &width,
3304 &height);
3305 }
3306
3307 // The output may get changed for volume(3D) texture resource in future
3308 pOut->resourceType = pIn->resourceType;
3309 }
3310
3311 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
3312 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3313 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
3314 const BOOL_32 displayRsrc = pIn->flags.display || pIn->flags.rotated;
3315
3316 // Forbid swizzle mode(s) by client setting, for simplicity we never allow VAR swizzle mode for GFX9
3317 ADDR2_SWMODE_SET allowedSwModeSet = {};
3318 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3319 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask;
3320 allowedSwModeSet.value |= pIn->forbiddenBlock.macro4KB ? 0 : Gfx9Blk4KBSwModeMask;
3321 allowedSwModeSet.value |= pIn->forbiddenBlock.macro64KB ? 0 : Gfx9Blk64KBSwModeMask;
3322
3323 if (pIn->preferredSwSet.value != 0)
3324 {
3325 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3326 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3327 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3328 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3329 }
3330
3331 if (pIn->noXor)
3332 {
3333 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3334 }
3335
3336 if (pIn->maxAlign > 0)
3337 {
3338 if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
3339 {
3340 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3341 }
3342
3343 if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
3344 {
3345 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3346 }
3347
3348 if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
3349 {
3350 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3351 }
3352 }
3353
3354 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3355 switch (pOut->resourceType)
3356 {
3357 case ADDR_RSRC_TEX_1D:
3358 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3359 break;
3360
3361 case ADDR_RSRC_TEX_2D:
3362 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3363
3364 if (bpp > 64)
3365 {
3366 allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3367 }
3368 break;
3369
3370 case ADDR_RSRC_TEX_3D:
3371 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3372
3373 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3374 {
3375 // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3376 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3377 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3378 allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3379 }
3380
3381 if ((bpp == 128) && pIn->flags.color)
3382 {
3383 allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3384 }
3385
3386 if (pIn->flags.view3dAs2dArray)
3387 {
3388 allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3389 }
3390 break;
3391
3392 default:
3393 ADDR_ASSERT_ALWAYS();
3394 allowedSwModeSet.value = 0;
3395 break;
3396 }
3397
3398 if (pIn->format == ADDR_FMT_32_32_32)
3399 {
3400 allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3401 }
3402
3403 if (ElemLib::IsBlockCompressed(pIn->format))
3404 {
3405 if (pIn->flags.texture)
3406 {
3407 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3408 }
3409 else
3410 {
3411 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3412 }
3413 }
3414
3415 if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3416 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3417 {
3418 allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3419 }
3420
3421 if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3422 {
3423 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3424
3425 if (pIn->flags.noMetadata == FALSE)
3426 {
3427 if (pIn->flags.depth &&
3428 pIn->flags.texture &&
3429 (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3430 {
3431 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3432 // equation from wrong address within memory range a tile covered and use the
3433 // garbage data for compressed Z reading which finally leads to corruption.
3434 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3435 }
3436
3437 if (m_settings.htileCacheRbConflict &&
3438 (pIn->flags.depth || pIn->flags.stencil) &&
3439 (numSlices > 1) &&
3440 (pIn->flags.metaRbUnaligned == FALSE) &&
3441 (pIn->flags.metaPipeUnaligned == FALSE))
3442 {
3443 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3444 allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3445 }
3446 }
3447 }
3448
3449 if (msaa)
3450 {
3451 allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3452 }
3453
3454 if ((numFrags > 1) &&
3455 (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
3456 {
3457 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3458 allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3459 }
3460
3461 if (numMipLevels > 1)
3462 {
3463 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3464 }
3465
3466 if (displayRsrc)
3467 {
3468 if (m_settings.isDce12)
3469 {
3470 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3471 }
3472 else if (m_settings.isDcn1)
3473 {
3474 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3475 }
3476 else
3477 {
3478 ADDR_NOT_IMPLEMENTED();
3479 }
3480 }
3481
3482 if (allowedSwModeSet.value != 0)
3483 {
3484 #if DEBUG
3485 // Post sanity check, at least AddrLib should accept the output generated by its own
3486 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3487 localIn.flags = pIn->flags;
3488 localIn.resourceType = pOut->resourceType;
3489 localIn.format = pIn->format;
3490 localIn.bpp = bpp;
3491 localIn.width = width;
3492 localIn.height = height;
3493 localIn.numSlices =<