amd/addrlib: fix the C++ one definition rule violation
[mesa.git] / src / amd / addrlib / src / gfx9 / gfx9addrlib.cpp
1 /*
2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx9addrlib.h"
35
36 #include "gfx9_gb_reg.h"
37
38 #include "amdgpu_asic_addr.h"
39
40 #include "util/macros.h"
41
42 ////////////////////////////////////////////////////////////////////////////////////////////////////
43 ////////////////////////////////////////////////////////////////////////////////////////////////////
44
45 namespace Addr
46 {
47
48 /**
49 ************************************************************************************************************************
50 * Gfx9HwlInit
51 *
52 * @brief
53 * Creates a Gfx9Lib object.
54 *
55 * @return
56 * Returns a Gfx9Lib object pointer.
57 ************************************************************************************************************************
58 */
59 Addr::Lib* Gfx9HwlInit(const Client* pClient)
60 {
61 return V2::Gfx9Lib::CreateObj(pClient);
62 }
63
64 namespace V2
65 {
66
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68 // Static Const Member
69 ////////////////////////////////////////////////////////////////////////////////////////////////////
70
71 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
72 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
73 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
74 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
75 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
76 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_R
77
78 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
79 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
80 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
81 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_R
82
83 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
84 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
85 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
86 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_R
87
88 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
89 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
90 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
91 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
92
93 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_Z_T
94 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
95 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
96 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0}, // ADDR_SW_64KB_R_T
97
98 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_Z_x
99 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_x
100 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_x
101 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_4KB_R_x
102
103 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
104 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
105 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
106 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_64KB_R_X
107
108 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
109 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
110 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
111 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
112 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
113 };
114
115 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};
116
117 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
118
119 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
120
121 /**
122 ************************************************************************************************************************
123 * Gfx9Lib::Gfx9Lib
124 *
125 * @brief
126 * Constructor
127 *
128 ************************************************************************************************************************
129 */
130 Gfx9Lib::Gfx9Lib(const Client* pClient)
131 :
132 Lib(pClient)
133 {
134 m_class = AI_ADDRLIB;
135 memset(&m_settings, 0, sizeof(m_settings));
136 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
137 memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
138 m_metaEqOverrideIndex = 0;
139 }
140
141 /**
142 ************************************************************************************************************************
143 * Gfx9Lib::~Gfx9Lib
144 *
145 * @brief
146 * Destructor
147 ************************************************************************************************************************
148 */
149 Gfx9Lib::~Gfx9Lib()
150 {
151 }
152
153 /**
154 ************************************************************************************************************************
155 * Gfx9Lib::HwlComputeHtileInfo
156 *
157 * @brief
158 * Interface function stub of AddrComputeHtileInfo
159 *
160 * @return
161 * ADDR_E_RETURNCODE
162 ************************************************************************************************************************
163 */
164 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
165 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
166 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
167 ) const
168 {
169 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
170 pIn->swizzleMode);
171
172 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
173
174 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
175
176 if ((numPipeTotal == 1) && (numRbTotal == 1))
177 {
178 numCompressBlkPerMetaBlkLog2 = 10;
179 }
180 else
181 {
182 if (m_settings.applyAliasFix)
183 {
184 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
185 }
186 else
187 {
188 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
189 }
190 }
191
192 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
193
194 Dim3d metaBlkDim = {8, 8, 1};
195 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
196 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
197 UINT_32 heightAmp = totalAmpBits - widthAmp;
198 metaBlkDim.w <<= widthAmp;
199 metaBlkDim.h <<= heightAmp;
200
201 #if DEBUG
202 Dim3d metaBlkDimDbg = {8, 8, 1};
203 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
204 {
205 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
206 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
207 {
208 metaBlkDimDbg.h <<= 1;
209 }
210 else
211 {
212 metaBlkDimDbg.w <<= 1;
213 }
214 }
215 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
216 #endif
217
218 UINT_32 numMetaBlkX;
219 UINT_32 numMetaBlkY;
220 UINT_32 numMetaBlkZ;
221
222 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
223 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
224 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
225
226 const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
227 UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
228
229 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
230 {
231 align *= (numPipeTotal >> 1);
232 }
233
234 align = Max(align, metaBlkSize);
235
236 if (m_settings.metaBaseAlignFix)
237 {
238 align = Max(align, GetBlockSize(pIn->swizzleMode));
239 }
240
241 if (m_settings.htileAlignFix)
242 {
243 const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
244 const INT_32 htileCachelineSizeLog2 = 11;
245 const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
246
247 INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
248
249 align <<= rbMaskPadding;
250 }
251
252 pOut->pitch = numMetaBlkX * metaBlkDim.w;
253 pOut->height = numMetaBlkY * metaBlkDim.h;
254 pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
255
256 pOut->metaBlkWidth = metaBlkDim.w;
257 pOut->metaBlkHeight = metaBlkDim.h;
258 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
259
260 pOut->baseAlign = align;
261 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
262
263 return ADDR_OK;
264 }
265
266 /**
267 ************************************************************************************************************************
268 * Gfx9Lib::HwlComputeCmaskInfo
269 *
270 * @brief
271 * Interface function stub of AddrComputeCmaskInfo
272 *
273 * @return
274 * ADDR_E_RETURNCODE
275 ************************************************************************************************************************
276 */
277 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
278 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
279 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
280 ) const
281 {
282 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
283
284 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
285 pIn->swizzleMode);
286
287 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
288
289 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
290
291 if ((numPipeTotal == 1) && (numRbTotal == 1))
292 {
293 numCompressBlkPerMetaBlkLog2 = 13;
294 }
295 else
296 {
297 if (m_settings.applyAliasFix)
298 {
299 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
300 }
301 else
302 {
303 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
304 }
305
306 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
307 }
308
309 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
310
311 Dim2d metaBlkDim = {8, 8};
312 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
313 UINT_32 heightAmp = totalAmpBits >> 1;
314 UINT_32 widthAmp = totalAmpBits - heightAmp;
315 metaBlkDim.w <<= widthAmp;
316 metaBlkDim.h <<= heightAmp;
317
318 #if DEBUG
319 Dim2d metaBlkDimDbg = {8, 8};
320 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
321 {
322 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
323 {
324 metaBlkDimDbg.h <<= 1;
325 }
326 else
327 {
328 metaBlkDimDbg.w <<= 1;
329 }
330 }
331 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
332 #endif
333
334 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
335 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
336 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
337
338 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
339
340 if (m_settings.metaBaseAlignFix)
341 {
342 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
343 }
344
345 pOut->pitch = numMetaBlkX * metaBlkDim.w;
346 pOut->height = numMetaBlkY * metaBlkDim.h;
347 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
348 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
349 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
350
351 pOut->metaBlkWidth = metaBlkDim.w;
352 pOut->metaBlkHeight = metaBlkDim.h;
353
354 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
355
356 return ADDR_OK;
357 }
358
359 /**
360 ************************************************************************************************************************
361 * Gfx9Lib::GetMetaMipInfo
362 *
363 * @brief
364 * Get meta mip info
365 *
366 * @return
367 * N/A
368 ************************************************************************************************************************
369 */
370 VOID Gfx9Lib::GetMetaMipInfo(
371 UINT_32 numMipLevels, ///< [in] number of mip levels
372 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
373 BOOL_32 dataThick, ///< [in] data surface is thick
374 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
375 UINT_32 mip0Width, ///< [in] mip0 width
376 UINT_32 mip0Height, ///< [in] mip0 height
377 UINT_32 mip0Depth, ///< [in] mip0 depth
378 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
379 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
380 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
381 const
382 {
383 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
384 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
385 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
386 UINT_32 tailWidth = pMetaBlkDim->w;
387 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
388 UINT_32 tailDepth = pMetaBlkDim->d;
389 BOOL_32 inTail = FALSE;
390 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
391
392 if (numMipLevels > 1)
393 {
394 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
395 {
396 // Z major
397 major = ADDR_MAJOR_Z;
398 }
399 else if (numMetaBlkX >= numMetaBlkY)
400 {
401 // X major
402 major = ADDR_MAJOR_X;
403 }
404 else
405 {
406 // Y major
407 major = ADDR_MAJOR_Y;
408 }
409
410 inTail = ((mip0Width <= tailWidth) &&
411 (mip0Height <= tailHeight) &&
412 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
413
414 if (inTail == FALSE)
415 {
416 UINT_32 orderLimit;
417 UINT_32 *pMipDim;
418 UINT_32 *pOrderDim;
419
420 if (major == ADDR_MAJOR_Z)
421 {
422 // Z major
423 pMipDim = &numMetaBlkY;
424 pOrderDim = &numMetaBlkZ;
425 orderLimit = 4;
426 }
427 else if (major == ADDR_MAJOR_X)
428 {
429 // X major
430 pMipDim = &numMetaBlkY;
431 pOrderDim = &numMetaBlkX;
432 orderLimit = 4;
433 }
434 else
435 {
436 // Y major
437 pMipDim = &numMetaBlkX;
438 pOrderDim = &numMetaBlkY;
439 orderLimit = 2;
440 }
441
442 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
443 {
444 *pMipDim += 2;
445 }
446 else
447 {
448 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
449 }
450 }
451 }
452
453 if (pInfo != NULL)
454 {
455 UINT_32 mipWidth = mip0Width;
456 UINT_32 mipHeight = mip0Height;
457 UINT_32 mipDepth = mip0Depth;
458 Dim3d mipCoord = {0};
459
460 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
461 {
462 if (inTail)
463 {
464 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
465 pMetaBlkDim);
466 break;
467 }
468 else
469 {
470 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
471 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
472 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
473
474 pInfo[mip].inMiptail = FALSE;
475 pInfo[mip].startX = mipCoord.w;
476 pInfo[mip].startY = mipCoord.h;
477 pInfo[mip].startZ = mipCoord.d;
478 pInfo[mip].width = mipWidth;
479 pInfo[mip].height = mipHeight;
480 pInfo[mip].depth = dataThick ? mipDepth : 1;
481
482 if ((mip >= 3) || (mip & 1))
483 {
484 switch (major)
485 {
486 case ADDR_MAJOR_X:
487 mipCoord.w += mipWidth;
488 break;
489 case ADDR_MAJOR_Y:
490 mipCoord.h += mipHeight;
491 break;
492 case ADDR_MAJOR_Z:
493 mipCoord.d += mipDepth;
494 break;
495 default:
496 break;
497 }
498 }
499 else
500 {
501 switch (major)
502 {
503 case ADDR_MAJOR_X:
504 mipCoord.h += mipHeight;
505 break;
506 case ADDR_MAJOR_Y:
507 mipCoord.w += mipWidth;
508 break;
509 case ADDR_MAJOR_Z:
510 mipCoord.h += mipHeight;
511 break;
512 default:
513 break;
514 }
515 }
516
517 mipWidth = Max(mipWidth >> 1, 1u);
518 mipHeight = Max(mipHeight >> 1, 1u);
519 mipDepth = Max(mipDepth >> 1, 1u);
520
521 inTail = ((mipWidth <= tailWidth) &&
522 (mipHeight <= tailHeight) &&
523 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
524 }
525 }
526 }
527
528 *pNumMetaBlkX = numMetaBlkX;
529 *pNumMetaBlkY = numMetaBlkY;
530 *pNumMetaBlkZ = numMetaBlkZ;
531 }
532
533 /**
534 ************************************************************************************************************************
535 * Gfx9Lib::HwlComputeDccInfo
536 *
537 * @brief
538 * Interface function to compute DCC key info
539 *
540 * @return
541 * ADDR_E_RETURNCODE
542 ************************************************************************************************************************
543 */
544 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
545 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
546 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
547 ) const
548 {
549 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
550 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
551 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
552
553 if (dataLinear)
554 {
555 metaLinear = TRUE;
556 }
557 else if (metaLinear == TRUE)
558 {
559 pipeAligned = FALSE;
560 }
561
562 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
563
564 if (metaLinear)
565 {
566 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
567 ADDR_ASSERT_ALWAYS();
568
569 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
570 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
571 }
572 else
573 {
574 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
575
576 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
577
578 UINT_32 numFrags = Max(pIn->numFrags, 1u);
579 UINT_32 numSlices = Max(pIn->numSlices, 1u);
580
581 minMetaBlkSize /= numFrags;
582
583 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
584
585 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
586
587 if ((numPipeTotal > 1) || (numRbTotal > 1))
588 {
589 const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
590
591 numCompressBlkPerMetaBlk =
592 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
593
594 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
595 {
596 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
597 }
598 }
599
600 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
601 Dim3d metaBlkDim = compressBlkDim;
602
603 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
604 {
605 if ((metaBlkDim.h < metaBlkDim.w) ||
606 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
607 {
608 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
609 {
610 metaBlkDim.h <<= 1;
611 }
612 else
613 {
614 metaBlkDim.d <<= 1;
615 }
616 }
617 else
618 {
619 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
620 {
621 metaBlkDim.w <<= 1;
622 }
623 else
624 {
625 metaBlkDim.d <<= 1;
626 }
627 }
628 }
629
630 UINT_32 numMetaBlkX;
631 UINT_32 numMetaBlkY;
632 UINT_32 numMetaBlkZ;
633
634 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
635 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
636 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
637
638 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
639
640 if (numFrags > m_maxCompFrag)
641 {
642 sizeAlign *= (numFrags / m_maxCompFrag);
643 }
644
645 if (m_settings.metaBaseAlignFix)
646 {
647 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
648 }
649
650 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
651 numCompressBlkPerMetaBlk * numFrags;
652 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
653 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
654
655 pOut->pitch = numMetaBlkX * metaBlkDim.w;
656 pOut->height = numMetaBlkY * metaBlkDim.h;
657 pOut->depth = numMetaBlkZ * metaBlkDim.d;
658
659 pOut->compressBlkWidth = compressBlkDim.w;
660 pOut->compressBlkHeight = compressBlkDim.h;
661 pOut->compressBlkDepth = compressBlkDim.d;
662
663 pOut->metaBlkWidth = metaBlkDim.w;
664 pOut->metaBlkHeight = metaBlkDim.h;
665 pOut->metaBlkDepth = metaBlkDim.d;
666
667 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
668 pOut->fastClearSizePerSlice =
669 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
670 }
671
672 return ADDR_OK;
673 }
674
675 /**
676 ************************************************************************************************************************
677 * Gfx9Lib::HwlComputeMaxBaseAlignments
678 *
679 * @brief
680 * Gets maximum alignments
681 * @return
682 * maximum alignments
683 ************************************************************************************************************************
684 */
685 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
686 {
687 return Size64K;
688 }
689
690 /**
691 ************************************************************************************************************************
692 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
693 *
694 * @brief
695 * Gets maximum alignments for metadata
696 * @return
697 * maximum alignments for metadata
698 ************************************************************************************************************************
699 */
700 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
701 {
702 // Max base alignment for Htile
703 const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
704 const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
705
706 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
707 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
708 ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
709 const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
710
711 UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
712
713 if (maxNumPipeTotal > 2)
714 {
715 maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
716 }
717
718 maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
719
720 if (m_settings.metaBaseAlignFix)
721 {
722 maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
723 }
724
725 if (m_settings.htileAlignFix)
726 {
727 maxBaseAlignHtile *= maxNumPipeTotal;
728 }
729
730 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
731
732 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
733 UINT_32 maxBaseAlignDcc3D = 65536;
734
735 if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
736 {
737 maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
738 }
739
740 // Max base alignment for Msaa Dcc
741 UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
742
743 if (m_settings.metaBaseAlignFix)
744 {
745 maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
746 }
747
748 return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
749 }
750
751 /**
752 ************************************************************************************************************************
753 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
754 *
755 * @brief
756 * Interface function stub of AddrComputeCmaskAddrFromCoord
757 *
758 * @return
759 * ADDR_E_RETURNCODE
760 ************************************************************************************************************************
761 */
762 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
763 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
764 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
765 {
766 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
767 input.size = sizeof(input);
768 input.cMaskFlags = pIn->cMaskFlags;
769 input.colorFlags = pIn->colorFlags;
770 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
771 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
772 input.numSlices = Max(pIn->numSlices, 1u);
773 input.swizzleMode = pIn->swizzleMode;
774 input.resourceType = pIn->resourceType;
775
776 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
777 output.size = sizeof(output);
778
779 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
780
781 if (returnCode == ADDR_OK)
782 {
783 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
784 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
785 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
786 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
787
788 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
789 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
790 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
791
792 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
793
794 UINT_32 xb = pIn->x / output.metaBlkWidth;
795 UINT_32 yb = pIn->y / output.metaBlkHeight;
796 UINT_32 zb = pIn->slice;
797
798 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
799 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
800 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
801
802 UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, 0, blockIndex };
803 UINT_64 address = pMetaEq->solve(coords);
804
805 pOut->addr = address >> 1;
806 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
807
808 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
809 pIn->swizzleMode);
810
811 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
812
813 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
814 }
815
816 return returnCode;
817 }
818
819 /**
820 ************************************************************************************************************************
821 * Gfx9Lib::HwlComputeHtileAddrFromCoord
822 *
823 * @brief
824 * Interface function stub of AddrComputeHtileAddrFromCoord
825 *
826 * @return
827 * ADDR_E_RETURNCODE
828 ************************************************************************************************************************
829 */
830 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
831 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
832 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
833 {
834 ADDR_E_RETURNCODE returnCode = ADDR_OK;
835
836 if (pIn->numMipLevels > 1)
837 {
838 returnCode = ADDR_NOTIMPLEMENTED;
839 }
840 else
841 {
842 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
843 input.size = sizeof(input);
844 input.hTileFlags = pIn->hTileFlags;
845 input.depthFlags = pIn->depthflags;
846 input.swizzleMode = pIn->swizzleMode;
847 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
848 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
849 input.numSlices = Max(pIn->numSlices, 1u);
850 input.numMipLevels = Max(pIn->numMipLevels, 1u);
851
852 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
853 output.size = sizeof(output);
854
855 returnCode = ComputeHtileInfo(&input, &output);
856
857 if (returnCode == ADDR_OK)
858 {
859 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
860 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
861 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
862 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
863
864 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
865 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
866 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
867
868 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
869
870 UINT_32 xb = pIn->x / output.metaBlkWidth;
871 UINT_32 yb = pIn->y / output.metaBlkHeight;
872 UINT_32 zb = pIn->slice;
873
874 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
875 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
876 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
877
878 UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, 0, blockIndex };
879 UINT_64 address = pMetaEq->solve(coords);
880
881 pOut->addr = address >> 1;
882
883 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
884 pIn->swizzleMode);
885
886 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
887
888 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
889 }
890 }
891
892 return returnCode;
893 }
894
895 /**
896 ************************************************************************************************************************
897 * Gfx9Lib::HwlComputeHtileCoordFromAddr
898 *
899 * @brief
900 * Interface function stub of AddrComputeHtileCoordFromAddr
901 *
902 * @return
903 * ADDR_E_RETURNCODE
904 ************************************************************************************************************************
905 */
906 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
907 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
908 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
909 {
910 ADDR_E_RETURNCODE returnCode = ADDR_OK;
911
912 if (pIn->numMipLevels > 1)
913 {
914 returnCode = ADDR_NOTIMPLEMENTED;
915 }
916 else
917 {
918 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
919 input.size = sizeof(input);
920 input.hTileFlags = pIn->hTileFlags;
921 input.swizzleMode = pIn->swizzleMode;
922 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
923 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
924 input.numSlices = Max(pIn->numSlices, 1u);
925 input.numMipLevels = Max(pIn->numMipLevels, 1u);
926
927 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
928 output.size = sizeof(output);
929
930 returnCode = ComputeHtileInfo(&input, &output);
931
932 if (returnCode == ADDR_OK)
933 {
934 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
935 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
936 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
937 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
938
939 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
940 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
941 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
942
943 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
944
945 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
946 pIn->swizzleMode);
947
948 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
949
950 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
951
952 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
953 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
954
955 UINT_32 coords[NUM_DIMS];
956 pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, coords);
957
958 pOut->slice = coords[DIM_M] / sliceSizeInBlock;
959 pOut->y = ((coords[DIM_M] % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + coords[DIM_Y];
960 pOut->x = (coords[DIM_M] % pitchInBlock) * output.metaBlkWidth + coords[DIM_X];
961 }
962 }
963
964 return returnCode;
965 }
966
967 /**
968 ************************************************************************************************************************
969 * Gfx9Lib::HwlComputeDccAddrFromCoord
970 *
971 * @brief
972 * Interface function stub of AddrComputeDccAddrFromCoord
973 *
974 * @return
975 * ADDR_E_RETURNCODE
976 ************************************************************************************************************************
977 */
978 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
979 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
980 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
981 {
982 ADDR_E_RETURNCODE returnCode = ADDR_OK;
983
984 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
985 {
986 returnCode = ADDR_NOTIMPLEMENTED;
987 }
988 else
989 {
990 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
991 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
992 UINT_32 metaBlkWidthLog2 = Log2(pIn->metaBlkWidth);
993 UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
994 UINT_32 metaBlkDepthLog2 = Log2(pIn->metaBlkDepth);
995 UINT_32 compBlkWidthLog2 = Log2(pIn->compressBlkWidth);
996 UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
997 UINT_32 compBlkDepthLog2 = Log2(pIn->compressBlkDepth);
998
999 MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1000 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1001 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1002 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1003
1004 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1005
1006 UINT_32 xb = pIn->x / pIn->metaBlkWidth;
1007 UINT_32 yb = pIn->y / pIn->metaBlkHeight;
1008 UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
1009
1010 UINT_32 pitchInBlock = pIn->pitch / pIn->metaBlkWidth;
1011 UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
1012 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1013
1014 UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
1015 UINT_64 address = pMetaEq->solve(coords);
1016
1017 pOut->addr = address >> 1;
1018
1019 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1020 pIn->swizzleMode);
1021
1022 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1023
1024 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1025 }
1026
1027 return returnCode;
1028 }
1029
1030 /**
1031 ************************************************************************************************************************
1032 * Gfx9Lib::HwlInitGlobalParams
1033 *
1034 * @brief
1035 * Initializes global parameters
1036 *
1037 * @return
1038 * TRUE if all settings are valid
1039 *
1040 ************************************************************************************************************************
1041 */
1042 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1043 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1044 {
1045 BOOL_32 valid = TRUE;
1046
1047 if (m_settings.isArcticIsland)
1048 {
1049 GB_ADDR_CONFIG_gfx9 gbAddrConfig;
1050
1051 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1052
1053 // These values are copied from CModel code
1054 switch (gbAddrConfig.bits.NUM_PIPES)
1055 {
1056 case ADDR_CONFIG_1_PIPE:
1057 m_pipes = 1;
1058 m_pipesLog2 = 0;
1059 break;
1060 case ADDR_CONFIG_2_PIPE:
1061 m_pipes = 2;
1062 m_pipesLog2 = 1;
1063 break;
1064 case ADDR_CONFIG_4_PIPE:
1065 m_pipes = 4;
1066 m_pipesLog2 = 2;
1067 break;
1068 case ADDR_CONFIG_8_PIPE:
1069 m_pipes = 8;
1070 m_pipesLog2 = 3;
1071 break;
1072 case ADDR_CONFIG_16_PIPE:
1073 m_pipes = 16;
1074 m_pipesLog2 = 4;
1075 break;
1076 case ADDR_CONFIG_32_PIPE:
1077 m_pipes = 32;
1078 m_pipesLog2 = 5;
1079 break;
1080 default:
1081 ADDR_ASSERT_ALWAYS();
1082 break;
1083 }
1084
1085 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1086 {
1087 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1088 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1089 m_pipeInterleaveLog2 = 8;
1090 break;
1091 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1092 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1093 m_pipeInterleaveLog2 = 9;
1094 break;
1095 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1096 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1097 m_pipeInterleaveLog2 = 10;
1098 break;
1099 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1100 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1101 m_pipeInterleaveLog2 = 11;
1102 break;
1103 default:
1104 ADDR_ASSERT_ALWAYS();
1105 break;
1106 }
1107
1108 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1109 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1110 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1111
1112 switch (gbAddrConfig.bits.NUM_BANKS)
1113 {
1114 case ADDR_CONFIG_1_BANK:
1115 m_banks = 1;
1116 m_banksLog2 = 0;
1117 break;
1118 case ADDR_CONFIG_2_BANK:
1119 m_banks = 2;
1120 m_banksLog2 = 1;
1121 break;
1122 case ADDR_CONFIG_4_BANK:
1123 m_banks = 4;
1124 m_banksLog2 = 2;
1125 break;
1126 case ADDR_CONFIG_8_BANK:
1127 m_banks = 8;
1128 m_banksLog2 = 3;
1129 break;
1130 case ADDR_CONFIG_16_BANK:
1131 m_banks = 16;
1132 m_banksLog2 = 4;
1133 break;
1134 default:
1135 ADDR_ASSERT_ALWAYS();
1136 break;
1137 }
1138
1139 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1140 {
1141 case ADDR_CONFIG_1_SHADER_ENGINE:
1142 m_se = 1;
1143 m_seLog2 = 0;
1144 break;
1145 case ADDR_CONFIG_2_SHADER_ENGINE:
1146 m_se = 2;
1147 m_seLog2 = 1;
1148 break;
1149 case ADDR_CONFIG_4_SHADER_ENGINE:
1150 m_se = 4;
1151 m_seLog2 = 2;
1152 break;
1153 case ADDR_CONFIG_8_SHADER_ENGINE:
1154 m_se = 8;
1155 m_seLog2 = 3;
1156 break;
1157 default:
1158 ADDR_ASSERT_ALWAYS();
1159 break;
1160 }
1161
1162 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1163 {
1164 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1165 m_rbPerSe = 1;
1166 m_rbPerSeLog2 = 0;
1167 break;
1168 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1169 m_rbPerSe = 2;
1170 m_rbPerSeLog2 = 1;
1171 break;
1172 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1173 m_rbPerSe = 4;
1174 m_rbPerSeLog2 = 2;
1175 break;
1176 default:
1177 ADDR_ASSERT_ALWAYS();
1178 break;
1179 }
1180
1181 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1182 {
1183 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1184 m_maxCompFrag = 1;
1185 m_maxCompFragLog2 = 0;
1186 break;
1187 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1188 m_maxCompFrag = 2;
1189 m_maxCompFragLog2 = 1;
1190 break;
1191 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1192 m_maxCompFrag = 4;
1193 m_maxCompFragLog2 = 2;
1194 break;
1195 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1196 m_maxCompFrag = 8;
1197 m_maxCompFragLog2 = 3;
1198 break;
1199 default:
1200 ADDR_ASSERT_ALWAYS();
1201 break;
1202 }
1203
1204 if ((m_rbPerSeLog2 == 1) &&
1205 (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1206 ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1207 {
1208 ADDR_ASSERT(m_settings.isVega10 == FALSE);
1209 ADDR_ASSERT(m_settings.isRaven == FALSE);
1210
1211 ADDR_ASSERT(m_settings.isVega20 == FALSE);
1212
1213 if (m_settings.isVega12)
1214 {
1215 m_settings.htileCacheRbConflict = 1;
1216 }
1217 }
1218
1219 // For simplicity we never allow VAR swizzle mode for GFX9, the actural value is 18 on GFX9
1220 m_blockVarSizeLog2 = 0;
1221 }
1222 else
1223 {
1224 valid = FALSE;
1225 ADDR_NOT_IMPLEMENTED();
1226 }
1227
1228 if (valid)
1229 {
1230 InitEquationTable();
1231 }
1232
1233 return valid;
1234 }
1235
1236 /**
1237 ************************************************************************************************************************
1238 * Gfx9Lib::HwlConvertChipFamily
1239 *
1240 * @brief
1241 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1242 * @return
1243 * ChipFamily
1244 ************************************************************************************************************************
1245 */
1246 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1247 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1248 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1249 {
1250 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1251
1252 switch (uChipFamily)
1253 {
1254 case FAMILY_AI:
1255 m_settings.isArcticIsland = 1;
1256 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1257 m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1258 m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1259 m_settings.isDce12 = 1;
1260
1261 if (m_settings.isVega10 == 0)
1262 {
1263 m_settings.htileAlignFix = 1;
1264 m_settings.applyAliasFix = 1;
1265 }
1266
1267 m_settings.metaBaseAlignFix = 1;
1268
1269 m_settings.depthPipeXorDisable = 1;
1270 break;
1271 case FAMILY_RV:
1272 m_settings.isArcticIsland = 1;
1273
1274 if (ASICREV_IS_RAVEN(uChipRevision))
1275 {
1276 m_settings.isRaven = 1;
1277
1278 m_settings.depthPipeXorDisable = 1;
1279 }
1280
1281 if (ASICREV_IS_RAVEN2(uChipRevision))
1282 {
1283 m_settings.isRaven = 1;
1284 }
1285
1286 if (m_settings.isRaven == 0)
1287 {
1288 m_settings.htileAlignFix = 1;
1289 m_settings.applyAliasFix = 1;
1290 }
1291
1292 if (ASICREV_IS_RENOIR(uChipRevision))
1293 {
1294 m_settings.isRaven = 1;
1295 }
1296
1297 m_settings.isDcn1 = m_settings.isRaven;
1298
1299 m_settings.metaBaseAlignFix = 1;
1300 break;
1301
1302 default:
1303 ADDR_ASSERT(!"This should be a Fusion");
1304 break;
1305 }
1306
1307 return family;
1308 }
1309
1310 /**
1311 ************************************************************************************************************************
1312 * Gfx9Lib::InitRbEquation
1313 *
1314 * @brief
1315 * Init RB equation
1316 * @return
1317 * N/A
1318 ************************************************************************************************************************
1319 */
1320 VOID Gfx9Lib::GetRbEquation(
1321 CoordEq* pRbEq, ///< [out] rb equation
1322 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1323 UINT_32 numSeLog2) ///< [in] number of shader engine
1324 const
1325 {
1326 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1327 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1328 Coordinate cx(DIM_X, rbRegion);
1329 Coordinate cy(DIM_Y, rbRegion);
1330
1331 UINT_32 start = 0;
1332 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1333
1334 // Clear the rb equation
1335 pRbEq->resize(0);
1336 pRbEq->resize(numRbTotalLog2);
1337
1338 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1339 {
1340 // Special case when more than 1 SE, and 2 RB per SE
1341 (*pRbEq)[0].add(cx);
1342 (*pRbEq)[0].add(cy);
1343 cx++;
1344 cy++;
1345
1346 if (m_settings.applyAliasFix == false)
1347 {
1348 (*pRbEq)[0].add(cy);
1349 }
1350
1351 (*pRbEq)[0].add(cy);
1352 start++;
1353 }
1354
1355 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1356
1357 for (UINT_32 i = 0; i < numBits; i++)
1358 {
1359 UINT_32 idx =
1360 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1361
1362 if ((i % 2) == 1)
1363 {
1364 (*pRbEq)[idx].add(cx);
1365 cx++;
1366 }
1367 else
1368 {
1369 (*pRbEq)[idx].add(cy);
1370 cy++;
1371 }
1372 }
1373 }
1374
1375 /**
1376 ************************************************************************************************************************
1377 * Gfx9Lib::GetDataEquation
1378 *
1379 * @brief
1380 * Get data equation for fmask and Z
1381 * @return
1382 * N/A
1383 ************************************************************************************************************************
1384 */
1385 VOID Gfx9Lib::GetDataEquation(
1386 CoordEq* pDataEq, ///< [out] data surface equation
1387 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1388 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1389 AddrResourceType resourceType, ///< [in] data surface resource type
1390 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1391 UINT_32 numSamplesLog2) ///< [in] data surface sample count
1392 const
1393 {
1394 Coordinate cx(DIM_X, 0);
1395 Coordinate cy(DIM_Y, 0);
1396 Coordinate cz(DIM_Z, 0);
1397 Coordinate cs(DIM_S, 0);
1398
1399 // Clear the equation
1400 pDataEq->resize(0);
1401 pDataEq->resize(27);
1402
1403 if (dataSurfaceType == Gfx9DataColor)
1404 {
1405 if (IsLinear(swizzleMode))
1406 {
1407 Coordinate cm(DIM_M, 0);
1408
1409 pDataEq->resize(49);
1410
1411 for (UINT_32 i = 0; i < 49; i++)
1412 {
1413 (*pDataEq)[i].add(cm);
1414 cm++;
1415 }
1416 }
1417 else if (IsThick(resourceType, swizzleMode))
1418 {
1419 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1420 UINT_32 i;
1421 if (IsStandardSwizzle(resourceType, swizzleMode))
1422 {
1423 // Standard 3d swizzle
1424 // Fill in bottom x bits
1425 for (i = elementBytesLog2; i < 4; i++)
1426 {
1427 (*pDataEq)[i].add(cx);
1428 cx++;
1429 }
1430 // Fill in 2 bits of y and then z
1431 for (i = 4; i < 6; i++)
1432 {
1433 (*pDataEq)[i].add(cy);
1434 cy++;
1435 }
1436 for (i = 6; i < 8; i++)
1437 {
1438 (*pDataEq)[i].add(cz);
1439 cz++;
1440 }
1441 if (elementBytesLog2 < 2)
1442 {
1443 // fill in z & y bit
1444 (*pDataEq)[8].add(cz);
1445 (*pDataEq)[9].add(cy);
1446 cz++;
1447 cy++;
1448 }
1449 else if (elementBytesLog2 == 2)
1450 {
1451 // fill in y and x bit
1452 (*pDataEq)[8].add(cy);
1453 (*pDataEq)[9].add(cx);
1454 cy++;
1455 cx++;
1456 }
1457 else
1458 {
1459 // fill in 2 x bits
1460 (*pDataEq)[8].add(cx);
1461 cx++;
1462 (*pDataEq)[9].add(cx);
1463 cx++;
1464 }
1465 }
1466 else
1467 {
1468 // Z 3d swizzle
1469 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1470 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1471 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1472 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1473 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1474 {
1475 (*pDataEq)[i].add(cz);
1476 cz++;
1477 }
1478 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1479 {
1480 // add an x and z
1481 (*pDataEq)[6].add(cx);
1482 (*pDataEq)[7].add(cz);
1483 cx++;
1484 cz++;
1485 }
1486 else if (elementBytesLog2 == 2)
1487 {
1488 // add a y and z
1489 (*pDataEq)[6].add(cy);
1490 (*pDataEq)[7].add(cz);
1491 cy++;
1492 cz++;
1493 }
1494 // add y and x
1495 (*pDataEq)[8].add(cy);
1496 (*pDataEq)[9].add(cx);
1497 cy++;
1498 cx++;
1499 }
1500 // Fill in bit 10 and up
1501 pDataEq->mort3d( cz, cy, cx, 10 );
1502 }
1503 else if (IsThin(resourceType, swizzleMode))
1504 {
1505 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1506 // Color 2D
1507 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1508 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1509 UINT_32 i;
1510 // Fill in bottom x bits
1511 for (i = elementBytesLog2; i < 4; i++)
1512 {
1513 (*pDataEq)[i].add(cx);
1514 cx++;
1515 }
1516 // Fill in bottom y bits
1517 for (i = 4; i < 4 + microYBits; i++)
1518 {
1519 (*pDataEq)[i].add(cy);
1520 cy++;
1521 }
1522 // Fill in last of the micro_x bits
1523 for (i = 4 + microYBits; i < 8; i++)
1524 {
1525 (*pDataEq)[i].add(cx);
1526 cx++;
1527 }
1528 // Fill in x/y bits below sample split
1529 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1530 // Fill in sample bits
1531 for (i = 0; i < numSamplesLog2; i++)
1532 {
1533 cs.set(DIM_S, i);
1534 (*pDataEq)[tileSplitStart + i].add(cs);
1535 }
1536 // Fill in x/y bits above sample split
1537 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1538 {
1539 pDataEq->mort2d(cx, cy, blockSizeLog2);
1540 }
1541 else
1542 {
1543 pDataEq->mort2d(cy, cx, blockSizeLog2);
1544 }
1545 }
1546 else
1547 {
1548 ADDR_ASSERT_ALWAYS();
1549 }
1550 }
1551 else
1552 {
1553 // Fmask or depth
1554 UINT_32 sampleStart = elementBytesLog2;
1555 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1556 UINT_32 ymajStart = 6 + numSamplesLog2;
1557
1558 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1559 {
1560 cs.set(DIM_S, s);
1561 (*pDataEq)[sampleStart + s].add(cs);
1562 }
1563
1564 // Put in the x-major order pixel bits
1565 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1566 // Put in the y-major order pixel bits
1567 pDataEq->mort2d(cy, cx, ymajStart);
1568 }
1569 }
1570
1571 /**
1572 ************************************************************************************************************************
1573 * Gfx9Lib::GetPipeEquation
1574 *
1575 * @brief
1576 * Get pipe equation
1577 * @return
1578 * N/A
1579 ************************************************************************************************************************
1580 */
1581 VOID Gfx9Lib::GetPipeEquation(
1582 CoordEq* pPipeEq, ///< [out] pipe equation
1583 CoordEq* pDataEq, ///< [in] data equation
1584 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1585 UINT_32 numPipeLog2, ///< [in] number of pipes
1586 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1587 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1588 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1589 AddrResourceType resourceType ///< [in] data surface resource type
1590 ) const
1591 {
1592 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1593 CoordEq dataEq;
1594
1595 pDataEq->copy(dataEq);
1596
1597 if (dataSurfaceType == Gfx9DataColor)
1598 {
1599 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1600 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1601 }
1602
1603 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1604
1605 // This section should only apply to z/stencil, maybe fmask
1606 // If the pipe bit is below the comp block size,
1607 // then keep moving up the address until we find a bit that is above
1608 UINT_32 pipeStart = 0;
1609
1610 if (dataSurfaceType != Gfx9DataColor)
1611 {
1612 Coordinate tileMin(DIM_X, 3);
1613
1614 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1615 {
1616 pipeStart++;
1617 }
1618
1619 // if pipe is 0, then the first pipe bit is above the comp block size,
1620 // so we don't need to do anything
1621 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1622 // we will get the same pipe equation
1623 if (pipeStart != 0)
1624 {
1625 for (UINT_32 i = 0; i < numPipeLog2; i++)
1626 {
1627 // Copy the jth bit above pipe interleave to the current pipe equation bit
1628 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1629 }
1630 }
1631 }
1632
1633 if (IsPrt(swizzleMode))
1634 {
1635 // Clear out bits above the block size if prt's are enabled
1636 dataEq.resize(blockSizeLog2);
1637 dataEq.resize(48);
1638 }
1639
1640 if (IsXor(swizzleMode))
1641 {
1642 CoordEq xorMask;
1643
1644 if (IsThick(resourceType, swizzleMode))
1645 {
1646 CoordEq xorMask2;
1647
1648 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1649
1650 xorMask.resize(numPipeLog2);
1651
1652 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1653 {
1654 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1655 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1656 }
1657 }
1658 else
1659 {
1660 // Xor in the bits above the pipe+gpu bits
1661 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1662
1663 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1664 {
1665 Coordinate co;
1666 CoordEq xorMask2;
1667 // if 1xaa and not prt, then xor in the z bits
1668 xorMask2.resize(0);
1669 xorMask2.resize(numPipeLog2);
1670 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1671 {
1672 co.set(DIM_Z, numPipeLog2 - 1 - pipeIdx);
1673 xorMask2[pipeIdx].add(co);
1674 }
1675
1676 pPipeEq->xorin(xorMask2);
1677 }
1678 }
1679
1680 xorMask.reverse();
1681 pPipeEq->xorin(xorMask);
1682 }
1683 }
1684 /**
1685 ************************************************************************************************************************
1686 * Gfx9Lib::GetMetaEquation
1687 *
1688 * @brief
1689 * Get meta equation for cmask/htile/DCC
1690 * @return
1691 * Pointer to a calculated meta equation
1692 ************************************************************************************************************************
1693 */
1694 const CoordEq* Gfx9Lib::GetMetaEquation(
1695 const MetaEqParams& metaEqParams)
1696 {
1697 UINT_32 cachedMetaEqIndex;
1698
1699 for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1700 {
1701 if (memcmp(&metaEqParams,
1702 &m_cachedMetaEqKey[cachedMetaEqIndex],
1703 static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1704 {
1705 break;
1706 }
1707 }
1708
1709 CoordEq* pMetaEq = NULL;
1710
1711 if (cachedMetaEqIndex < MaxCachedMetaEq)
1712 {
1713 pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1714 }
1715 else
1716 {
1717 m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1718
1719 pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1720
1721 m_metaEqOverrideIndex %= MaxCachedMetaEq;
1722
1723 GenMetaEquation(pMetaEq,
1724 metaEqParams.maxMip,
1725 metaEqParams.elementBytesLog2,
1726 metaEqParams.numSamplesLog2,
1727 metaEqParams.metaFlag,
1728 metaEqParams.dataSurfaceType,
1729 metaEqParams.swizzleMode,
1730 metaEqParams.resourceType,
1731 metaEqParams.metaBlkWidthLog2,
1732 metaEqParams.metaBlkHeightLog2,
1733 metaEqParams.metaBlkDepthLog2,
1734 metaEqParams.compBlkWidthLog2,
1735 metaEqParams.compBlkHeightLog2,
1736 metaEqParams.compBlkDepthLog2);
1737 }
1738
1739 return pMetaEq;
1740 }
1741
1742 /**
1743 ************************************************************************************************************************
1744 * Gfx9Lib::GenMetaEquation
1745 *
1746 * @brief
1747 * Generate the meta equation for cmask/htile/DCC into *pMetaEq
1748 * @return
1749 * N/A
1750 ************************************************************************************************************************
1751 */
1752 VOID Gfx9Lib::GenMetaEquation(
1753 CoordEq* pMetaEq, ///< [out] meta equation
1754 UINT_32 maxMip, ///< [in] max mip Id
1755 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1756 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1757 ADDR2_META_FLAGS metaFlag, ///< [in] meta flag
1758 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1759 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1760 AddrResourceType resourceType, ///< [in] data surface resource type
1761 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1762 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1763 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1764 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1765 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1766 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1767 const
1768 {
1769 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1770 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1771
1772 // Get the correct data address and rb equation
1773 CoordEq dataEq;
1774 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1775 elementBytesLog2, numSamplesLog2);
1776
1777 // Get pipe and rb equations
1778 CoordEq pipeEquation;
1779 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1780 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
// From here on the pipe bit count is whatever size the generated pipe equation reports
1781 numPipeTotalLog2 = pipeEquation.getsize();
1782
1783 if (metaFlag.linear)
1784 {
1785 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1786 ADDR_ASSERT_ALWAYS();
1787
1788 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1789
// The linear meta equation starts as a copy of the data equation
1790 dataEq.copy(*pMetaEq);
1791
1792 if (IsLinear(swizzleMode))
1793 {
1794 if (metaFlag.pipeAligned)
1795 {
1796 // Remove the pipe bits
1797 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1798 pMetaEq->shift(-shift, pipeInterleaveLog2);
1799 }
1800 // Divide by comp block size, which for linear (which is always color) is 256 B
1801 pMetaEq->shift(-8);
1802
1803 if (metaFlag.pipeAligned)
1804 {
1805 // Put pipe bits back in
1806 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1807
1808 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1809 {
1810 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1811 }
1812 }
1813 }
1814
1815 pMetaEq->shift(1);
1816 }
1817 else
1818 {
// For color surfaces the compressed fragment count is clamped to m_maxCompFragLog2;
// whatever exceeds it is counted as uncompressed fragments (uncompFragLog2).
1819 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1820 UINT_32 compFragLog2 =
1821 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1822 maxCompFragLog2 : numSamplesLog2;
1823
1824 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1825
1826 // Make sure the metaaddr is cleared
1827 pMetaEq->resize(0);
1828 pMetaEq->resize(27);
1829
// Seed the equation with interleaved coordinate bits; the x/y order is swapped when maxMip > 0
1830 if (IsThick(resourceType, swizzleMode))
1831 {
1832 Coordinate cx(DIM_X, 0);
1833 Coordinate cy(DIM_Y, 0);
1834 Coordinate cz(DIM_Z, 0);
1835
1836 if (maxMip > 0)
1837 {
1838 pMetaEq->mort3d(cy, cx, cz);
1839 }
1840 else
1841 {
1842 pMetaEq->mort3d(cx, cy, cz);
1843 }
1844 }
1845 else
1846 {
1847 Coordinate cx(DIM_X, 0);
1848 Coordinate cy(DIM_Y, 0);
1849 Coordinate cs;
1850
1851 if (maxMip > 0)
1852 {
1853 pMetaEq->mort2d(cy, cx, compFragLog2);
1854 }
1855 else
1856 {
1857 pMetaEq->mort2d(cx, cy, compFragLog2);
1858 }
1859
1860 //------------------------------------------------------------------------------------------------------------------------
1861 // Put the compressible fragments at the lsb
1862 // the uncompressible frags will be at the msb of the micro address
1863 //------------------------------------------------------------------------------------------------------------------------
1864 for (UINT_32 s = 0; s < compFragLog2; s++)
1865 {
1866 cs.set(DIM_S, s);
1867 (*pMetaEq)[s].add(cs);
1868 }
1869 }
1870
1871 // Keep a copy of the pipe equations
1872 CoordEq origPipeEquation;
1873 pipeEquation.copy(origPipeEquation);
1874
1875 Coordinate co;
1876 // filter out everything under the compressed block size
1877 co.set(DIM_X, compBlkWidthLog2);
1878 pMetaEq->Filter('<', co, 0, DIM_X);
1879 co.set(DIM_Y, compBlkHeightLog2);
1880 pMetaEq->Filter('<', co, 0, DIM_Y);
1881 co.set(DIM_Z, compBlkDepthLog2);
1882 pMetaEq->Filter('<', co, 0, DIM_Z);
1883
1884 // For non-color, filter out sample bits
1885 if (dataSurfaceType != Gfx9DataColor)
1886 {
1887 co.set(DIM_X, 0);
1888 pMetaEq->Filter('<', co, 0, DIM_S);
1889 }
1890
1891 // filter out everything above the metablock size
1892 co.set(DIM_X, metaBlkWidthLog2 - 1);
1893 pMetaEq->Filter('>', co, 0, DIM_X);
1894 co.set(DIM_Y, metaBlkHeightLog2 - 1);
1895 pMetaEq->Filter('>', co, 0, DIM_Y);
1896 co.set(DIM_Z, metaBlkDepthLog2 - 1);
1897 pMetaEq->Filter('>', co, 0, DIM_Z);
1898
1899 // filter out everything above the metablock size for the channel bits
1900 co.set(DIM_X, metaBlkWidthLog2 - 1);
1901 pipeEquation.Filter('>', co, 0, DIM_X);
1902 co.set(DIM_Y, metaBlkHeightLog2 - 1);
1903 pipeEquation.Filter('>', co, 0, DIM_Y);
1904 co.set(DIM_Z, metaBlkDepthLog2 - 1);
1905 pipeEquation.Filter('>', co, 0, DIM_Z);
1906
1907 // Make sure we still have the same number of channel bits
1908 if (pipeEquation.getsize() != numPipeTotalLog2)
1909 {
1910 ADDR_ASSERT_ALWAYS();
1911 }
1912
1913 // Loop through all channel and rb bits,
1914 // and make sure these components exist in the metadata address
1915 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1916 {
1917 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1918 {
1919 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1920 {
1921 ADDR_ASSERT_ALWAYS();
1922 }
1923 }
1924 }
1925
// When the metadata is not rb aligned, both counts are 0 and the rb equation is empty
1926 const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
1927 const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1928 const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1929 CoordEq origRbEquation;
1930
1931 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1932
// rbEquation is the working copy that gets reduced below; origRbEquation is what is
// finally written into the meta equation
1933 CoordEq rbEquation = origRbEquation;
1934
1935 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1936 {
1937 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1938 {
1939 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1940 {
1941 ADDR_ASSERT_ALWAYS();
1942 }
1943 }
1944 }
1945
// With the alias fix, co becomes the threshold for the Filter('>', co, 0, DIM_Z) applied to
// each pipe term in the loop below.
// NOTE(review): presumably this strips every z component before the comparison -- confirm
// against CoordTerm::Filter.
1946 if (m_settings.applyAliasFix)
1947 {
1948 co.set(DIM_Z, -1);
1949 }
1950
1951 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1952 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1953 {
1954 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1955 {
1956 BOOL_32 isRbEquationInPipeEquation = FALSE;
1957
1958 if (m_settings.applyAliasFix)
1959 {
1960 CoordTerm filteredPipeEq;
1961 filteredPipeEq = pipeEquation[j];
1962
1963 filteredPipeEq.Filter('>', co, 0, DIM_Z);
1964
1965 isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1966 }
1967 else
1968 {
1969 isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1970 }
1971
1972 if (isRbEquationInPipeEquation)
1973 {
1974 rbEquation[i].Clear();
1975 }
1976 }
1977 }
1978
// Records, per rb equation bit, whether pipe equation terms were appended to it below
1979 bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
1980
1981 // Loop through each bit of the channel, get the smallest coordinate,
1982 // and remove it from the metaaddr, and rb_equation
1983 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1984 {
1985 pipeEquation[i].getsmallest(co);
1986
// Filtering on '=' is expected to shrink the meta equation by exactly one bit
1987 UINT_32 old_size = pMetaEq->getsize();
1988 pMetaEq->Filter('=', co);
1989 UINT_32 new_size = pMetaEq->getsize();
1990 if (new_size != old_size-1)
1991 {
1992 ADDR_ASSERT_ALWAYS();
1993 }
1994 pipeEquation.remove(co);
1995 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
1996 {
1997 if (rbEquation[j].remove(co))
1998 {
1999 // if we actually removed something from this bit, then add the remaining
2000 // channel bits, as these can be removed for this bit
2001 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2002 {
2003 if (pipeEquation[i][k] != co)
2004 {
2005 rbEquation[j].add(pipeEquation[i][k]);
2006 rbAppendedWithPipeBits[j] = true;
2007 }
2008 }
2009 }
2010 }
2011 }
2012
2013 // Loop through the rb bits and see what remain;
2014 // filter out the smallest coordinate if it remains
2015 UINT_32 rbBitsLeft = 0;
2016 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2017 {
2018 BOOL_32 isRbEqAppended = FALSE;
2019
// With the alias fix, a bit that had pipe terms appended must hold more than one
// coordinate to count as remaining; otherwise any non-empty bit counts
2020 if (m_settings.applyAliasFix)
2021 {
2022 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2023 }
2024 else
2025 {
2026 isRbEqAppended = (rbEquation[i].getsize() > 0);
2027 }
2028
2029 if (isRbEqAppended)
2030 {
2031 rbBitsLeft++;
2032 rbEquation[i].getsmallest(co);
2033 UINT_32 old_size = pMetaEq->getsize();
2034 pMetaEq->Filter('=', co);
2035 UINT_32 new_size = pMetaEq->getsize();
// No action is taken on a size mismatch here; the pipe-bit loop above asserts in the
// same situation
2036 if (new_size != old_size - 1)
2037 {
2038 // assert warning
2039 }
2040 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2041 {
2042 if (rbEquation[j].remove(co))
2043 {
2044 // if we actually removed something from this bit, then add the remaining
2045 // rb bits, as these can be removed for this bit
2046 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2047 {
2048 if (rbEquation[i][k] != co)
2049 {
2050 rbEquation[j].add(rbEquation[i][k]);
2051 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2052 }
2053 }
2054 }
2055 }
2056 }
2057 }
2058
2059 // capture the size of the metaaddr
2060 UINT_32 metaSize = pMetaEq->getsize();
2061 // resize to 49 bits...make this a nibble address
2062 pMetaEq->resize(49);
2063 // Concatenate the macro address above the current address
2064 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2065 {
2066 co.set(DIM_M, j);
2067 (*pMetaEq)[i].add(co);
2068 }
2069
2070 // Multiply by meta element size (in nibbles)
2071 if (dataSurfaceType == Gfx9DataColor)
2072 {
2073 pMetaEq->shift(1);
2074 }
2075 else if (dataSurfaceType == Gfx9DataDepthStencil)
2076 {
2077 pMetaEq->shift(3);
2078 }
2079
2080 //------------------------------------------------------------------------------------------
2081 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2082 // Shift up from pipe interleave number of channel
2083 // and rb bits left, and uncompressed fragments
2084 //------------------------------------------------------------------------------------------
2085
2086 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2087
2088 // Put in the channel bits
2089 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2090 {
2091 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2092 }
2093
2094 // Put in remaining rb bits
// i cycles through the rb bits modulo numRbTotalLog2; the loop ends once rbBitsLeft bits
// have been placed (it is never entered when rbBitsLeft is 0)
2095 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2096 {
2097 BOOL_32 isRbEqAppended = FALSE;
2098
2099 if (m_settings.applyAliasFix)
2100 {
2101 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2102 }
2103 else
2104 {
2105 isRbEqAppended = (rbEquation[i].getsize() > 0);
2106 }
2107
2108 if (isRbEqAppended)
2109 {
2110 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2111 // Mark any rb bit we add in to the rb mask
2112 j++;
2113 }
2114 }
2115
2116 //------------------------------------------------------------------------------------------
2117 // Put in the uncompressed fragment bits
2118 //------------------------------------------------------------------------------------------
2119 for (UINT_32 i = 0; i < uncompFragLog2; i++)
2120 {
2121 co.set(DIM_S, compFragLog2 + i);
2122 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2123 }
2124 }
2125 }
2126
2127 /**
2128 ************************************************************************************************************************
2129 * Gfx9Lib::IsEquationSupported
2130 *
2131 * @brief
2132 * Check if equation is supported for given swizzle mode and resource type.
2133 *
2134 * @return
2135 * TRUE if supported
2136 ************************************************************************************************************************
2137 */
2138 BOOL_32 Gfx9Lib::IsEquationSupported(
2139 AddrResourceType rsrcType,
2140 AddrSwizzleMode swMode,
2141 UINT_32 elementBytesLog2) const
2142 {
2143 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2144 (IsValidSwMode(swMode) == TRUE) &&
2145 (IsLinear(swMode) == FALSE) &&
2146 (((IsTex2d(rsrcType) == TRUE) &&
2147 ((elementBytesLog2 < 4) ||
2148 ((IsRotateSwizzle(swMode) == FALSE) &&
2149 (IsZOrderSwizzle(swMode) == FALSE)))) ||
2150 ((IsTex3d(rsrcType) == TRUE) &&
2151 (IsRotateSwizzle(swMode) == FALSE) &&
2152 (IsBlock256b(swMode) == FALSE)));
2153
2154 return supported;
2155 }
2156
2157 /**
2158 ************************************************************************************************************************
2159 * Gfx9Lib::InitEquationTable
2160 *
2161 * @brief
2162 * Initialize Equation table.
2163 *
2164 * @return
2165 * N/A
2166 ************************************************************************************************************************
2167 */
2168 VOID Gfx9Lib::InitEquationTable()
2169 {
2170 memset(m_equationTable, 0, sizeof(m_equationTable));
2171
2172 // Loop all possible resource type (2D/3D)
2173 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2174 {
2175 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2176
2177 // Loop all possible swizzle mode
2178 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
2179 {
2180 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2181
2182 // Loop all possible bpp
2183 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2184 {
2185 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2186
2187 // Check if the input is supported
2188 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2189 {
2190 ADDR_EQUATION equation;
2191 ADDR_E_RETURNCODE retCode;
2192
2193 memset(&equation, 0, sizeof(ADDR_EQUATION));
2194
2195 // Generate the equation
2196 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2197 {
2198 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2199 }
2200 else if (IsThin(rsrcType, swMode))
2201 {
2202 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2203 }
2204 else
2205 {
2206 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2207 }
2208
2209 // Only fill the equation into the table if the return code is ADDR_OK,
2210 // otherwise if the return code is not ADDR_OK, it indicates this is not
2211 // a valid input, we do nothing but just fill invalid equation index
2212 // into the lookup table.
2213 if (retCode == ADDR_OK)
2214 {
2215 equationIndex = m_numEquations;
2216 ADDR_ASSERT(equationIndex < EquationTableSize);
2217
2218 m_equationTable[equationIndex] = equation;
2219
2220 m_numEquations++;
2221 }
2222 else
2223 {
2224 ADDR_ASSERT_ALWAYS();
2225 }
2226 }
2227
2228 // Fill the index into the lookup table, if the combination is not supported
2229 // fill the invalid equation index
2230 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2231 }
2232 }
2233 }
2234 }
2235
2236 /**
2237 ************************************************************************************************************************
2238 * Gfx9Lib::HwlGetEquationIndex
2239 *
2240 * @brief
2241 * Interface function stub of GetEquationIndex
2242 *
2243 * @return
2244 * ADDR_E_RETURNCODE
2245 ************************************************************************************************************************
2246 */
2247 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2248 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2249 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2250 ) const
2251 {
2252 AddrResourceType rsrcType = pIn->resourceType;
2253 AddrSwizzleMode swMode = pIn->swizzleMode;
2254 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2255 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2256
2257 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2258 {
2259 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2260 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2261
2262 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2263 }
2264
2265 if (pOut->pMipInfo != NULL)
2266 {
2267 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2268 {
2269 pOut->pMipInfo[i].equationIndex = index;
2270 }
2271 }
2272
2273 return index;
2274 }
2275
2276 /**
2277 ************************************************************************************************************************
2278 * Gfx9Lib::HwlComputeBlock256Equation
2279 *
2280 * @brief
2281 * Interface function stub of ComputeBlock256Equation
2282 *
2283 * @return
2284 * ADDR_E_RETURNCODE
2285 ************************************************************************************************************************
2286 */
2287 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2288 AddrResourceType rsrcType,
2289 AddrSwizzleMode swMode,
2290 UINT_32 elementBytesLog2,
2291 ADDR_EQUATION* pEquation) const
2292 {
2293 ADDR_E_RETURNCODE ret = ADDR_OK;
2294
2295 pEquation->numBits = 8;
2296
2297 UINT_32 i = 0;
2298 for (; i < elementBytesLog2; i++)
2299 {
2300 InitChannel(1, 0 , i, &pEquation->addr[i]);
2301 }
2302
2303 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2304
2305 const UINT_32 maxBitsUsed = 4;
2306 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2307 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2308
2309 for (i = 0; i < maxBitsUsed; i++)
2310 {
2311 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2312 InitChannel(1, 1, i, &y[i]);
2313 }
2314
2315 if (IsStandardSwizzle(rsrcType, swMode))
2316 {
2317 switch (elementBytesLog2)
2318 {
2319 case 0:
2320 pixelBit[0] = x[0];
2321 pixelBit[1] = x[1];
2322 pixelBit[2] = x[2];
2323 pixelBit[3] = x[3];
2324 pixelBit[4] = y[0];
2325 pixelBit[5] = y[1];
2326 pixelBit[6] = y[2];
2327 pixelBit[7] = y[3];
2328 break;
2329 case 1:
2330 pixelBit[0] = x[0];
2331 pixelBit[1] = x[1];
2332 pixelBit[2] = x[2];
2333 pixelBit[3] = y[0];
2334 pixelBit[4] = y[1];
2335 pixelBit[5] = y[2];
2336 pixelBit[6] = x[3];
2337 break;
2338 case 2:
2339 pixelBit[0] = x[0];
2340 pixelBit[1] = x[1];
2341 pixelBit[2] = y[0];
2342 pixelBit[3] = y[1];
2343 pixelBit[4] = y[2];
2344 pixelBit[5] = x[2];
2345 break;
2346 case 3:
2347 pixelBit[0] = x[0];
2348 pixelBit[1] = y[0];
2349 pixelBit[2] = y[1];
2350 pixelBit[3] = x[1];
2351 pixelBit[4] = x[2];
2352 break;
2353 case 4:
2354 pixelBit[0] = y[0];
2355 pixelBit[1] = y[1];
2356 pixelBit[2] = x[0];
2357 pixelBit[3] = x[1];
2358 break;
2359 default:
2360 ADDR_ASSERT_ALWAYS();
2361 ret = ADDR_INVALIDPARAMS;
2362 break;
2363 }
2364 }
2365 else if (IsDisplaySwizzle(rsrcType, swMode))
2366 {
2367 switch (elementBytesLog2)
2368 {
2369 case 0:
2370 pixelBit[0] = x[0];
2371 pixelBit[1] = x[1];
2372 pixelBit[2] = x[2];
2373 pixelBit[3] = y[1];
2374 pixelBit[4] = y[0];
2375 pixelBit[5] = y[2];
2376 pixelBit[6] = x[3];
2377 pixelBit[7] = y[3];
2378 break;
2379 case 1:
2380 pixelBit[0] = x[0];
2381 pixelBit[1] = x[1];
2382 pixelBit[2] = x[2];
2383 pixelBit[3] = y[0];
2384 pixelBit[4] = y[1];
2385 pixelBit[5] = y[2];
2386 pixelBit[6] = x[3];
2387 break;
2388 case 2:
2389 pixelBit[0] = x[0];
2390 pixelBit[1] = x[1];
2391 pixelBit[2] = y[0];
2392 pixelBit[3] = x[2];
2393 pixelBit[4] = y[1];
2394 pixelBit[5] = y[2];
2395 break;
2396 case 3:
2397 pixelBit[0] = x[0];
2398 pixelBit[1] = y[0];
2399 pixelBit[2] = x[1];
2400 pixelBit[3] = x[2];
2401 pixelBit[4] = y[1];
2402 break;
2403 case 4:
2404 pixelBit[0] = x[0];
2405 pixelBit[1] = y[0];
2406 pixelBit[2] = x[1];
2407 pixelBit[3] = y[1];
2408 break;
2409 default:
2410 ADDR_ASSERT_ALWAYS();
2411 ret = ADDR_INVALIDPARAMS;
2412 break;
2413 }
2414 }
2415 else if (IsRotateSwizzle(swMode))
2416 {
2417 switch (elementBytesLog2)
2418 {
2419 case 0:
2420 pixelBit[0] = y[0];
2421 pixelBit[1] = y[1];
2422 pixelBit[2] = y[2];
2423 pixelBit[3] = x[1];
2424 pixelBit[4] = x[0];
2425 pixelBit[5] = x[2];
2426 pixelBit[6] = x[3];
2427 pixelBit[7] = y[3];
2428 break;
2429 case 1:
2430 pixelBit[0] = y[0];
2431 pixelBit[1] = y[1];
2432 pixelBit[2] = y[2];
2433 pixelBit[3] = x[0];
2434 pixelBit[4] = x[1];
2435 pixelBit[5] = x[2];
2436 pixelBit[6] = x[3];
2437 break;
2438 case 2:
2439 pixelBit[0] = y[0];
2440 pixelBit[1] = y[1];
2441 pixelBit[2] = x[0];
2442 pixelBit[3] = y[2];
2443 pixelBit[4] = x[1];
2444 pixelBit[5] = x[2];
2445 break;
2446 case 3:
2447 pixelBit[0] = y[0];
2448 pixelBit[1] = x[0];
2449 pixelBit[2] = y[1];
2450 pixelBit[3] = x[1];
2451 pixelBit[4] = x[2];
2452 break;
2453 default:
2454 ADDR_ASSERT_ALWAYS();
2455 case 4:
2456 ret = ADDR_INVALIDPARAMS;
2457 break;
2458 }
2459 }
2460 else
2461 {
2462 ADDR_ASSERT_ALWAYS();
2463 ret = ADDR_INVALIDPARAMS;
2464 }
2465
2466 // Post validation
2467 if (ret == ADDR_OK)
2468 {
2469 ASSERTED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2470 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2471 (microBlockDim.w * (1 << elementBytesLog2)));
2472 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2473 }
2474
2475 return ret;
2476 }
2477
2478 /**
2479 ************************************************************************************************************************
2480 * Gfx9Lib::HwlComputeThinEquation
2481 *
2482 * @brief
2483 * Interface function stub of ComputeThinEquation
2484 *
2485 * @return
2486 * ADDR_E_RETURNCODE
2487 ************************************************************************************************************************
2488 */
2489 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2490 AddrResourceType rsrcType,
2491 AddrSwizzleMode swMode,
2492 UINT_32 elementBytesLog2,
2493 ADDR_EQUATION* pEquation) const
2494 {
2495 ADDR_E_RETURNCODE ret = ADDR_OK;
2496
2497 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2498
2499 UINT_32 maxXorBits = blockSizeLog2;
2500 if (IsNonPrtXor(swMode))
2501 {
2502 // For non-prt-xor, maybe need to initialize some more bits for xor
2503 // The highest xor bit used in equation will be max the following 3 items:
2504 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2505 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2506 // 3. blockSizeLog2
2507
2508 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2509 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2510 GetPipeXorBits(blockSizeLog2) +
2511 2 * GetBankXorBits(blockSizeLog2));
2512 }
2513
2514 const UINT_32 maxBitsUsed = 14;
2515 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2516 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2517 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2518
2519 const UINT_32 extraXorBits = 16;
2520 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2521 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2522
2523 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2524 {
2525 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2526 InitChannel(1, 1, i, &y[i]);
2527 }
2528
2529 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2530
2531 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2532 {
2533 InitChannel(1, 0 , i, &pixelBit[i]);
2534 }
2535
2536 UINT_32 xIdx = 0;
2537 UINT_32 yIdx = 0;
2538 UINT_32 lowBits = 0;
2539
2540 if (IsZOrderSwizzle(swMode))
2541 {
2542 if (elementBytesLog2 <= 3)
2543 {
2544 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2545 {
2546 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2547 }
2548
2549 lowBits = 6;
2550 }
2551 else
2552 {
2553 ret = ADDR_INVALIDPARAMS;
2554 }
2555 }
2556 else
2557 {
2558 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2559
2560 if (ret == ADDR_OK)
2561 {
2562 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2563 xIdx = Log2(microBlockDim.w);
2564 yIdx = Log2(microBlockDim.h);
2565 lowBits = 8;
2566 }
2567 }
2568
2569 if (ret == ADDR_OK)
2570 {
2571 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2572 {
2573 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2574 }
2575
2576 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2577 {
2578 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2579 }
2580
2581 if (IsXor(swMode))
2582 {
2583 // Fill XOR bits
2584 UINT_32 pipeStart = m_pipeInterleaveLog2;
2585 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2586
2587 UINT_32 bankStart = pipeStart + pipeXorBits;
2588 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2589
2590 for (UINT_32 i = 0; i < pipeXorBits; i++)
2591 {
2592 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2593 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2594 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2595
2596 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2597 }
2598
2599 for (UINT_32 i = 0; i < bankXorBits; i++)
2600 {
2601 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2602 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2603 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2604
2605 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2606 }
2607
2608 if (IsPrt(swMode) == FALSE)
2609 {
2610 for (UINT_32 i = 0; i < pipeXorBits; i++)
2611 {
2612 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2613 }
2614
2615 for (UINT_32 i = 0; i < bankXorBits; i++)
2616 {
2617 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2618 }
2619 }
2620 }
2621
2622 pEquation->numBits = blockSizeLog2;
2623 }
2624
2625 return ret;
2626 }
2627
2628 /**
2629 ************************************************************************************************************************
2630 * Gfx9Lib::HwlComputeThickEquation
2631 *
2632 * @brief
2633 * Interface function stub of ComputeThickEquation
2634 *
2635 * @return
2636 * ADDR_E_RETURNCODE
2637 ************************************************************************************************************************
2638 */
2639 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2640 AddrResourceType rsrcType,
2641 AddrSwizzleMode swMode,
2642 UINT_32 elementBytesLog2,
2643 ADDR_EQUATION* pEquation) const
2644 {
2645 ADDR_E_RETURNCODE ret = ADDR_OK;
2646
2647 ADDR_ASSERT(IsTex3d(rsrcType));
2648
2649 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2650
2651 UINT_32 maxXorBits = blockSizeLog2;
2652 if (IsNonPrtXor(swMode))
2653 {
2654 // For non-prt-xor, maybe need to initialize some more bits for xor
2655 // The highest xor bit used in equation will be max the following 3:
2656 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2657 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2658 // 3. blockSizeLog2
2659
2660 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2661 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2662 GetPipeXorBits(blockSizeLog2) +
2663 3 * GetBankXorBits(blockSizeLog2));
2664 }
2665
2666 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2667 {
2668 InitChannel(1, 0 , i, &pEquation->addr[i]);
2669 }
2670
2671 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2672
2673 const UINT_32 maxBitsUsed = 12;
2674 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2675 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2676 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2677 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2678
2679 const UINT_32 extraXorBits = 24;
2680 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2681 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2682
2683 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2684 {
2685 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2686 InitChannel(1, 1, i, &y[i]);
2687 InitChannel(1, 2, i, &z[i]);
2688 }
2689
2690 if (IsZOrderSwizzle(swMode))
2691 {
2692 switch (elementBytesLog2)
2693 {
2694 case 0:
2695 pixelBit[0] = x[0];
2696 pixelBit[1] = y[0];
2697 pixelBit[2] = x[1];
2698 pixelBit[3] = y[1];
2699 pixelBit[4] = z[0];
2700 pixelBit[5] = z[1];
2701 pixelBit[6] = x[2];
2702 pixelBit[7] = z[2];
2703 pixelBit[8] = y[2];
2704 pixelBit[9] = x[3];
2705 break;
2706 case 1:
2707 pixelBit[0] = x[0];
2708 pixelBit[1] = y[0];
2709 pixelBit[2] = x[1];
2710 pixelBit[3] = y[1];
2711 pixelBit[4] = z[0];
2712 pixelBit[5] = z[1];
2713 pixelBit[6] = z[2];
2714 pixelBit[7] = y[2];
2715 pixelBit[8] = x[2];
2716 break;
2717 case 2:
2718 pixelBit[0] = x[0];
2719 pixelBit[1] = y[0];
2720 pixelBit[2] = x[1];
2721 pixelBit[3] = z[0];
2722 pixelBit[4] = y[1];
2723 pixelBit[5] = z[1];
2724 pixelBit[6] = y[2];
2725 pixelBit[7] = x[2];
2726 break;
2727 case 3:
2728 pixelBit[0] = x[0];
2729 pixelBit[1] = y[0];
2730 pixelBit[2] = z[0];
2731 pixelBit[3] = x[1];
2732 pixelBit[4] = z[1];
2733 pixelBit[5] = y[1];
2734 pixelBit[6] = x[2];
2735 break;
2736 case 4:
2737 pixelBit[0] = x[0];
2738 pixelBit[1] = y[0];
2739 pixelBit[2] = z[0];
2740 pixelBit[3] = z[1];
2741 pixelBit[4] = y[1];
2742 pixelBit[5] = x[1];
2743 break;
2744 default:
2745 ADDR_ASSERT_ALWAYS();
2746 ret = ADDR_INVALIDPARAMS;
2747 break;
2748 }
2749 }
2750 else if (IsStandardSwizzle(rsrcType, swMode))
2751 {
2752 switch (elementBytesLog2)
2753 {
2754 case 0:
2755 pixelBit[0] = x[0];
2756 pixelBit[1] = x[1];
2757 pixelBit[2] = x[2];
2758 pixelBit[3] = x[3];
2759 pixelBit[4] = y[0];
2760 pixelBit[5] = y[1];
2761 pixelBit[6] = z[0];
2762 pixelBit[7] = z[1];
2763 pixelBit[8] = z[2];
2764 pixelBit[9] = y[2];
2765 break;
2766 case 1:
2767 pixelBit[0] = x[0];
2768 pixelBit[1] = x[1];
2769 pixelBit[2] = x[2];
2770 pixelBit[3] = y[0];
2771 pixelBit[4] = y[1];
2772 pixelBit[5] = z[0];
2773 pixelBit[6] = z[1];
2774 pixelBit[7] = z[2];
2775 pixelBit[8] = y[2];
2776 break;
2777 case 2:
2778 pixelBit[0] = x[0];
2779 pixelBit[1] = x[1];
2780 pixelBit[2] = y[0];
2781 pixelBit[3] = y[1];
2782 pixelBit[4] = z[0];
2783 pixelBit[5] = z[1];
2784 pixelBit[6] = y[2];
2785 pixelBit[7] = x[2];
2786 break;
2787 case 3:
2788 pixelBit[0] = x[0];
2789 pixelBit[1] = y[0];
2790 pixelBit[2] = y[1];
2791 pixelBit[3] = z[0];
2792 pixelBit[4] = z[1];
2793 pixelBit[5] = x[1];
2794 pixelBit[6] = x[2];
2795 break;
2796 case 4:
2797 pixelBit[0] = y[0];
2798 pixelBit[1] = y[1];
2799 pixelBit[2] = z[0];
2800 pixelBit[3] = z[1];
2801 pixelBit[4] = x[0];
2802 pixelBit[5] = x[1];
2803 break;
2804 default:
2805 ADDR_ASSERT_ALWAYS();
2806 ret = ADDR_INVALIDPARAMS;
2807 break;
2808 }
2809 }
2810 else
2811 {
2812 ADDR_ASSERT_ALWAYS();
2813 ret = ADDR_INVALIDPARAMS;
2814 }
2815
2816 if (ret == ADDR_OK)
2817 {
2818 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2819 UINT_32 xIdx = Log2(microBlockDim.w);
2820 UINT_32 yIdx = Log2(microBlockDim.h);
2821 UINT_32 zIdx = Log2(microBlockDim.d);
2822
2823 pixelBit = pEquation->addr;
2824
2825 const UINT_32 lowBits = 10;
2826 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2827 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2828
2829 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2830 {
2831 if ((i % 3) == 0)
2832 {
2833 pixelBit[i] = x[xIdx++];
2834 }
2835 else if ((i % 3) == 1)
2836 {
2837 pixelBit[i] = z[zIdx++];
2838 }
2839 else
2840 {
2841 pixelBit[i] = y[yIdx++];
2842 }
2843 }
2844
2845 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2846 {
2847 if ((i % 3) == 0)
2848 {
2849 xorExtra[i - blockSizeLog2] = x[xIdx++];
2850 }
2851 else if ((i % 3) == 1)
2852 {
2853 xorExtra[i - blockSizeLog2] = z[zIdx++];
2854 }
2855 else
2856 {
2857 xorExtra[i - blockSizeLog2] = y[yIdx++];
2858 }
2859 }
2860
2861 if (IsXor(swMode))
2862 {
2863 // Fill XOR bits
2864 UINT_32 pipeStart = m_pipeInterleaveLog2;
2865 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2866 for (UINT_32 i = 0; i < pipeXorBits; i++)
2867 {
2868 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2869 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2870 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2871
2872 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2873
2874 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2875 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2876 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2877
2878 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2879 }
2880
2881 UINT_32 bankStart = pipeStart + pipeXorBits;
2882 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2883 for (UINT_32 i = 0; i < bankXorBits; i++)
2884 {
2885 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2886 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2887 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2888
2889 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2890
2891 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2892 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2893 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2894
2895 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2896 }
2897 }
2898
2899 pEquation->numBits = blockSizeLog2;
2900 }
2901
2902 return ret;
2903 }
2904
2905 /**
2906 ************************************************************************************************************************
2907 * Gfx9Lib::IsValidDisplaySwizzleMode
2908 *
2909 * @brief
2910 * Check if a swizzle mode is supported by display engine
2911 *
2912 * @return
2913 * TRUE is swizzle mode is supported by display engine
2914 ************************************************************************************************************************
2915 */
2916 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2917 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2918 {
2919 BOOL_32 support = FALSE;
2920
2921 if (m_settings.isDce12)
2922 {
2923 switch (pIn->swizzleMode)
2924 {
2925 case ADDR_SW_256B_D:
2926 case ADDR_SW_256B_R:
2927 support = (pIn->bpp == 32);
2928 break;
2929
2930 case ADDR_SW_LINEAR:
2931 case ADDR_SW_4KB_D:
2932 case ADDR_SW_4KB_R:
2933 case ADDR_SW_64KB_D:
2934 case ADDR_SW_64KB_R:
2935 case ADDR_SW_4KB_D_X:
2936 case ADDR_SW_4KB_R_X:
2937 case ADDR_SW_64KB_D_X:
2938 case ADDR_SW_64KB_R_X:
2939 support = (pIn->bpp <= 64);
2940 break;
2941
2942 default:
2943 break;
2944 }
2945 }
2946 else if (m_settings.isDcn1)
2947 {
2948 switch (pIn->swizzleMode)
2949 {
2950 case ADDR_SW_4KB_D:
2951 case ADDR_SW_64KB_D:
2952 case ADDR_SW_64KB_D_T:
2953 case ADDR_SW_4KB_D_X:
2954 case ADDR_SW_64KB_D_X:
2955 support = (pIn->bpp == 64);
2956 break;
2957
2958 case ADDR_SW_LINEAR:
2959 case ADDR_SW_4KB_S:
2960 case ADDR_SW_64KB_S:
2961 case ADDR_SW_64KB_S_T:
2962 case ADDR_SW_4KB_S_X:
2963 case ADDR_SW_64KB_S_X:
2964 support = (pIn->bpp <= 64);
2965 break;
2966
2967 default:
2968 break;
2969 }
2970 }
2971 else
2972 {
2973 ADDR_NOT_IMPLEMENTED();
2974 }
2975
2976 return support;
2977 }
2978
2979 /**
2980 ************************************************************************************************************************
2981 * Gfx9Lib::HwlComputePipeBankXor
2982 *
2983 * @brief
2984 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2985 *
2986 * @return
2987 * PipeBankXor value
2988 ************************************************************************************************************************
2989 */
2990 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
2991 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
2992 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
2993 {
2994 if (IsXor(pIn->swizzleMode))
2995 {
2996 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2997 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
2998 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
2999
3000 UINT_32 pipeXor = 0;
3001 UINT_32 bankXor = 0;
3002
3003 const UINT_32 bankMask = (1 << bankBits) - 1;
3004 const UINT_32 index = pIn->surfIndex & bankMask;
3005
3006 const UINT_32 bpp = pIn->flags.fmask ?
3007 GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3008 if (bankBits == 4)
3009 {
3010 static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3011 static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3012
3013 bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3014 }
3015 else if (bankBits > 0)
3016 {
3017 UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3018 bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3019 bankXor = (index * bankIncrease) & bankMask;
3020 }
3021
3022 pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3023 }
3024 else
3025 {
3026 pOut->pipeBankXor = 0;
3027 }
3028
3029