amd/addrlib: Clean up unused colorFlags argument
[mesa.git] / src / amd / addrlib / src / gfx9 / gfx9addrlib.cpp
1 /*
2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx9addrlib.h"
35
36 #include "gfx9_gb_reg.h"
37
38 #include "amdgpu_asic_addr.h"
39
40 #include "util/macros.h"
41
42 ////////////////////////////////////////////////////////////////////////////////////////////////////
43 ////////////////////////////////////////////////////////////////////////////////////////////////////
44
45 namespace Addr
46 {
47
48 /**
49 ************************************************************************************************************************
50 * Gfx9HwlInit
51 *
52 * @brief
53 * Creates a Gfx9Lib object.
54 *
55 * @return
56 * Returns a Gfx9Lib object pointer.
57 ************************************************************************************************************************
58 */
59 Addr::Lib* Gfx9HwlInit(const Client* pClient)
60 {
61 return V2::Gfx9Lib::CreateObj(pClient);
62 }
63
64 namespace V2
65 {
66
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68 // Static Const Member
69 ////////////////////////////////////////////////////////////////////////////////////////////////////
70
// Per-swizzle-mode property flags: one row per swizzle mode, ADDR_SW_MAX_TYPE rows in total.
// The trailing comment on each row names the swizzle mode the row describes; rows marked
// "Reserved" are all-zero. The Gfx9Lib constructor copies this table into m_swizzleModeTable.
// Column meaning is given by the header comment on the opening-brace line; the column order
// presumably mirrors the field order of SwizzleModeFlags -- confirm against its declaration.
71 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
72 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
73 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
74 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
75 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
76 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_R
77
78 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
79 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
80 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
81 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_R
82
83 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
84 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
85 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
86 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_R
87
88 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
89 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
90 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
91 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
92
// The _T rows set both the XOR and the T columns; the _x/_X rows below set only XOR.
93 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_Z_T
94 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
95 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
96 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0}, // ADDR_SW_64KB_R_T
97
98 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_Z_x
99 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_x
100 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_x
101 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_4KB_R_x
102
103 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
104 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
105 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
106 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_64KB_R_X
107
108 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
109 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
110 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
111 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
// Only the Linear column is set here, exactly as in the ADDR_SW_LINEAR row.
112 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
113 };
114
// Sixteen offsets in strictly decreasing order, ending at 0. Judging by the name these are mip-tail
// offsets counted in 256B units, presumably indexed by position within the mip tail -- TODO confirm
// against the code that reads this table (not visible in this part of the file).
115 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};
116
// 3D block dimensions {w, h, d}. Entry i covers (256 >> i) elements (256, 128, 64, 32, 16), so the
// index is presumably log2(bytes per element), making every entry span 256 bytes -- TODO confirm.
117 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
118
// Same element counts per entry as Block256_3dS, but with a different split between w, h and d.
119 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
120
121 /**
122 ************************************************************************************************************************
123 * Gfx9Lib::Gfx9Lib
124 *
125 * @brief
126 * Constructor
127 *
128 ************************************************************************************************************************
129 */
130 Gfx9Lib::Gfx9Lib(const Client* pClient)
131 :
132 Lib(pClient)
133 {
134 m_class = AI_ADDRLIB;
135 memset(&m_settings, 0, sizeof(m_settings));
136 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
137 memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
138 m_metaEqOverrideIndex = 0;
139 }
140
141 /**
142 ************************************************************************************************************************
143 * Gfx9Lib::~Gfx9Lib
144 *
145 * @brief
146 * Destructor
147 ************************************************************************************************************************
148 */
149 Gfx9Lib::~Gfx9Lib()
150 {
// Intentionally empty: this destructor body releases nothing itself.
151 }
152
153 /**
154 ************************************************************************************************************************
155 * Gfx9Lib::HwlComputeHtileInfo
156 *
157 * @brief
158 * Interface function stub of AddrComputeHtileInfo
159 *
160 * @return
161 * ADDR_E_RETURNCODE
162 ************************************************************************************************************************
163 */
164 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
165 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
166 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
167 ) const
168 {
// Computes the HTILE meta block dimensions, pitch/height, per-slice size, total size and base
// alignment for the surface described by pIn and stores them in pOut. Always returns ADDR_OK.
//
// The RB count collapses to 1 unless rbAligned is requested; the pipe count comes from
// GetPipeNumForMetaAddressing (presumably 1 when pipeAligned is not set -- confirm in that helper).
169 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
170 pIn->swizzleMode);
171
172 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
173
174 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
175
// log2 of the number of compressed blocks per meta block: 10 for the single pipe / single RB case,
// otherwise scaled by the SE and RB-per-SE counts. With applyAliasFix the constant 10 is raised to
// the pipe interleave log2 when that is larger.
176 if ((numPipeTotal == 1) && (numRbTotal == 1))
177 {
178 numCompressBlkPerMetaBlkLog2 = 10;
179 }
180 else
181 {
182 if (m_settings.applyAliasFix)
183 {
184 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
185 }
186 else
187 {
188 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
189 }
190 }
191
192 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
193
// Start from an 8x8x1 block and distribute the log2 block count between width and height.
// With more than one mip level the width gets the smaller half; otherwise RoundHalf decides
// (the DEBUG cross-check below implies it gives the width the larger half for odd bit counts).
194 Dim3d metaBlkDim = {8, 8, 1};
195 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
196 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
197 UINT_32 heightAmp = totalAmpBits - widthAmp;
198 metaBlkDim.w <<= widthAmp;
199 metaBlkDim.h <<= heightAmp;
200
// DEBUG-only cross-check: rebuild the dimensions one doubling at a time (height first when mipmapped
// and the block is square, width first otherwise) and assert they match the closed form above.
201 #if DEBUG
202 Dim3d metaBlkDimDbg = {8, 8, 1};
203 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
204 {
205 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
206 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
207 {
208 metaBlkDimDbg.h <<= 1;
209 }
210 else
211 {
212 metaBlkDimDbg.w <<= 1;
213 }
214 }
215 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
216 #endif
217
218 UINT_32 numMetaBlkX;
219 UINT_32 numMetaBlkY;
220 UINT_32 numMetaBlkZ;
221
// HTILE data is always treated as thin (dataThick == FALSE). pOut->pMipInfo is forwarded as-is;
// GetMetaMipInfo skips the per-mip output when it is NULL.
222 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
223 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
224 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
225
// Meta block size is 4x the compressed block count (presumably 4 bytes of HTILE per block -- confirm).
226 const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
227 UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
228
// Non-XOR swizzle modes with more than two pipes need an extra factor of numPipeTotal / 2.
229 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
230 {
231 align *= (numPipeTotal >> 1);
232 }
233
// The alignment is never smaller than one meta block.
234 align = Max(align, metaBlkSize);
235
// metaBaseAlignFix: also align to at least the data surface's swizzle block size.
236 if (m_settings.metaBaseAlignFix)
237 {
238 align = Max(align, GetBlockSize(pIn->swizzleMode));
239 }
240
// htileAlignFix: scale the alignment up by 2^rbMaskPadding, where the padding is how far
// (metaBlkSizeLog2 - maxNumOfRbMaskBits) falls short of htileCachelineSizeLog2 (11), clamped at zero.
241 if (m_settings.htileAlignFix)
242 {
243 const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
244 const INT_32 htileCachelineSizeLog2 = 11;
245 const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
246
247 INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
248
249 align <<= rbMaskPadding;
250 }
251
// Pitch and height are the meta block counts expanded back to the meta block dimensions.
252 pOut->pitch = numMetaBlkX * metaBlkDim.w;
253 pOut->height = numMetaBlkY * metaBlkDim.h;
254 pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
255
256 pOut->metaBlkWidth = metaBlkDim.w;
257 pOut->metaBlkHeight = metaBlkDim.h;
258 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
259
// Total size covers numMetaBlkZ slices, rounded up to the base alignment.
260 pOut->baseAlign = align;
261 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
262
263 return ADDR_OK;
264 }
265
266 /**
267 ************************************************************************************************************************
268 * Gfx9Lib::HwlComputeCmaskInfo
269 *
270 * @brief
271 * Interface function stub of AddrComputeCmaskInfo
272 *
273 * @return
274 * ADDR_E_RETURNCODE
275 ************************************************************************************************************************
276 */
277 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
278 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
279 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
280 ) const
281 {
// Computes the CMASK meta block dimensions, pitch/height, per-slice size, total size and base
// alignment for the surface described by pIn and stores them in pOut. Always returns ADDR_OK.
// Only 2D resources are expected; other resource types trip the assertion below.
282 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
283
284 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
285 pIn->swizzleMode);
286
// The RB count collapses to 1 unless rbAligned is requested.
287 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
288
289 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
290
// log2 of the number of compressed blocks per meta block: 13 for the single pipe / single RB case,
// otherwise derived from the SE / RB-per-SE counts (as for HTILE) but never less than 13.
291 if ((numPipeTotal == 1) && (numRbTotal == 1))
292 {
293 numCompressBlkPerMetaBlkLog2 = 13;
294 }
295 else
296 {
297 if (m_settings.applyAliasFix)
298 {
299 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
300 }
301 else
302 {
303 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
304 }
305
306 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
307 }
308
309 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
310
// Start from an 8x8 block; the height gets the smaller half of the doublings, so the width
// receives the extra doubling when the bit count is odd.
311 Dim2d metaBlkDim = {8, 8};
312 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
313 UINT_32 heightAmp = totalAmpBits >> 1;
314 UINT_32 widthAmp = totalAmpBits - heightAmp;
315 metaBlkDim.w <<= widthAmp;
316 metaBlkDim.h <<= heightAmp;
317
// DEBUG-only cross-check: rebuild the dimensions one doubling at a time (width first, then
// alternating) and assert they match the closed form above.
318 #if DEBUG
319 Dim2d metaBlkDimDbg = {8, 8};
320 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
321 {
322 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
323 {
324 metaBlkDimDbg.h <<= 1;
325 }
326 else
327 {
328 metaBlkDimDbg.w <<= 1;
329 }
330 }
331 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
332 #endif
333
// Number of meta blocks needed to cover the surface (ceiling division); at least one slice.
334 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
335 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
336 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
337
338 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
339
// metaBaseAlignFix: also align to at least the data surface's swizzle block size.
340 if (m_settings.metaBaseAlignFix)
341 {
342 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
343 }
344
// Slice size is half the compressed block count (presumably one nibble of CMASK per block -- confirm).
345 pOut->pitch = numMetaBlkX * metaBlkDim.w;
346 pOut->height = numMetaBlkY * metaBlkDim.h;
347 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
348 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
// Base alignment is the larger of one meta block's size (count / 2) and the size alignment.
349 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
350
351 pOut->metaBlkWidth = metaBlkDim.w;
352 pOut->metaBlkHeight = metaBlkDim.h;
353
354 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
355
356 return ADDR_OK;
357 }
358
359 /**
360 ************************************************************************************************************************
361 * Gfx9Lib::GetMetaMipInfo
362 *
363 * @brief
364 * Get meta mip info
365 *
366 * @return
367 * N/A
368 ************************************************************************************************************************
369 */
370 VOID Gfx9Lib::GetMetaMipInfo(
371 UINT_32 numMipLevels, ///< [in] number of mip levels
372 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
373 BOOL_32 dataThick, ///< [in] data surface is thick
374 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
375 UINT_32 mip0Width, ///< [in] mip0 width
376 UINT_32 mip0Height, ///< [in] mip0 height
377 UINT_32 mip0Depth, ///< [in] mip0 depth
378 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
379 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
380 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
381 const
382 {
// Computes how many meta blocks the whole mip chain occupies in X/Y/Z and, when pInfo is not NULL,
// the placement (start coordinate and aligned size) of every mip level inside that area.
//
// Meta blocks needed to cover mip 0 in each dimension (ceiling division).
383 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
384 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
385 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
// A mip is "in the tail" once it fits in a full-width, half-height, full-depth meta block.
386 UINT_32 tailWidth = pMetaBlkDim->w;
387 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
388 UINT_32 tailDepth = pMetaBlkDim->d;
389 BOOL_32 inTail = FALSE;
// Stays at ADDR_MAJOR_MAX_TYPE for single-level surfaces, which makes both switches below no-ops.
390 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
391
392 if (numMipLevels > 1)
393 {
// Pick the major axis: Z only for thick data whose Z block count is strictly the largest,
// otherwise X (ties with Y go to X), otherwise Y.
394 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
395 {
396 // Z major
397 major = ADDR_MAJOR_Z;
398 }
399 else if (numMetaBlkX >= numMetaBlkY)
400 {
401 // X major
402 major = ADDR_MAJOR_X;
403 }
404 else
405 {
406 // Y major
407 major = ADDR_MAJOR_Y;
408 }
409
// Depth only participates in the tail test for thick data.
410 inTail = ((mip0Width <= tailWidth) &&
411 (mip0Height <= tailHeight) &&
412 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
413
414 if (inTail == FALSE)
415 {
// pMipDim is the block count that gets enlarged to make room for the lower mips;
// pOrderDim is the block count along the major axis, compared against orderLimit.
416 UINT_32 orderLimit;
417 UINT_32 *pMipDim;
418 UINT_32 *pOrderDim;
419
420 if (major == ADDR_MAJOR_Z)
421 {
422 // Z major
423 pMipDim = &numMetaBlkY;
424 pOrderDim = &numMetaBlkZ;
425 orderLimit = 4;
426 }
427 else if (major == ADDR_MAJOR_X)
428 {
429 // X major
430 pMipDim = &numMetaBlkY;
431 pOrderDim = &numMetaBlkX;
432 orderLimit = 4;
433 }
434 else
435 {
436 // Y major
437 pMipDim = &numMetaBlkX;
438 pOrderDim = &numMetaBlkY;
439 orderLimit = 2;
440 }
441
// Fewer than 3 blocks, a long major axis and more than 3 mips: add a flat 2 blocks.
// Otherwise grow by half of the current count, rounded up.
442 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
443 {
444 *pMipDim += 2;
445 }
446 else
447 {
448 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
449 }
450 }
451 }
452
// Per-mip output is optional.
453 if (pInfo != NULL)
454 {
455 UINT_32 mipWidth = mip0Width;
456 UINT_32 mipHeight = mip0Height;
457 UINT_32 mipDepth = mip0Depth;
// Running origin of the next mip level, in the same units as the mip dimensions.
458 Dim3d mipCoord = {0};
459
460 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
461 {
462 if (inTail)
463 {
// GetMetaMiptailInfo receives the count of remaining levels (numMipLevels - mip) and
// presumably fills all of them starting at pInfo[mip] -- confirm in that helper; the loop
// ends here.
464 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
465 pMetaBlkDim);
466 break;
467 }
468 else
469 {
// Pad this level up to whole meta blocks before recording it.
470 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
471 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
472 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
473
474 pInfo[mip].inMiptail = FALSE;
475 pInfo[mip].startX = mipCoord.w;
476 pInfo[mip].startY = mipCoord.h;
477 pInfo[mip].startZ = mipCoord.d;
478 pInfo[mip].width = mipWidth;
479 pInfo[mip].height = mipHeight;
480 pInfo[mip].depth = dataThick ? mipDepth : 1;
481
// Advance the origin for the next level: after mip 1 and every mip >= 3 step along the
// major axis; after mips 0 and 2 step along the secondary axis instead.
482 if ((mip >= 3) || (mip & 1))
483 {
484 switch (major)
485 {
486 case ADDR_MAJOR_X:
487 mipCoord.w += mipWidth;
488 break;
489 case ADDR_MAJOR_Y:
490 mipCoord.h += mipHeight;
491 break;
492 case ADDR_MAJOR_Z:
493 mipCoord.d += mipDepth;
494 break;
495 default:
496 break;
497 }
498 }
499 else
500 {
501 switch (major)
502 {
503 case ADDR_MAJOR_X:
504 mipCoord.h += mipHeight;
505 break;
506 case ADDR_MAJOR_Y:
507 mipCoord.w += mipWidth;
508 break;
509 case ADDR_MAJOR_Z:
510 mipCoord.h += mipHeight;
511 break;
512 default:
513 break;
514 }
515 }
516
// Next level is half the (aligned) size in every dimension, never below 1.
517 mipWidth = Max(mipWidth >> 1, 1u);
518 mipHeight = Max(mipHeight >> 1, 1u);
519 mipDepth = Max(mipDepth >> 1, 1u);
520
// Re-evaluate the tail condition for the next level.
521 inTail = ((mipWidth <= tailWidth) &&
522 (mipHeight <= tailHeight) &&
523 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
524 }
525 }
526 }
527
// Block counts are always reported, including any growth applied for the mip chain.
528 *pNumMetaBlkX = numMetaBlkX;
529 *pNumMetaBlkY = numMetaBlkY;
530 *pNumMetaBlkZ = numMetaBlkZ;
531 }
532
533 /**
534 ************************************************************************************************************************
535 * Gfx9Lib::HwlComputeDccInfo
536 *
537 * @brief
538 * Interface function to compute DCC key info
539 *
540 * @return
541 * ADDR_E_RETURNCODE
542 ************************************************************************************************************************
543 */
544 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
545 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
546 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
547 ) const
548 {
// Computes the DCC key size, base alignment, compress/meta block dimensions and related values
// for the surface described by pIn and stores them in pOut. Always returns ADDR_OK.
549 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
550 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
551 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
552
// Linear data forces linear metadata; linear metadata on non-linear data drops pipe alignment.
553 if (dataLinear)
554 {
555 metaLinear = TRUE;
556 }
557 else if (metaLinear == TRUE)
558 {
559 pipeAligned = FALSE;
560 }
561
562 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
563
564 if (metaLinear)
565 {
566 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
567 ADDR_ASSERT_ALWAYS();
568
// Size and alignment are still filled in after the assertion: the key is 1/256 of the data
// surface size, rounded up to the pipe-based alignment. No other output field is written.
569 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
570 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
571 }
572 else
573 {
574 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
575
// Baseline meta block size: 64K for thick data, 4K for thin, divided by the fragment count.
576 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
577
578 UINT_32 numFrags = Max(pIn->numFrags, 1u);
579 UINT_32 numSlices = Max(pIn->numSlices, 1u);
580
581 minMetaBlkSize /= numFrags;
582
583 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
584
585 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
586
// With more than one pipe or RB the block count is raised to cover every RB
// (262144 per RB for thick data, thinBlkSize for thin) and then capped at 65536 * bpp.
587 if ((numPipeTotal > 1) || (numRbTotal > 1))
588 {
589 const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
590
591 numCompressBlkPerMetaBlk =
592 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
593
594 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
595 {
596 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
597 }
598 }
599
600 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
601 Dim3d metaBlkDim = compressBlkDim;
602
// Grow the meta block from the compress block by one doubling per loop iteration; index doubles
// from 1 until it reaches numCompressBlkPerMetaBlk. Height is picked when it is smaller than
// the width (or equal, for mipmapped surfaces), otherwise width. For thick data the depth is
// doubled instead whenever the picked dimension is already larger than the depth.
603 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
604 {
605 if ((metaBlkDim.h < metaBlkDim.w) ||
606 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
607 {
608 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
609 {
610 metaBlkDim.h <<= 1;
611 }
612 else
613 {
614 metaBlkDim.d <<= 1;
615 }
616 }
617 else
618 {
619 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
620 {
621 metaBlkDim.w <<= 1;
622 }
623 else
624 {
625 metaBlkDim.d <<= 1;
626 }
627 }
628 }
629
630 UINT_32 numMetaBlkX;
631 UINT_32 numMetaBlkY;
632 UINT_32 numMetaBlkZ;
633
// pOut->pMipInfo is forwarded as-is; GetMetaMipInfo skips the per-mip output when it is NULL.
634 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
635 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
636 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
637
638 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
639
// Fragment counts beyond m_maxCompFrag scale the size alignment by the excess ratio.
640 if (numFrags > m_maxCompFrag)
641 {
642 sizeAlign *= (numFrags / m_maxCompFrag);
643 }
644
// metaBaseAlignFix: also align to at least the data surface's swizzle block size.
645 if (m_settings.metaBaseAlignFix)
646 {
647 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
648 }
649
// Total key size: every meta block of every fragment, rounded up to sizeAlign.
650 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
651 numCompressBlkPerMetaBlk * numFrags;
652 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
653 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
654
655 pOut->pitch = numMetaBlkX * metaBlkDim.w;
656 pOut->height = numMetaBlkY * metaBlkDim.h;
657 pOut->depth = numMetaBlkZ * metaBlkDim.d;
658
659 pOut->compressBlkWidth = compressBlkDim.w;
660 pOut->compressBlkHeight = compressBlkDim.h;
661 pOut->compressBlkDepth = compressBlkDim.d;
662
663 pOut->metaBlkWidth = metaBlkDim.w;
664 pOut->metaBlkHeight = metaBlkDim.h;
665 pOut->metaBlkDepth = metaBlkDim.d;
666
// The fast-clear size counts at most m_maxCompFrag fragments per slice.
667 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
668 pOut->fastClearSizePerSlice =
669 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
670 }
671
672 return ADDR_OK;
673 }
674
675 /**
676 ************************************************************************************************************************
677 * Gfx9Lib::HwlComputeMaxBaseAlignments
678 *
679 * @brief
680 * Gets maximum alignments
681 * @return
682 * maximum alignments
683 ************************************************************************************************************************
684 */
685 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
686 {
687 return Size64K;
688 }
689
690 /**
691 ************************************************************************************************************************
692 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
693 *
694 * @brief
695 * Gets maximum alignments for metadata
696 * @return
697 * maximum alignments for metadata
698 ************************************************************************************************************************
699 */
700 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
701 {
702 // Max base alignment for Htile
703 const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
704 const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
705
706 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
707 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
708 ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
709 const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
710
711 UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
712
713 if (maxNumPipeTotal > 2)
714 {
715 maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
716 }
717
718 maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
719
720 if (m_settings.metaBaseAlignFix)
721 {
722 maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
723 }
724
725 if (m_settings.htileAlignFix)
726 {
727 maxBaseAlignHtile *= maxNumPipeTotal;
728 }
729
730 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
731
732 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
733 UINT_32 maxBaseAlignDcc3D = 65536;
734
735 if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
736 {
737 maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
738 }
739
740 // Max base alignment for Msaa Dcc
741 UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
742
743 if (m_settings.metaBaseAlignFix)
744 {
745 maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
746 }
747
748 return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
749 }
750
751 /**
752 ************************************************************************************************************************
753 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
754 *
755 * @brief
756 * Interface function stub of AddrComputeCmaskAddrFromCoord
757 *
758 * @return
759 * ADDR_E_RETURNCODE
760 ************************************************************************************************************************
761 */
762 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
763 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
764 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
765 {
// Translates an (x, y, slice) coordinate into a CMASK byte address plus bit position.
// First recomputes the CMASK layout for the surface (dimensions clamped to at least 1);
// pOut is written only if that succeeds, and the layout call's return code is returned.
766 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
767 input.size = sizeof(input);
768 input.cMaskFlags = pIn->cMaskFlags;
769 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
770 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
771 input.numSlices = Max(pIn->numSlices, 1u);
772 input.swizzleMode = pIn->swizzleMode;
773 input.resourceType = pIn->resourceType;
774
775 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
776 output.size = sizeof(output);
777
778 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
779
780 if (returnCode == ADDR_OK)
781 {
782 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
783 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
784 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
785 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
786
// MetaEqParams is filled positionally; field names are not visible here. By analogy with the
// DCC caller further down, the trailing 0, 3, 3, 0 are presumably meta block depth log2 and the
// compress block width/height/depth log2 (an 8x8x1 compress block) -- TODO confirm.
787 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
788 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
789 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
790
791 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
792
// Index of the meta block containing the coordinate, in slice-major, then row-major order.
793 UINT_32 xb = pIn->x / output.metaBlkWidth;
794 UINT_32 yb = pIn->y / output.metaBlkHeight;
795 UINT_32 zb = pIn->slice;
796
797 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
798 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
799 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
800
// Coordinate vector handed to the equation: x, y, slice, 0, meta block index.
801 UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, 0, blockIndex };
802 UINT_64 address = pMetaEq->solve(coords);
803
// The solved value is split: upper bits form the byte address, the lowest bit selects
// bit position 0 or 4 within that byte (i.e. the equation addresses nibbles).
804 pOut->addr = address >> 1;
805 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
806
807 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
808 pIn->swizzleMode);
809
// Only the low numPipeBits of the caller's pipeXor are applied, at the pipe interleave bit position.
810 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
811
812 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
813 }
814
815 return returnCode;
816 }
817
818 /**
819 ************************************************************************************************************************
820 * Gfx9Lib::HwlComputeHtileAddrFromCoord
821 *
822 * @brief
823 * Interface function stub of AddrComputeHtileAddrFromCoord
824 *
825 * @return
826 * ADDR_E_RETURNCODE
827 ************************************************************************************************************************
828 */
829 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
830 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
831 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
832 {
// Translates an (x, y, slice) coordinate into an HTILE byte address.
// Returns ADDR_NOTIMPLEMENTED for mipmapped surfaces, otherwise the return code of the
// HTILE layout computation; pOut->addr is written only on ADDR_OK.
833 ADDR_E_RETURNCODE returnCode = ADDR_OK;
834
835 if (pIn->numMipLevels > 1)
836 {
837 returnCode = ADDR_NOTIMPLEMENTED;
838 }
839 else
840 {
// Recompute the HTILE layout for the surface, clamping every dimension to at least 1.
841 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
842 input.size = sizeof(input);
843 input.hTileFlags = pIn->hTileFlags;
844 input.depthFlags = pIn->depthflags;
845 input.swizzleMode = pIn->swizzleMode;
846 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
847 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
848 input.numSlices = Max(pIn->numSlices, 1u);
849 input.numMipLevels = Max(pIn->numMipLevels, 1u);
850
851 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
852 output.size = sizeof(output);
853
854 returnCode = ComputeHtileInfo(&input, &output);
855
856 if (returnCode == ADDR_OK)
857 {
858 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
859 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
860 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
861 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
862
// MetaEqParams is filled positionally; field names are not visible here. The resource type is
// hard-coded to ADDR_RSRC_TEX_2D. The trailing 0, 3, 3, 0 are presumably meta block depth log2
// and compress block width/height/depth log2 (an 8x8x1 compress block) -- TODO confirm.
863 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
864 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
865 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
866
867 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
868
// Index of the meta block containing the coordinate, in slice-major, then row-major order.
869 UINT_32 xb = pIn->x / output.metaBlkWidth;
870 UINT_32 yb = pIn->y / output.metaBlkHeight;
871 UINT_32 zb = pIn->slice;
872
873 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
874 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
875 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
876
// Coordinate vector handed to the equation: x, y, slice, 0, meta block index.
877 UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, 0, blockIndex };
878 UINT_64 address = pMetaEq->solve(coords);
879
// The solved value is halved to obtain the byte address (the inverse function calls the
// doubled value a nibble address); the lowest bit is discarded here.
880 pOut->addr = address >> 1;
881
882 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
883 pIn->swizzleMode);
884
// Only the low numPipeBits of the caller's pipeXor are applied, at the pipe interleave bit position.
885 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
886
887 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
888 }
889 }
890
891 return returnCode;
892 }
893
894 /**
895 ************************************************************************************************************************
896 * Gfx9Lib::HwlComputeHtileCoordFromAddr
897 *
898 * @brief
899 * Interface function stub of AddrComputeHtileCoordFromAddr
900 *
901 * @return
902 * ADDR_E_RETURNCODE
903 ************************************************************************************************************************
904 */
905 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
906 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
907 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
908 {
// Inverse of HwlComputeHtileAddrFromCoord: recovers (x, y, slice) from an HTILE byte address.
// Returns ADDR_NOTIMPLEMENTED for mipmapped surfaces, otherwise the return code of the
// HTILE layout computation; pOut is written only on ADDR_OK.
909 ADDR_E_RETURNCODE returnCode = ADDR_OK;
910
911 if (pIn->numMipLevels > 1)
912 {
913 returnCode = ADDR_NOTIMPLEMENTED;
914 }
915 else
916 {
// Recompute the HTILE layout for the surface, clamping every dimension to at least 1.
// NOTE(review): unlike HwlComputeHtileAddrFromCoord, depthFlags is not copied here and so
// stays zero-initialised -- confirm this is intentional.
917 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
918 input.size = sizeof(input);
919 input.hTileFlags = pIn->hTileFlags;
920 input.swizzleMode = pIn->swizzleMode;
921 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
922 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
923 input.numSlices = Max(pIn->numSlices, 1u);
924 input.numMipLevels = Max(pIn->numMipLevels, 1u);
925
926 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
927 output.size = sizeof(output);
928
929 returnCode = ComputeHtileInfo(&input, &output);
930
931 if (returnCode == ADDR_OK)
932 {
933 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
934 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
935 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
936 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
937
// Same positional MetaEqParams as the forward (coord -> addr) function, so both directions
// request the same equation.
938 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
939 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
940 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
941
942 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
943
944 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
945 pIn->swizzleMode);
946
947 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
948
// Undo the pipe XOR first, then double the byte address to get the nibble address that
// the equation works in (the forward function halves the solved value).
949 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
950
951 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
952 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
953
954 UINT_32 coords[NUM_DIMS];
955 pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, coords);
956
// coords[DIM_M] is the linear meta block index: split it into slice, block row and block
// column, then add the in-block offsets coords[DIM_Y] / coords[DIM_X].
957 pOut->slice = coords[DIM_M] / sliceSizeInBlock;
958 pOut->y = ((coords[DIM_M] % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + coords[DIM_Y];
959 pOut->x = (coords[DIM_M] % pitchInBlock) * output.metaBlkWidth + coords[DIM_X];
960 }
961 }
962
963 return returnCode;
964 }
965
966 /**
967 ************************************************************************************************************************
968 * Gfx9Lib::HwlComputeDccAddrFromCoord
969 *
970 * @brief
971 * Interface function stub of AddrComputeDccAddrFromCoord
972 *
973 * @return
974 * ADDR_E_RETURNCODE
975 ************************************************************************************************************************
976 */
977 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
978 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
979 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
980 {
981 ADDR_E_RETURNCODE returnCode = ADDR_OK;
982
983 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
984 {
985 returnCode = ADDR_NOTIMPLEMENTED;
986 }
987 else
988 {
989 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
990 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
991 UINT_32 metaBlkWidthLog2 = Log2(pIn->metaBlkWidth);
992 UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
993 UINT_32 metaBlkDepthLog2 = Log2(pIn->metaBlkDepth);
994 UINT_32 compBlkWidthLog2 = Log2(pIn->compressBlkWidth);
995 UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
996 UINT_32 compBlkDepthLog2 = Log2(pIn->compressBlkDepth);
997
998 MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
999 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1000 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1001 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1002
1003 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1004
1005 UINT_32 xb = pIn->x / pIn->metaBlkWidth;
1006 UINT_32 yb = pIn->y / pIn->metaBlkHeight;
1007 UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
1008
1009 UINT_32 pitchInBlock = pIn->pitch / pIn->metaBlkWidth;
1010 UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
1011 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1012
1013 UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
1014 UINT_64 address = pMetaEq->solve(coords);
1015
1016 pOut->addr = address >> 1;
1017
1018 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1019 pIn->swizzleMode);
1020
1021 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1022
1023 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1024 }
1025
1026 return returnCode;
1027 }
1028
1029 /**
1030 ************************************************************************************************************************
1031 * Gfx9Lib::HwlInitGlobalParams
1032 *
1033 * @brief
1034 * Initializes global parameters
1035 *
1036 * @return
1037 * TRUE if all settings are valid
1038 *
1039 ************************************************************************************************************************
1040 */
1041 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1042 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1043 {
1044 BOOL_32 valid = TRUE;
1045
1046 if (m_settings.isArcticIsland)
1047 {
1048 GB_ADDR_CONFIG_gfx9 gbAddrConfig;
1049
1050 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1051
1052 // These values are copied from CModel code
1053 switch (gbAddrConfig.bits.NUM_PIPES)
1054 {
1055 case ADDR_CONFIG_1_PIPE:
1056 m_pipes = 1;
1057 m_pipesLog2 = 0;
1058 break;
1059 case ADDR_CONFIG_2_PIPE:
1060 m_pipes = 2;
1061 m_pipesLog2 = 1;
1062 break;
1063 case ADDR_CONFIG_4_PIPE:
1064 m_pipes = 4;
1065 m_pipesLog2 = 2;
1066 break;
1067 case ADDR_CONFIG_8_PIPE:
1068 m_pipes = 8;
1069 m_pipesLog2 = 3;
1070 break;
1071 case ADDR_CONFIG_16_PIPE:
1072 m_pipes = 16;
1073 m_pipesLog2 = 4;
1074 break;
1075 case ADDR_CONFIG_32_PIPE:
1076 m_pipes = 32;
1077 m_pipesLog2 = 5;
1078 break;
1079 default:
1080 ADDR_ASSERT_ALWAYS();
1081 break;
1082 }
1083
1084 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1085 {
1086 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1087 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1088 m_pipeInterleaveLog2 = 8;
1089 break;
1090 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1091 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1092 m_pipeInterleaveLog2 = 9;
1093 break;
1094 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1095 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1096 m_pipeInterleaveLog2 = 10;
1097 break;
1098 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1099 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1100 m_pipeInterleaveLog2 = 11;
1101 break;
1102 default:
1103 ADDR_ASSERT_ALWAYS();
1104 break;
1105 }
1106
1107 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1108 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1109 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1110
1111 switch (gbAddrConfig.bits.NUM_BANKS)
1112 {
1113 case ADDR_CONFIG_1_BANK:
1114 m_banks = 1;
1115 m_banksLog2 = 0;
1116 break;
1117 case ADDR_CONFIG_2_BANK:
1118 m_banks = 2;
1119 m_banksLog2 = 1;
1120 break;
1121 case ADDR_CONFIG_4_BANK:
1122 m_banks = 4;
1123 m_banksLog2 = 2;
1124 break;
1125 case ADDR_CONFIG_8_BANK:
1126 m_banks = 8;
1127 m_banksLog2 = 3;
1128 break;
1129 case ADDR_CONFIG_16_BANK:
1130 m_banks = 16;
1131 m_banksLog2 = 4;
1132 break;
1133 default:
1134 ADDR_ASSERT_ALWAYS();
1135 break;
1136 }
1137
1138 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1139 {
1140 case ADDR_CONFIG_1_SHADER_ENGINE:
1141 m_se = 1;
1142 m_seLog2 = 0;
1143 break;
1144 case ADDR_CONFIG_2_SHADER_ENGINE:
1145 m_se = 2;
1146 m_seLog2 = 1;
1147 break;
1148 case ADDR_CONFIG_4_SHADER_ENGINE:
1149 m_se = 4;
1150 m_seLog2 = 2;
1151 break;
1152 case ADDR_CONFIG_8_SHADER_ENGINE:
1153 m_se = 8;
1154 m_seLog2 = 3;
1155 break;
1156 default:
1157 ADDR_ASSERT_ALWAYS();
1158 break;
1159 }
1160
1161 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1162 {
1163 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1164 m_rbPerSe = 1;
1165 m_rbPerSeLog2 = 0;
1166 break;
1167 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1168 m_rbPerSe = 2;
1169 m_rbPerSeLog2 = 1;
1170 break;
1171 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1172 m_rbPerSe = 4;
1173 m_rbPerSeLog2 = 2;
1174 break;
1175 default:
1176 ADDR_ASSERT_ALWAYS();
1177 break;
1178 }
1179
1180 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1181 {
1182 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1183 m_maxCompFrag = 1;
1184 m_maxCompFragLog2 = 0;
1185 break;
1186 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1187 m_maxCompFrag = 2;
1188 m_maxCompFragLog2 = 1;
1189 break;
1190 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1191 m_maxCompFrag = 4;
1192 m_maxCompFragLog2 = 2;
1193 break;
1194 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1195 m_maxCompFrag = 8;
1196 m_maxCompFragLog2 = 3;
1197 break;
1198 default:
1199 ADDR_ASSERT_ALWAYS();
1200 break;
1201 }
1202
1203 if ((m_rbPerSeLog2 == 1) &&
1204 (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1205 ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1206 {
1207 ADDR_ASSERT(m_settings.isVega10 == FALSE);
1208 ADDR_ASSERT(m_settings.isRaven == FALSE);
1209
1210 ADDR_ASSERT(m_settings.isVega20 == FALSE);
1211
1212 if (m_settings.isVega12)
1213 {
1214 m_settings.htileCacheRbConflict = 1;
1215 }
1216 }
1217
1218 // For simplicity we never allow VAR swizzle mode for GFX9, the actural value is 18 on GFX9
1219 m_blockVarSizeLog2 = 0;
1220 }
1221 else
1222 {
1223 valid = FALSE;
1224 ADDR_NOT_IMPLEMENTED();
1225 }
1226
1227 if (valid)
1228 {
1229 InitEquationTable();
1230 }
1231
1232 return valid;
1233 }
1234
1235 /**
1236 ************************************************************************************************************************
1237 * Gfx9Lib::HwlConvertChipFamily
1238 *
1239 * @brief
1240 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1241 * @return
1242 * ChipFamily
1243 ************************************************************************************************************************
1244 */
1245 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1246 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1247 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1248 {
1249 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1250
1251 switch (uChipFamily)
1252 {
1253 case FAMILY_AI:
1254 m_settings.isArcticIsland = 1;
1255 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1256 m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1257 m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1258 m_settings.isDce12 = 1;
1259
1260 if (m_settings.isVega10 == 0)
1261 {
1262 m_settings.htileAlignFix = 1;
1263 m_settings.applyAliasFix = 1;
1264 }
1265
1266 m_settings.metaBaseAlignFix = 1;
1267
1268 m_settings.depthPipeXorDisable = 1;
1269 break;
1270 case FAMILY_RV:
1271 m_settings.isArcticIsland = 1;
1272
1273 if (ASICREV_IS_RAVEN(uChipRevision))
1274 {
1275 m_settings.isRaven = 1;
1276
1277 m_settings.depthPipeXorDisable = 1;
1278 }
1279
1280 if (ASICREV_IS_RAVEN2(uChipRevision))
1281 {
1282 m_settings.isRaven = 1;
1283 }
1284
1285 if (m_settings.isRaven == 0)
1286 {
1287 m_settings.htileAlignFix = 1;
1288 m_settings.applyAliasFix = 1;
1289 }
1290
1291 if (ASICREV_IS_RENOIR(uChipRevision))
1292 {
1293 m_settings.isRaven = 1;
1294 }
1295
1296 m_settings.isDcn1 = m_settings.isRaven;
1297
1298 m_settings.metaBaseAlignFix = 1;
1299 break;
1300
1301 default:
1302 ADDR_ASSERT(!"This should be a Fusion");
1303 break;
1304 }
1305
1306 return family;
1307 }
1308
1309 /**
1310 ************************************************************************************************************************
1311 * Gfx9Lib::InitRbEquation
1312 *
1313 * @brief
1314 * Init RB equation
1315 * @return
1316 * N/A
1317 ************************************************************************************************************************
1318 */
1319 VOID Gfx9Lib::GetRbEquation(
1320 CoordEq* pRbEq, ///< [out] rb equation
1321 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1322 UINT_32 numSeLog2) ///< [in] number of shader engine
1323 const
1324 {
1325 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1326 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1327 Coordinate cx(DIM_X, rbRegion);
1328 Coordinate cy(DIM_Y, rbRegion);
1329
1330 UINT_32 start = 0;
1331 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1332
1333 // Clear the rb equation
1334 pRbEq->resize(0);
1335 pRbEq->resize(numRbTotalLog2);
1336
1337 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1338 {
1339 // Special case when more than 1 SE, and 2 RB per SE
1340 (*pRbEq)[0].add(cx);
1341 (*pRbEq)[0].add(cy);
1342 cx++;
1343 cy++;
1344
1345 if (m_settings.applyAliasFix == false)
1346 {
1347 (*pRbEq)[0].add(cy);
1348 }
1349
1350 (*pRbEq)[0].add(cy);
1351 start++;
1352 }
1353
1354 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1355
1356 for (UINT_32 i = 0; i < numBits; i++)
1357 {
1358 UINT_32 idx =
1359 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1360
1361 if ((i % 2) == 1)
1362 {
1363 (*pRbEq)[idx].add(cx);
1364 cx++;
1365 }
1366 else
1367 {
1368 (*pRbEq)[idx].add(cy);
1369 cy++;
1370 }
1371 }
1372 }
1373
1374 /**
1375 ************************************************************************************************************************
1376 * Gfx9Lib::GetDataEquation
1377 *
1378 * @brief
1379 * Get data equation for fmask and Z
1380 * @return
1381 * N/A
1382 ************************************************************************************************************************
1383 */
1384 VOID Gfx9Lib::GetDataEquation(
1385 CoordEq* pDataEq, ///< [out] data surface equation
1386 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1387 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1388 AddrResourceType resourceType, ///< [in] data surface resource type
1389 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1390 UINT_32 numSamplesLog2) ///< [in] data surface sample count
1391 const
1392 {
1393 Coordinate cx(DIM_X, 0);
1394 Coordinate cy(DIM_Y, 0);
1395 Coordinate cz(DIM_Z, 0);
1396 Coordinate cs(DIM_S, 0);
1397
1398 // Clear the equation
1399 pDataEq->resize(0);
1400 pDataEq->resize(27);
1401
1402 if (dataSurfaceType == Gfx9DataColor)
1403 {
1404 if (IsLinear(swizzleMode))
1405 {
1406 Coordinate cm(DIM_M, 0);
1407
1408 pDataEq->resize(49);
1409
1410 for (UINT_32 i = 0; i < 49; i++)
1411 {
1412 (*pDataEq)[i].add(cm);
1413 cm++;
1414 }
1415 }
1416 else if (IsThick(resourceType, swizzleMode))
1417 {
1418 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1419 UINT_32 i;
1420 if (IsStandardSwizzle(resourceType, swizzleMode))
1421 {
1422 // Standard 3d swizzle
1423 // Fill in bottom x bits
1424 for (i = elementBytesLog2; i < 4; i++)
1425 {
1426 (*pDataEq)[i].add(cx);
1427 cx++;
1428 }
1429 // Fill in 2 bits of y and then z
1430 for (i = 4; i < 6; i++)
1431 {
1432 (*pDataEq)[i].add(cy);
1433 cy++;
1434 }
1435 for (i = 6; i < 8; i++)
1436 {
1437 (*pDataEq)[i].add(cz);
1438 cz++;
1439 }
1440 if (elementBytesLog2 < 2)
1441 {
1442 // fill in z & y bit
1443 (*pDataEq)[8].add(cz);
1444 (*pDataEq)[9].add(cy);
1445 cz++;
1446 cy++;
1447 }
1448 else if (elementBytesLog2 == 2)
1449 {
1450 // fill in y and x bit
1451 (*pDataEq)[8].add(cy);
1452 (*pDataEq)[9].add(cx);
1453 cy++;
1454 cx++;
1455 }
1456 else
1457 {
1458 // fill in 2 x bits
1459 (*pDataEq)[8].add(cx);
1460 cx++;
1461 (*pDataEq)[9].add(cx);
1462 cx++;
1463 }
1464 }
1465 else
1466 {
1467 // Z 3d swizzle
1468 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1469 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1470 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1471 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1472 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1473 {
1474 (*pDataEq)[i].add(cz);
1475 cz++;
1476 }
1477 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1478 {
1479 // add an x and z
1480 (*pDataEq)[6].add(cx);
1481 (*pDataEq)[7].add(cz);
1482 cx++;
1483 cz++;
1484 }
1485 else if (elementBytesLog2 == 2)
1486 {
1487 // add a y and z
1488 (*pDataEq)[6].add(cy);
1489 (*pDataEq)[7].add(cz);
1490 cy++;
1491 cz++;
1492 }
1493 // add y and x
1494 (*pDataEq)[8].add(cy);
1495 (*pDataEq)[9].add(cx);
1496 cy++;
1497 cx++;
1498 }
1499 // Fill in bit 10 and up
1500 pDataEq->mort3d( cz, cy, cx, 10 );
1501 }
1502 else if (IsThin(resourceType, swizzleMode))
1503 {
1504 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1505 // Color 2D
1506 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1507 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1508 UINT_32 i;
1509 // Fill in bottom x bits
1510 for (i = elementBytesLog2; i < 4; i++)
1511 {
1512 (*pDataEq)[i].add(cx);
1513 cx++;
1514 }
1515 // Fill in bottom y bits
1516 for (i = 4; i < 4 + microYBits; i++)
1517 {
1518 (*pDataEq)[i].add(cy);
1519 cy++;
1520 }
1521 // Fill in last of the micro_x bits
1522 for (i = 4 + microYBits; i < 8; i++)
1523 {
1524 (*pDataEq)[i].add(cx);
1525 cx++;
1526 }
1527 // Fill in x/y bits below sample split
1528 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1529 // Fill in sample bits
1530 for (i = 0; i < numSamplesLog2; i++)
1531 {
1532 cs.set(DIM_S, i);
1533 (*pDataEq)[tileSplitStart + i].add(cs);
1534 }
1535 // Fill in x/y bits above sample split
1536 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1537 {
1538 pDataEq->mort2d(cx, cy, blockSizeLog2);
1539 }
1540 else
1541 {
1542 pDataEq->mort2d(cy, cx, blockSizeLog2);
1543 }
1544 }
1545 else
1546 {
1547 ADDR_ASSERT_ALWAYS();
1548 }
1549 }
1550 else
1551 {
1552 // Fmask or depth
1553 UINT_32 sampleStart = elementBytesLog2;
1554 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1555 UINT_32 ymajStart = 6 + numSamplesLog2;
1556
1557 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1558 {
1559 cs.set(DIM_S, s);
1560 (*pDataEq)[sampleStart + s].add(cs);
1561 }
1562
1563 // Put in the x-major order pixel bits
1564 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1565 // Put in the y-major order pixel bits
1566 pDataEq->mort2d(cy, cx, ymajStart);
1567 }
1568 }
1569
1570 /**
1571 ************************************************************************************************************************
1572 * Gfx9Lib::GetPipeEquation
1573 *
1574 * @brief
1575 * Get pipe equation
1576 * @return
1577 * N/A
1578 ************************************************************************************************************************
1579 */
1580 VOID Gfx9Lib::GetPipeEquation(
1581 CoordEq* pPipeEq, ///< [out] pipe equation
1582 CoordEq* pDataEq, ///< [in] data equation
1583 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1584 UINT_32 numPipeLog2, ///< [in] number of pipes
1585 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1586 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1587 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1588 AddrResourceType resourceType ///< [in] data surface resource type
1589 ) const
1590 {
1591 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1592 CoordEq dataEq;
1593
1594 pDataEq->copy(dataEq);
1595
1596 if (dataSurfaceType == Gfx9DataColor)
1597 {
1598 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1599 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1600 }
1601
1602 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1603
1604 // This section should only apply to z/stencil, maybe fmask
1605 // If the pipe bit is below the comp block size,
1606 // then keep moving up the address until we find a bit that is above
1607 UINT_32 pipeStart = 0;
1608
1609 if (dataSurfaceType != Gfx9DataColor)
1610 {
1611 Coordinate tileMin(DIM_X, 3);
1612
1613 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1614 {
1615 pipeStart++;
1616 }
1617
1618 // if pipe is 0, then the first pipe bit is above the comp block size,
1619 // so we don't need to do anything
1620 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1621 // we will get the same pipe equation
1622 if (pipeStart != 0)
1623 {
1624 for (UINT_32 i = 0; i < numPipeLog2; i++)
1625 {
1626 // Copy the jth bit above pipe interleave to the current pipe equation bit
1627 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1628 }
1629 }
1630 }
1631
1632 if (IsPrt(swizzleMode))
1633 {
1634 // Clear out bits above the block size if prt's are enabled
1635 dataEq.resize(blockSizeLog2);
1636 dataEq.resize(48);
1637 }
1638
1639 if (IsXor(swizzleMode))
1640 {
1641 CoordEq xorMask;
1642
1643 if (IsThick(resourceType, swizzleMode))
1644 {
1645 CoordEq xorMask2;
1646
1647 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1648
1649 xorMask.resize(numPipeLog2);
1650
1651 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1652 {
1653 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1654 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1655 }
1656 }
1657 else
1658 {
1659 // Xor in the bits above the pipe+gpu bits
1660 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1661
1662 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1663 {
1664 Coordinate co;
1665 CoordEq xorMask2;
1666 // if 1xaa and not prt, then xor in the z bits
1667 xorMask2.resize(0);
1668 xorMask2.resize(numPipeLog2);
1669 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1670 {
1671 co.set(DIM_Z, numPipeLog2 - 1 - pipeIdx);
1672 xorMask2[pipeIdx].add(co);
1673 }
1674
1675 pPipeEq->xorin(xorMask2);
1676 }
1677 }
1678
1679 xorMask.reverse();
1680 pPipeEq->xorin(xorMask);
1681 }
1682 }
1683 /**
1684 ************************************************************************************************************************
1685 * Gfx9Lib::GetMetaEquation
1686 *
1687 * @brief
1688 * Get meta equation for cmask/htile/DCC
1689 * @return
1690 * Pointer to a calculated meta equation
1691 ************************************************************************************************************************
1692 */
1693 const CoordEq* Gfx9Lib::GetMetaEquation(
1694 const MetaEqParams& metaEqParams)
1695 {
1696 UINT_32 cachedMetaEqIndex;
1697
1698 for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1699 {
1700 if (memcmp(&metaEqParams,
1701 &m_cachedMetaEqKey[cachedMetaEqIndex],
1702 static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1703 {
1704 break;
1705 }
1706 }
1707
1708 CoordEq* pMetaEq = NULL;
1709
1710 if (cachedMetaEqIndex < MaxCachedMetaEq)
1711 {
1712 pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1713 }
1714 else
1715 {
1716 m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1717
1718 pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1719
1720 m_metaEqOverrideIndex %= MaxCachedMetaEq;
1721
1722 GenMetaEquation(pMetaEq,
1723 metaEqParams.maxMip,
1724 metaEqParams.elementBytesLog2,
1725 metaEqParams.numSamplesLog2,
1726 metaEqParams.metaFlag,
1727 metaEqParams.dataSurfaceType,
1728 metaEqParams.swizzleMode,
1729 metaEqParams.resourceType,
1730 metaEqParams.metaBlkWidthLog2,
1731 metaEqParams.metaBlkHeightLog2,
1732 metaEqParams.metaBlkDepthLog2,
1733 metaEqParams.compBlkWidthLog2,
1734 metaEqParams.compBlkHeightLog2,
1735 metaEqParams.compBlkDepthLog2);
1736 }
1737
1738 return pMetaEq;
1739 }
1740
1741 /**
1742 ************************************************************************************************************************
1743 * Gfx9Lib::GenMetaEquation
1744 *
1745 * @brief
1746 * Get meta equation for cmask/htile/DCC
1747 * @return
1748 * N/A
1749 ************************************************************************************************************************
1750 */
1751 VOID Gfx9Lib::GenMetaEquation(
1752 CoordEq* pMetaEq, ///< [out] meta equation
1753 UINT_32 maxMip, ///< [in] max mip Id
1754 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1755 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1756 ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
1757 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1758 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1759 AddrResourceType resourceType, ///< [in] data surface resource type
1760 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1761 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1762 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1763 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1764 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1765 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1766 const
1767 {
1768 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1769 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1770
1771 // Get the correct data address and rb equation
1772 CoordEq dataEq;
1773 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1774 elementBytesLog2, numSamplesLog2);
1775
1776 // Get pipe and rb equations
1777 CoordEq pipeEquation;
1778 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1779 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1780 numPipeTotalLog2 = pipeEquation.getsize();
1781
1782 if (metaFlag.linear)
1783 {
1784 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1785 ADDR_ASSERT_ALWAYS();
1786
1787 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1788
1789 dataEq.copy(*pMetaEq);
1790
1791 if (IsLinear(swizzleMode))
1792 {
1793 if (metaFlag.pipeAligned)
1794 {
1795 // Remove the pipe bits
1796 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1797 pMetaEq->shift(-shift, pipeInterleaveLog2);
1798 }
1799 // Divide by comp block size, which for linear (which is always color) is 256 B
1800 pMetaEq->shift(-8);
1801
1802 if (metaFlag.pipeAligned)
1803 {
1804 // Put pipe bits back in
1805 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1806
1807 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1808 {
1809 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1810 }
1811 }
1812 }
1813
1814 pMetaEq->shift(1);
1815 }
1816 else
1817 {
1818 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1819 UINT_32 compFragLog2 =
1820 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1821 maxCompFragLog2 : numSamplesLog2;
1822
1823 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1824
1825 // Make sure the metaaddr is cleared
1826 pMetaEq->resize(0);
1827 pMetaEq->resize(27);
1828
1829 if (IsThick(resourceType, swizzleMode))
1830 {
1831 Coordinate cx(DIM_X, 0);
1832 Coordinate cy(DIM_Y, 0);
1833 Coordinate cz(DIM_Z, 0);
1834
1835 if (maxMip > 0)
1836 {
1837 pMetaEq->mort3d(cy, cx, cz);
1838 }
1839 else
1840 {
1841 pMetaEq->mort3d(cx, cy, cz);
1842 }
1843 }
1844 else
1845 {
1846 Coordinate cx(DIM_X, 0);
1847 Coordinate cy(DIM_Y, 0);
1848 Coordinate cs;
1849
1850 if (maxMip > 0)
1851 {
1852 pMetaEq->mort2d(cy, cx, compFragLog2);
1853 }
1854 else
1855 {
1856 pMetaEq->mort2d(cx, cy, compFragLog2);
1857 }
1858
1859 //------------------------------------------------------------------------------------------------------------------------
1860 // Put the compressible fragments at the lsb
1861 // the uncompressible frags will be at the msb of the micro address
1862 //------------------------------------------------------------------------------------------------------------------------
1863 for (UINT_32 s = 0; s < compFragLog2; s++)
1864 {
1865 cs.set(DIM_S, s);
1866 (*pMetaEq)[s].add(cs);
1867 }
1868 }
1869
1870 // Keep a copy of the pipe equations
1871 CoordEq origPipeEquation;
1872 pipeEquation.copy(origPipeEquation);
1873
1874 Coordinate co;
1875 // filter out everything under the compressed block size
1876 co.set(DIM_X, compBlkWidthLog2);
1877 pMetaEq->Filter('<', co, 0, DIM_X);
1878 co.set(DIM_Y, compBlkHeightLog2);
1879 pMetaEq->Filter('<', co, 0, DIM_Y);
1880 co.set(DIM_Z, compBlkDepthLog2);
1881 pMetaEq->Filter('<', co, 0, DIM_Z);
1882
1883 // For non-color, filter out sample bits
1884 if (dataSurfaceType != Gfx9DataColor)
1885 {
1886 co.set(DIM_X, 0);
1887 pMetaEq->Filter('<', co, 0, DIM_S);
1888 }
1889
1890 // filter out everything above the metablock size
1891 co.set(DIM_X, metaBlkWidthLog2 - 1);
1892 pMetaEq->Filter('>', co, 0, DIM_X);
1893 co.set(DIM_Y, metaBlkHeightLog2 - 1);
1894 pMetaEq->Filter('>', co, 0, DIM_Y);
1895 co.set(DIM_Z, metaBlkDepthLog2 - 1);
1896 pMetaEq->Filter('>', co, 0, DIM_Z);
1897
1898 // filter out everything above the metablock size for the channel bits
1899 co.set(DIM_X, metaBlkWidthLog2 - 1);
1900 pipeEquation.Filter('>', co, 0, DIM_X);
1901 co.set(DIM_Y, metaBlkHeightLog2 - 1);
1902 pipeEquation.Filter('>', co, 0, DIM_Y);
1903 co.set(DIM_Z, metaBlkDepthLog2 - 1);
1904 pipeEquation.Filter('>', co, 0, DIM_Z);
1905
1906 // Make sure we still have the same number of channel bits
1907 if (pipeEquation.getsize() != numPipeTotalLog2)
1908 {
1909 ADDR_ASSERT_ALWAYS();
1910 }
1911
1912 // Loop through all channel and rb bits,
1913 // and make sure these components exist in the metadata address
1914 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1915 {
1916 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1917 {
1918 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1919 {
1920 ADDR_ASSERT_ALWAYS();
1921 }
1922 }
1923 }
1924
1925 const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
1926 const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1927 const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1928 CoordEq origRbEquation;
1929
1930 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1931
1932 CoordEq rbEquation = origRbEquation;
1933
1934 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1935 {
1936 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1937 {
1938 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1939 {
1940 ADDR_ASSERT_ALWAYS();
1941 }
1942 }
1943 }
1944
1945 if (m_settings.applyAliasFix)
1946 {
1947 co.set(DIM_Z, -1);
1948 }
1949
1950 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1951 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1952 {
1953 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1954 {
1955 BOOL_32 isRbEquationInPipeEquation = FALSE;
1956
1957 if (m_settings.applyAliasFix)
1958 {
1959 CoordTerm filteredPipeEq;
1960 filteredPipeEq = pipeEquation[j];
1961
1962 filteredPipeEq.Filter('>', co, 0, DIM_Z);
1963
1964 isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1965 }
1966 else
1967 {
1968 isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1969 }
1970
1971 if (isRbEquationInPipeEquation)
1972 {
1973 rbEquation[i].Clear();
1974 }
1975 }
1976 }
1977
1978 bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
1979
1980 // Loop through each bit of the channel, get the smallest coordinate,
1981 // and remove it from the metaaddr, and rb_equation
1982 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1983 {
1984 pipeEquation[i].getsmallest(co);
1985
1986 UINT_32 old_size = pMetaEq->getsize();
1987 pMetaEq->Filter('=', co);
1988 UINT_32 new_size = pMetaEq->getsize();
1989 if (new_size != old_size-1)
1990 {
1991 ADDR_ASSERT_ALWAYS();
1992 }
1993 pipeEquation.remove(co);
1994 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
1995 {
1996 if (rbEquation[j].remove(co))
1997 {
1998 // if we actually removed something from this bit, then add the remaining
1999 // channel bits, as these can be removed for this bit
2000 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2001 {
2002 if (pipeEquation[i][k] != co)
2003 {
2004 rbEquation[j].add(pipeEquation[i][k]);
2005 rbAppendedWithPipeBits[j] = true;
2006 }
2007 }
2008 }
2009 }
2010 }
2011
2012 // Loop through the rb bits and see what remain;
2013 // filter out the smallest coordinate if it remains
2014 UINT_32 rbBitsLeft = 0;
2015 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2016 {
2017 BOOL_32 isRbEqAppended = FALSE;
2018
2019 if (m_settings.applyAliasFix)
2020 {
2021 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2022 }
2023 else
2024 {
2025 isRbEqAppended = (rbEquation[i].getsize() > 0);
2026 }
2027
2028 if (isRbEqAppended)
2029 {
2030 rbBitsLeft++;
2031 rbEquation[i].getsmallest(co);
2032 UINT_32 old_size = pMetaEq->getsize();
2033 pMetaEq->Filter('=', co);
2034 UINT_32 new_size = pMetaEq->getsize();
2035 if (new_size != old_size - 1)
2036 {
2037 // assert warning
2038 }
2039 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2040 {
2041 if (rbEquation[j].remove(co))
2042 {
2043 // if we actually removed something from this bit, then add the remaining
2044 // rb bits, as these can be removed for this bit
2045 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2046 {
2047 if (rbEquation[i][k] != co)
2048 {
2049 rbEquation[j].add(rbEquation[i][k]);
2050 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2051 }
2052 }
2053 }
2054 }
2055 }
2056 }
2057
2058 // capture the size of the metaaddr
2059 UINT_32 metaSize = pMetaEq->getsize();
2060 // resize to 49 bits...make this a nibble address
2061 pMetaEq->resize(49);
2062 // Concatenate the macro address above the current address
2063 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2064 {
2065 co.set(DIM_M, j);
2066 (*pMetaEq)[i].add(co);
2067 }
2068
2069 // Multiply by meta element size (in nibbles)
2070 if (dataSurfaceType == Gfx9DataColor)
2071 {
2072 pMetaEq->shift(1);
2073 }
2074 else if (dataSurfaceType == Gfx9DataDepthStencil)
2075 {
2076 pMetaEq->shift(3);
2077 }
2078
2079 //------------------------------------------------------------------------------------------
2080 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2081 // Shift up from pipe interleave number of channel
2082 // and rb bits left, and uncompressed fragments
2083 //------------------------------------------------------------------------------------------
2084
2085 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2086
2087 // Put in the channel bits
2088 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2089 {
2090 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2091 }
2092
2093 // Put in remaining rb bits
2094 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2095 {
2096 BOOL_32 isRbEqAppended = FALSE;
2097
2098 if (m_settings.applyAliasFix)
2099 {
2100 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2101 }
2102 else
2103 {
2104 isRbEqAppended = (rbEquation[i].getsize() > 0);
2105 }
2106
2107 if (isRbEqAppended)
2108 {
2109 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2110 // Mark any rb bit we add in to the rb mask
2111 j++;
2112 }
2113 }
2114
2115 //------------------------------------------------------------------------------------------
2116 // Put in the uncompressed fragment bits
2117 //------------------------------------------------------------------------------------------
2118 for (UINT_32 i = 0; i < uncompFragLog2; i++)
2119 {
2120 co.set(DIM_S, compFragLog2 + i);
2121 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2122 }
2123 }
2124 }
2125
2126 /**
2127 ************************************************************************************************************************
2128 * Gfx9Lib::IsEquationSupported
2129 *
2130 * @brief
2131 * Check if equation is supported for given swizzle mode and resource type.
2132 *
2133 * @return
2134 * TRUE if supported
2135 ************************************************************************************************************************
2136 */
2137 BOOL_32 Gfx9Lib::IsEquationSupported(
2138 AddrResourceType rsrcType,
2139 AddrSwizzleMode swMode,
2140 UINT_32 elementBytesLog2) const
2141 {
2142 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2143 (IsValidSwMode(swMode) == TRUE) &&
2144 (IsLinear(swMode) == FALSE) &&
2145 (((IsTex2d(rsrcType) == TRUE) &&
2146 ((elementBytesLog2 < 4) ||
2147 ((IsRotateSwizzle(swMode) == FALSE) &&
2148 (IsZOrderSwizzle(swMode) == FALSE)))) ||
2149 ((IsTex3d(rsrcType) == TRUE) &&
2150 (IsRotateSwizzle(swMode) == FALSE) &&
2151 (IsBlock256b(swMode) == FALSE)));
2152
2153 return supported;
2154 }
2155
2156 /**
2157 ************************************************************************************************************************
2158 * Gfx9Lib::InitEquationTable
2159 *
2160 * @brief
2161 * Initialize Equation table.
2162 *
2163 * @return
2164 * N/A
2165 ************************************************************************************************************************
2166 */
2167 VOID Gfx9Lib::InitEquationTable()
2168 {
2169 memset(m_equationTable, 0, sizeof(m_equationTable));
2170
2171 // Loop all possible resource type (2D/3D)
2172 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2173 {
2174 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2175
2176 // Loop all possible swizzle mode
2177 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
2178 {
2179 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2180
2181 // Loop all possible bpp
2182 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2183 {
2184 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2185
2186 // Check if the input is supported
2187 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2188 {
2189 ADDR_EQUATION equation;
2190 ADDR_E_RETURNCODE retCode;
2191
2192 memset(&equation, 0, sizeof(ADDR_EQUATION));
2193
2194 // Generate the equation
2195 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2196 {
2197 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2198 }
2199 else if (IsThin(rsrcType, swMode))
2200 {
2201 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2202 }
2203 else
2204 {
2205 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2206 }
2207
2208 // Only fill the equation into the table if the return code is ADDR_OK,
2209 // otherwise if the return code is not ADDR_OK, it indicates this is not
2210 // a valid input, we do nothing but just fill invalid equation index
2211 // into the lookup table.
2212 if (retCode == ADDR_OK)
2213 {
2214 equationIndex = m_numEquations;
2215 ADDR_ASSERT(equationIndex < EquationTableSize);
2216
2217 m_equationTable[equationIndex] = equation;
2218
2219 m_numEquations++;
2220 }
2221 else
2222 {
2223 ADDR_ASSERT_ALWAYS();
2224 }
2225 }
2226
2227 // Fill the index into the lookup table, if the combination is not supported
2228 // fill the invalid equation index
2229 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2230 }
2231 }
2232 }
2233 }
2234
2235 /**
2236 ************************************************************************************************************************
2237 * Gfx9Lib::HwlGetEquationIndex
2238 *
2239 * @brief
2240 * Interface function stub of GetEquationIndex
2241 *
2242 * @return
2243 * ADDR_E_RETURNCODE
2244 ************************************************************************************************************************
2245 */
2246 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2247 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2248 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2249 ) const
2250 {
2251 AddrResourceType rsrcType = pIn->resourceType;
2252 AddrSwizzleMode swMode = pIn->swizzleMode;
2253 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2254 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2255
2256 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2257 {
2258 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2259 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2260
2261 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2262 }
2263
2264 if (pOut->pMipInfo != NULL)
2265 {
2266 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2267 {
2268 pOut->pMipInfo[i].equationIndex = index;
2269 }
2270 }
2271
2272 return index;
2273 }
2274
2275 /**
2276 ************************************************************************************************************************
2277 * Gfx9Lib::HwlComputeBlock256Equation
2278 *
2279 * @brief
2280 * Interface function stub of ComputeBlock256Equation
2281 *
2282 * @return
2283 * ADDR_E_RETURNCODE
2284 ************************************************************************************************************************
2285 */
// Fills pEquation with the 8-bit address equation (2^8 = 256 bytes) of a 256B block
// for the given swizzle mode and element size (elementBytesLog2 = log2 of bytes per element).
// Returns ADDR_OK on success, or ADDR_INVALIDPARAMS when the swizzle type / element size
// combination has no bit pattern in the tables below.
2286 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2287 AddrResourceType rsrcType,
2288 AddrSwizzleMode swMode,
2289 UINT_32 elementBytesLog2,
2290 ADDR_EQUATION* pEquation) const
2291 {
2292 ADDR_E_RETURNCODE ret = ADDR_OK;
2293
2294 pEquation->numBits = 8;
2295
// The lowest elementBytesLog2 address bits are taken from channel 0, bits 0..elementBytesLog2-1.
// NOTE(review): InitChannel's arguments look like (valid, channel, index, dest) - confirm
// against its declaration.
2296 UINT_32 i = 0;
2297 for (; i < elementBytesLog2; i++)
2298 {
2299 InitChannel(1, 0 , i, &pEquation->addr[i]);
2300 }
2301
// pixelBit addresses the equation bits that follow the low bits set above.
2302 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2303
// Candidate source bits: x[i] is channel 0 starting at bit elementBytesLog2,
// y[i] is channel 1 starting at bit 0.
2304 const UINT_32 maxBitsUsed = 4;
2305 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2306 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2307
2308 for (i = 0; i < maxBitsUsed; i++)
2309 {
2310 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2311 InitChannel(1, 1, i, &y[i]);
2312 }
2313
// In each table below, case N assigns the remaining (8 - N) equation bits.
2314 if (IsStandardSwizzle(rsrcType, swMode))
2315 {
2316 switch (elementBytesLog2)
2317 {
2318 case 0:
2319 pixelBit[0] = x[0];
2320 pixelBit[1] = x[1];
2321 pixelBit[2] = x[2];
2322 pixelBit[3] = x[3];
2323 pixelBit[4] = y[0];
2324 pixelBit[5] = y[1];
2325 pixelBit[6] = y[2];
2326 pixelBit[7] = y[3];
2327 break;
2328 case 1:
2329 pixelBit[0] = x[0];
2330 pixelBit[1] = x[1];
2331 pixelBit[2] = x[2];
2332 pixelBit[3] = y[0];
2333 pixelBit[4] = y[1];
2334 pixelBit[5] = y[2];
2335 pixelBit[6] = x[3];
2336 break;
2337 case 2:
2338 pixelBit[0] = x[0];
2339 pixelBit[1] = x[1];
2340 pixelBit[2] = y[0];
2341 pixelBit[3] = y[1];
2342 pixelBit[4] = y[2];
2343 pixelBit[5] = x[2];
2344 break;
2345 case 3:
2346 pixelBit[0] = x[0];
2347 pixelBit[1] = y[0];
2348 pixelBit[2] = y[1];
2349 pixelBit[3] = x[1];
2350 pixelBit[4] = x[2];
2351 break;
2352 case 4:
2353 pixelBit[0] = y[0];
2354 pixelBit[1] = y[1];
2355 pixelBit[2] = x[0];
2356 pixelBit[3] = x[1];
2357 break;
2358 default:
2359 ADDR_ASSERT_ALWAYS();
2360 ret = ADDR_INVALIDPARAMS;
2361 break;
2362 }
2363 }
2364 else if (IsDisplaySwizzle(rsrcType, swMode))
2365 {
2366 switch (elementBytesLog2)
2367 {
2368 case 0:
2369 pixelBit[0] = x[0];
2370 pixelBit[1] = x[1];
2371 pixelBit[2] = x[2];
2372 pixelBit[3] = y[1];
2373 pixelBit[4] = y[0];
2374 pixelBit[5] = y[2];
2375 pixelBit[6] = x[3];
2376 pixelBit[7] = y[3];
2377 break;
2378 case 1:
2379 pixelBit[0] = x[0];
2380 pixelBit[1] = x[1];
2381 pixelBit[2] = x[2];
2382 pixelBit[3] = y[0];
2383 pixelBit[4] = y[1];
2384 pixelBit[5] = y[2];
2385 pixelBit[6] = x[3];
2386 break;
2387 case 2:
2388 pixelBit[0] = x[0];
2389 pixelBit[1] = x[1];
2390 pixelBit[2] = y[0];
2391 pixelBit[3] = x[2];
2392 pixelBit[4] = y[1];
2393 pixelBit[5] = y[2];
2394 break;
2395 case 3:
2396 pixelBit[0] = x[0];
2397 pixelBit[1] = y[0];
2398 pixelBit[2] = x[1];
2399 pixelBit[3] = x[2];
2400 pixelBit[4] = y[1];
2401 break;
2402 case 4:
2403 pixelBit[0] = x[0];
2404 pixelBit[1] = y[0];
2405 pixelBit[2] = x[1];
2406 pixelBit[3] = y[1];
2407 break;
2408 default:
2409 ADDR_ASSERT_ALWAYS();
2410 ret = ADDR_INVALIDPARAMS;
2411 break;
2412 }
2413 }
2414 else if (IsRotateSwizzle(swMode))
2415 {
2416 switch (elementBytesLog2)
2417 {
2418 case 0:
2419 pixelBit[0] = y[0];
2420 pixelBit[1] = y[1];
2421 pixelBit[2] = y[2];
2422 pixelBit[3] = x[1];
2423 pixelBit[4] = x[0];
2424 pixelBit[5] = x[2];
2425 pixelBit[6] = x[3];
2426 pixelBit[7] = y[3];
2427 break;
2428 case 1:
2429 pixelBit[0] = y[0];
2430 pixelBit[1] = y[1];
2431 pixelBit[2] = y[2];
2432 pixelBit[3] = x[0];
2433 pixelBit[4] = x[1];
2434 pixelBit[5] = x[2];
2435 pixelBit[6] = x[3];
2436 break;
2437 case 2:
2438 pixelBit[0] = y[0];
2439 pixelBit[1] = y[1];
2440 pixelBit[2] = x[0];
2441 pixelBit[3] = y[2];
2442 pixelBit[4] = x[1];
2443 pixelBit[5] = x[2];
2444 break;
2445 case 3:
2446 pixelBit[0] = y[0];
2447 pixelBit[1] = x[0];
2448 pixelBit[2] = y[1];
2449 pixelBit[3] = x[1];
2450 pixelBit[4] = x[2];
2451 break;
// The rotate table has no pattern for elementBytesLog2 == 4: that value is rejected without
// asserting. Any other unexpected value asserts first and then deliberately falls through
// into the same error return (there is no break after the assert).
2452 default:
2453 ADDR_ASSERT_ALWAYS();
2454 case 4:
2455 ret = ADDR_INVALIDPARAMS;
2456 break;
2457 }
2458 }
2459 else
2460 {
// Swizzle mode is none of standard / display / rotate.
2461 ADDR_ASSERT_ALWAYS();
2462 ret = ADDR_INVALIDPARAMS;
2463 }
2464
2465 // Post validation
// Assert-only cross-check of the generated equation against the Block256_2d dimensions
// for this element size (channel 0 is compared against width * element bytes, channel 1
// against height).
2466 if (ret == ADDR_OK)
2467 {
2468 ASSERTED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2469 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2470 (microBlockDim.w * (1 << elementBytesLog2)));
2471 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2472 }
2473
2474 return ret;
2475 }
2476
2477 /**
2478 ************************************************************************************************************************
2479 * Gfx9Lib::HwlComputeThinEquation
2480 *
2481 * @brief
2482 * Interface function stub of ComputeThinEquation
2483 *
2484 * @return
2485 * ADDR_E_RETURNCODE
2486 ************************************************************************************************************************
2487 */
2488 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2489 AddrResourceType rsrcType,
2490 AddrSwizzleMode swMode,
2491 UINT_32 elementBytesLog2,
2492 ADDR_EQUATION* pEquation) const
2493 {
2494 ADDR_E_RETURNCODE ret = ADDR_OK;
2495
2496 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2497
2498 UINT_32 maxXorBits = blockSizeLog2;
2499 if (IsNonPrtXor(swMode))
2500 {
2501 // For non-prt-xor, maybe need to initialize some more bits for xor
2502 // The highest xor bit used in equation will be max the following 3 items:
2503 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2504 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2505 // 3. blockSizeLog2
2506
2507 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2508 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2509 GetPipeXorBits(blockSizeLog2) +
2510 2 * GetBankXorBits(blockSizeLog2));
2511 }
2512
2513 const UINT_32 maxBitsUsed = 14;
2514 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2515 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2516 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2517
2518 const UINT_32 extraXorBits = 16;
2519 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2520 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2521
2522 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2523 {
2524 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2525 InitChannel(1, 1, i, &y[i]);
2526 }
2527
2528 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2529
2530 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2531 {
2532 InitChannel(1, 0 , i, &pixelBit[i]);
2533 }
2534
2535 UINT_32 xIdx = 0;
2536 UINT_32 yIdx = 0;
2537 UINT_32 lowBits = 0;
2538
2539 if (IsZOrderSwizzle(swMode))
2540 {
2541 if (elementBytesLog2 <= 3)
2542 {
2543 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2544 {
2545 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2546 }
2547
2548 lowBits = 6;
2549 }
2550 else
2551 {
2552 ret = ADDR_INVALIDPARAMS;
2553 }
2554 }
2555 else
2556 {
2557 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2558
2559 if (ret == ADDR_OK)
2560 {
2561 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2562 xIdx = Log2(microBlockDim.w);
2563 yIdx = Log2(microBlockDim.h);
2564 lowBits = 8;
2565 }
2566 }
2567
2568 if (ret == ADDR_OK)
2569 {
2570 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2571 {
2572 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2573 }
2574
2575 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2576 {
2577 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2578 }
2579
2580 if (IsXor(swMode))
2581 {
2582 // Fill XOR bits
2583 UINT_32 pipeStart = m_pipeInterleaveLog2;
2584 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2585
2586 UINT_32 bankStart = pipeStart + pipeXorBits;
2587 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2588
2589 for (UINT_32 i = 0; i < pipeXorBits; i++)
2590 {
2591 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2592 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2593 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2594
2595 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2596 }
2597
2598 for (UINT_32 i = 0; i < bankXorBits; i++)
2599 {
2600 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2601 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2602 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2603
2604 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2605 }
2606
2607 if (IsPrt(swMode) == FALSE)
2608 {
2609 for (UINT_32 i = 0; i < pipeXorBits; i++)
2610 {
2611 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2612 }
2613
2614 for (UINT_32 i = 0; i < bankXorBits; i++)
2615 {
2616 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2617 }
2618 }
2619 }
2620
2621 pEquation->numBits = blockSizeLog2;
2622 }
2623
2624 return ret;
2625 }
2626
2627 /**
2628 ************************************************************************************************************************
2629 * Gfx9Lib::HwlComputeThickEquation
2630 *
2631 * @brief
2632 * Interface function stub of ComputeThickEquation
2633 *
2634 * @return
2635 * ADDR_E_RETURNCODE
2636 ************************************************************************************************************************
2637 */
// Fills pEquation with the address equation of a thick swizzle mode for one element size
// (elementBytesLog2 = log2 of bytes per element). The resource type is asserted to be 3D.
// Returns ADDR_OK on success, or ADDR_INVALIDPARAMS when the swizzle mode is neither Z-order
// nor standard, or the element size has no pattern in the tables below.
2638 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2639 AddrResourceType rsrcType,
2640 AddrSwizzleMode swMode,
2641 UINT_32 elementBytesLog2,
2642 ADDR_EQUATION* pEquation) const
2643 {
2644 ADDR_E_RETURNCODE ret = ADDR_OK;
2645
2646 ADDR_ASSERT(IsTex3d(rsrcType));
2647
2648 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2649
// maxXorBits is the highest bit position that may be needed as an XOR source; it can
// exceed blockSizeLog2 for non-PRT XOR modes.
2650 UINT_32 maxXorBits = blockSizeLog2;
2651 if (IsNonPrtXor(swMode))
2652 {
2653 // For non-prt-xor, maybe need to initialize some more bits for xor
2654 // The highest xor bit used in equation will be max the following 3:
2655 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2656 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2657 // 3. blockSizeLog2
2658
2659 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2660 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2661 GetPipeXorBits(blockSizeLog2) +
2662 3 * GetBankXorBits(blockSizeLog2));
2663 }
2664
// The lowest elementBytesLog2 address bits are taken from channel 0, bits 0..elementBytesLog2-1.
2665 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2666 {
2667 InitChannel(1, 0 , i, &pEquation->addr[i]);
2668 }
2669
// pixelBit addresses the equation bits that follow the low bits set above.
2670 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2671
// Candidate source bits: x[i] is channel 0 starting at bit elementBytesLog2,
// y[i] is channel 1 and z[i] is channel 2, both starting at bit 0.
2672 const UINT_32 maxBitsUsed = 12;
2673 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2674 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2675 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2676 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2677
// xorExtra holds the bits at positions in [blockSizeLog2, maxXorBits); they are only
// used as XOR sources further down.
2678 const UINT_32 extraXorBits = 24;
2679 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2680 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2681
2682 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2683 {
2684 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2685 InitChannel(1, 1, i, &y[i]);
2686 InitChannel(1, 2, i, &z[i]);
2687 }
2688
// In each table below, case N assigns (10 - N) bits, so the first 10 equation bits are
// filled in total (this is asserted after the tables).
2689 if (IsZOrderSwizzle(swMode))
2690 {
2691 switch (elementBytesLog2)
2692 {
2693 case 0:
2694 pixelBit[0] = x[0];
2695 pixelBit[1] = y[0];
2696 pixelBit[2] = x[1];
2697 pixelBit[3] = y[1];
2698 pixelBit[4] = z[0];
2699 pixelBit[5] = z[1];
2700 pixelBit[6] = x[2];
2701 pixelBit[7] = z[2];
2702 pixelBit[8] = y[2];
2703 pixelBit[9] = x[3];
2704 break;
2705 case 1:
2706 pixelBit[0] = x[0];
2707 pixelBit[1] = y[0];
2708 pixelBit[2] = x[1];
2709 pixelBit[3] = y[1];
2710 pixelBit[4] = z[0];
2711 pixelBit[5] = z[1];
2712 pixelBit[6] = z[2];
2713 pixelBit[7] = y[2];
2714 pixelBit[8] = x[2];
2715 break;
2716 case 2:
2717 pixelBit[0] = x[0];
2718 pixelBit[1] = y[0];
2719 pixelBit[2] = x[1];
2720 pixelBit[3] = z[0];
2721 pixelBit[4] = y[1];
2722 pixelBit[5] = z[1];
2723 pixelBit[6] = y[2];
2724 pixelBit[7] = x[2];
2725 break;
2726 case 3:
2727 pixelBit[0] = x[0];
2728 pixelBit[1] = y[0];
2729 pixelBit[2] = z[0];
2730 pixelBit[3] = x[1];
2731 pixelBit[4] = z[1];
2732 pixelBit[5] = y[1];
2733 pixelBit[6] = x[2];
2734 break;
2735 case 4:
2736 pixelBit[0] = x[0];
2737 pixelBit[1] = y[0];
2738 pixelBit[2] = z[0];
2739 pixelBit[3] = z[1];
2740 pixelBit[4] = y[1];
2741 pixelBit[5] = x[1];
2742 break;
2743 default:
2744 ADDR_ASSERT_ALWAYS();
2745 ret = ADDR_INVALIDPARAMS;
2746 break;
2747 }
2748 }
2749 else if (IsStandardSwizzle(rsrcType, swMode))
2750 {
2751 switch (elementBytesLog2)
2752 {
2753 case 0:
2754 pixelBit[0] = x[0];
2755 pixelBit[1] = x[1];
2756 pixelBit[2] = x[2];
2757 pixelBit[3] = x[3];
2758 pixelBit[4] = y[0];
2759 pixelBit[5] = y[1];
2760 pixelBit[6] = z[0];
2761 pixelBit[7] = z[1];
2762 pixelBit[8] = z[2];
2763 pixelBit[9] = y[2];
2764 break;
2765 case 1:
2766 pixelBit[0] = x[0];
2767 pixelBit[1] = x[1];
2768 pixelBit[2] = x[2];
2769 pixelBit[3] = y[0];
2770 pixelBit[4] = y[1];
2771 pixelBit[5] = z[0];
2772 pixelBit[6] = z[1];
2773 pixelBit[7] = z[2];
2774 pixelBit[8] = y[2];
2775 break;
2776 case 2:
2777 pixelBit[0] = x[0];
2778 pixelBit[1] = x[1];
2779 pixelBit[2] = y[0];
2780 pixelBit[3] = y[1];
2781 pixelBit[4] = z[0];
2782 pixelBit[5] = z[1];
2783 pixelBit[6] = y[2];
2784 pixelBit[7] = x[2];
2785 break;
2786 case 3:
2787 pixelBit[0] = x[0];
2788 pixelBit[1] = y[0];
2789 pixelBit[2] = y[1];
2790 pixelBit[3] = z[0];
2791 pixelBit[4] = z[1];
2792 pixelBit[5] = x[1];
2793 pixelBit[6] = x[2];
2794 break;
2795 case 4:
2796 pixelBit[0] = y[0];
2797 pixelBit[1] = y[1];
2798 pixelBit[2] = z[0];
2799 pixelBit[3] = z[1];
2800 pixelBit[4] = x[0];
2801 pixelBit[5] = x[1];
2802 break;
2803 default:
2804 ADDR_ASSERT_ALWAYS();
2805 ret = ADDR_INVALIDPARAMS;
2806 break;
2807 }
2808 }
2809 else
2810 {
// Swizzle mode is neither Z-order nor standard.
2811 ADDR_ASSERT_ALWAYS();
2812 ret = ADDR_INVALIDPARAMS;
2813 }
2814
2815 if (ret == ADDR_OK)
2816 {
// Continue each coordinate from the first bit not already consumed by the tables above,
// derived from the Block1K_3d dimensions for this element size.
2817 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2818 UINT_32 xIdx = Log2(microBlockDim.w);
2819 UINT_32 yIdx = Log2(microBlockDim.h);
2820 UINT_32 zIdx = Log2(microBlockDim.d);
2821
// From here on pixelBit is indexed by absolute bit position in the equation.
2822 pixelBit = pEquation->addr;
2823
// Sanity check: bit 9 must have been filled by the tables and bit 10 must still be empty.
2824 const UINT_32 lowBits = 10;
2825 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2826 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2827
// Remaining bits inside the block rotate through the coordinates by bit position:
// i % 3 == 0 takes the next x bit, 1 takes the next z bit, 2 takes the next y bit.
2828 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2829 {
2830 if ((i % 3) == 0)
2831 {
2832 pixelBit[i] = x[xIdx++];
2833 }
2834 else if ((i % 3) == 1)
2835 {
2836 pixelBit[i] = z[zIdx++];
2837 }
2838 else
2839 {
2840 pixelBit[i] = y[yIdx++];
2841 }
2842 }
2843
// The same rotation continues above the block size; those bits go to xorExtra.
2844 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2845 {
2846 if ((i % 3) == 0)
2847 {
2848 xorExtra[i - blockSizeLog2] = x[xIdx++];
2849 }
2850 else if ((i % 3) == 1)
2851 {
2852 xorExtra[i - blockSizeLog2] = z[zIdx++];
2853 }
2854 else
2855 {
2856 xorExtra[i - blockSizeLog2] = y[yIdx++];
2857 }
2858 }
2859
2860 if (IsXor(swMode))
2861 {
2862 // Fill XOR bits
// Each pipe bit i gets two XOR sources taken from descending positions:
// xor1 from (pipeStart + 3 * pipeXorBits - 1 - 2 * i) and xor2 from one position below.
// A source at or above blockSizeLog2 is read from xorExtra instead of the equation.
2863 UINT_32 pipeStart = m_pipeInterleaveLog2;
2864 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2865 for (UINT_32 i = 0; i < pipeXorBits; i++)
2866 {
2867 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2868 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2869 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2870
2871 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2872
2873 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2874 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2875 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2876
2877 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2878 }
2879
// Bank bits follow the pipe bits and use the same scheme relative to bankStart.
2880 UINT_32 bankStart = pipeStart + pipeXorBits;
2881 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2882 for (UINT_32 i = 0; i < bankXorBits; i++)
2883 {
2884 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2885 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2886 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2887
2888 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2889
2890 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2891 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2892 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2893
2894 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2895 }
2896 }
2897
// The finished equation covers exactly one block.
2898 pEquation->numBits = blockSizeLog2;
2899 }
2900
2901 return ret;
2902 }
2903
2904 /**
2905 ************************************************************************************************************************
2906 * Gfx9Lib::IsValidDisplaySwizzleMode
2907 *
2908 * @brief
2909 * Check if a swizzle mode is supported by display engine
2910 *
2911 * @return
2912 * TRUE is swizzle mode is supported by display engine
2913 ************************************************************************************************************************
2914 */
2915 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2916 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2917 {
2918 BOOL_32 support = FALSE;
2919
2920 if (m_settings.isDce12)
2921 {
2922 switch (pIn->swizzleMode)
2923 {
2924 case ADDR_SW_256B_D:
2925 case ADDR_SW_256B_R:
2926 support = (pIn->bpp == 32);
2927 break;
2928
2929 case ADDR_SW_LINEAR:
2930 case ADDR_SW_4KB_D:
2931 case ADDR_SW_4KB_R:
2932 case ADDR_SW_64KB_D:
2933 case ADDR_SW_64KB_R:
2934 case ADDR_SW_4KB_D_X:
2935 case ADDR_SW_4KB_R_X:
2936 case ADDR_SW_64KB_D_X:
2937 case ADDR_SW_64KB_R_X:
2938 support = (pIn->bpp <= 64);
2939 break;
2940
2941 default:
2942 break;
2943 }
2944 }
2945 else if (m_settings.isDcn1)
2946 {
2947 switch (pIn->swizzleMode)
2948 {
2949 case ADDR_SW_4KB_D:
2950 case ADDR_SW_64KB_D:
2951 case ADDR_SW_64KB_D_T:
2952 case ADDR_SW_4KB_D_X:
2953 case ADDR_SW_64KB_D_X:
2954 support = (pIn->bpp == 64);
2955 break;
2956
2957 case ADDR_SW_LINEAR:
2958 case ADDR_SW_4KB_S:
2959 case ADDR_SW_64KB_S:
2960 case ADDR_SW_64KB_S_T:
2961 case ADDR_SW_4KB_S_X:
2962 case ADDR_SW_64KB_S_X:
2963 support = (pIn->bpp <= 64);
2964 break;
2965
2966 default:
2967 break;
2968 }
2969 }
2970 else
2971 {
2972 ADDR_NOT_IMPLEMENTED();
2973 }
2974
2975 return support;
2976 }
2977
2978 /**
2979 ************************************************************************************************************************
2980 * Gfx9Lib::HwlComputePipeBankXor
2981 *
2982 * @brief
2983 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2984 *
2985 * @return
2986 * PipeBankXor value
2987 ************************************************************************************************************************
2988 */
2989 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
2990 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
2991 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
2992 {
2993 if (IsXor(pIn->swizzleMode))
2994 {
2995 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2996 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
2997 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
2998
2999 UINT_32 pipeXor = 0;
3000 UINT_32 bankXor = 0;
3001
3002 const UINT_32 bankMask = (1 << bankBits) - 1;
3003 const UINT_32 index = pIn->surfIndex & bankMask;
3004
3005 const UINT_32 bpp = pIn->flags.fmask ?
3006 GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3007 if (bankBits == 4)
3008 {
3009 static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3010 static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3011
3012 bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3013 }
3014 else if (bankBits > 0)
3015 {
3016 UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3017 bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3018 bankXor = (index * bankIncrease) & bankMask;
3019 }
3020
3021 pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3022 }
3023 else
3024 {
3025 pOut->pipeBankXor = 0;
3026 }
3027
3028 return ADDR_OK;
3029 }
3030
3031 /**
3032 ************************************************************************************************************************
3033 * Gfx9Lib::HwlComputeSlicePipeBankXor
3034 *
3035 * @brief
3036 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3037 *
3038 * @return
3039 * PipeBankXor value
3040 ************************************************************************************************************************
3041 */
3042 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3043 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3044 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const
3045 {
3046 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3047 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3048 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3049
3050 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3051 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3052
3053 pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3054
3055 return ADDR_OK;
3056 }
3057
3058 /**
3059 ************************************************************************************************************************
3060 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3061 *
3062 * @brief
3063 * Compute sub resource offset to support swizzle pattern
3064 *
3065 * @return
3066 * Offset
3067 ************************************************************************************************************************
3068 */
3069 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3070 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3071 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const
3072 {
3073 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3074
3075 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3076 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3077 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3078 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3079 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3080 UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3081
3082 pOut->offset = pIn->slice * pIn->sliceSize +
3083 pIn->macroBlockOffset +
3084 (pIn->mipTailOffset ^ pipeBankXor) -
3085 static_cast<UINT_64>(pipeBankXor);
3086 return ADDR_OK;
3087 }
3088
3089 /**
3090 ************************************************************************************************************************
3091 * Gfx9Lib::ValidateNonSwModeParams
3092 *
3093 * @brief
3094 * Validate compute surface info params except swizzle mode
3095 *
3096 * @return
3097 * TRUE if parameters are valid, FALSE otherwise
3098 ************************************************************************************************************************
3099 */
3100 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3101 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3102 {
3103 BOOL_32 valid = TRUE;
3104
3105 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3106 {
3107 ADDR_ASSERT_ALWAYS();
3108 valid = FALSE;
3109 }
3110
3111 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3112 {
3113 ADDR_ASSERT_ALWAYS();
3114 valid = FALSE;
3115 }
3116
3117 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3118 const BOOL_32 msaa = (pIn->numFrags > 1);
3119 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3120
3121 const AddrResourceType rsrcType = pIn->resourceType;
3122 const BOOL_32 tex3d = IsTex3d(rsrcType);
3123 const BOOL_32 tex2d = IsTex2d(rsrcType);
3124 const BOOL_32 tex1d = IsTex1d(rsrcType);
3125
3126 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3127 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3128 const BOOL_32 display = flags.display || flags.rotated;
3129 const BOOL_32 stereo = flags.qbStereo;
3130 const BOOL_32 fmask = flags.fmask;
3131
3132 // Resource type check
3133 if (tex1d)
3134 {
3135 if (msaa || zbuffer || display || stereo || isBc || fmask)
3136 {
3137 ADDR_ASSERT_ALWAYS();
3138 valid = FALSE;
3139 }
3140 }
3141 else if (tex2d)
3142 {
3143 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
3144 {
3145 ADDR_ASSERT_ALWAYS();
3146 valid = FALSE;
3147 }
3148 }
3149 else if (tex3d)
3150 {
3151 if (msaa || zbuffer || display || stereo || fmask)
3152 {
3153 ADDR_ASSERT_ALWAYS();
3154 valid = FALSE;
3155 }
3156 }
3157 else
3158 {
3159 ADDR_ASSERT_ALWAYS();
3160 valid = FALSE;
3161 }
3162
3163 return valid;
3164 }
3165
3166 /**
3167 ************************************************************************************************************************
3168 * Gfx9Lib::ValidateSwModeParams
3169 *
3170 * @brief
3171 * Validate compute surface info related to swizzle mode
3172 *
3173 * @return
3174 * TRUE if parameters are valid, FALSE otherwise
3175 ************************************************************************************************************************
3176 */
3177 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3178 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3179 {
3180 BOOL_32 valid = TRUE;
3181
3182 if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
3183 {
3184 ADDR_ASSERT_ALWAYS();
3185 valid = FALSE;
3186 }
3187
3188 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3189 const BOOL_32 msaa = (pIn->numFrags > 1);
3190 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3191 const BOOL_32 is422 = ElemLib::IsMacroPixelPacked(pIn->format);
3192
3193 const AddrResourceType rsrcType = pIn->resourceType;
3194 const BOOL_32 tex3d = IsTex3d(rsrcType);
3195 const BOOL_32 tex2d = IsTex2d(rsrcType);
3196 const BOOL_32 tex1d = IsTex1d(rsrcType);
3197
3198 const AddrSwizzleMode swizzle = pIn->swizzleMode;
3199 const BOOL_32 linear = IsLinear(swizzle);
3200 const BOOL_32 blk256B = IsBlock256b(swizzle);
3201 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
3202
3203 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3204 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3205 const BOOL_32 color = flags.color;
3206 const BOOL_32 texture = flags.texture;
3207 const BOOL_32 display = flags.display || flags.rotated;
3208 const BOOL_32 prt = flags.prt;
3209 const BOOL_32 fmask = flags.fmask;
3210
3211 const BOOL_32 thin3d = tex3d && flags.view3dAs2dArray;
3212 const BOOL_32 zMaxMip = tex3d && mipmap &&
3213 (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3214
3215 // Misc check
3216 if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3217 {
3218 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3219 ADDR_ASSERT_ALWAYS();
3220 valid = FALSE;
3221 }
3222
3223 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3224 {
3225 ADDR_ASSERT_ALWAYS();
3226 valid = FALSE;
3227 }
3228
3229 if ((pIn->bpp == 96) && (linear == FALSE))
3230 {
3231 ADDR_ASSERT_ALWAYS();
3232 valid = FALSE;
3233 }
3234
3235 if (prt && isNonPrtXor)
3236 {
3237 ADDR_ASSERT_ALWAYS();
3238 valid = FALSE;
3239 }
3240
3241 // Resource type check
3242 if (tex1d)
3243 {
3244 if (linear == FALSE)
3245 {
3246 ADDR_ASSERT_ALWAYS();
3247 valid = FALSE;
3248 }
3249 }
3250
3251 // Swizzle type check
3252 if (linear)
3253 {
3254 if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3255 ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3256 {
3257 ADDR_ASSERT_ALWAYS();
3258 valid = FALSE;
3259 }
3260 }
3261 else if (IsZOrderSwizzle(swizzle))
3262 {
3263 if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3264 {
3265 ADDR_ASSERT_ALWAYS();
3266 valid = FALSE;
3267 }
3268 }
3269 else if (IsStandardSwizzle(swizzle))
3270 {
3271 if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3272 {
3273 ADDR_ASSERT_ALWAYS();
3274 valid = FALSE;
3275 }
3276 }
3277 else if (IsDisplaySwizzle(swizzle))
3278 {
3279 if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3280 {
3281 ADDR_ASSERT_ALWAYS();
3282 valid = FALSE;
3283 }
3284 }
3285 else if (IsRotateSwizzle(swizzle))
3286 {
3287 if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3288 {
3289 ADDR_ASSERT_ALWAYS();
3290 valid = FALSE;
3291 }
3292 }
3293 else
3294 {
3295 ADDR_ASSERT_ALWAYS();
3296 valid = FALSE;
3297 }
3298
3299 // Block type check
3300 if (blk256B)
3301 {
3302 if (prt || zbuffer || tex3d || mipmap || msaa)
3303 {
3304 ADDR_ASSERT_ALWAYS();
3305 valid = FALSE;
3306 }
3307 }
3308
3309 return valid;
3310 }
3311
3312 /**
3313 ************************************************************************************************************************
3314 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3315 *
3316 * @brief
3317 * Compute surface info sanity check
3318 *
3319 * @return
3320 * ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3321 ************************************************************************************************************************
3322 */
3323 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3324 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3325 {
3326 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3327 }
3328
3329 /**
3330 ************************************************************************************************************************
3331 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3332 *
3333 * @brief
3334 * Internal function to get suggested surface information for client to use
*
* Builds the set of swizzle modes the client has not forbidden, removes the modes ruled out by resource type,
* format, usage flags and display restrictions, and then narrows what is left to one block type and one
* swizzle type to produce a single preferred swizzle mode.
*
* @param pIn
* Surface description plus client restrictions (forbiddenBlock, preferredSwSet, noXor, maxAlign, minSizeAlign)
* @param pOut
* Receives resourceType, validSwModeSet, canXor, validBlockSet, validSwTypeSet, clientPreferredSwSet and swizzleMode
3335 *
3336 * @return
3337 * ADDR_E_RETURNCODE
* ADDR_OK when at least one swizzle mode is still allowed after filtering; ADDR_INVALIDPARAMS when
* ValidateNonSwModeParams rejects the input or every swizzle mode has been filtered out
3338 ************************************************************************************************************************
3339 */
3340 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3341 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3342 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const
3343 {
3344 ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3345 ElemLib* pElemLib = GetElemLib();
3346
3347 UINT_32 bpp = pIn->bpp;
3348 UINT_32 width = Max(pIn->width, 1u);
3349 UINT_32 height = Max(pIn->height, 1u);
3350 UINT_32 numSamples = Max(pIn->numSamples, 1u);
3351 UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3352
// FMASK takes its bpp from the sample/fragment counts and is then handled as a single-sample 2D surface.
3353 if (pIn->flags.fmask)
3354 {
3355 bpp = GetFmaskBpp(numSamples, numFrags);
3356 numFrags = 1;
3357 numSamples = 1;
3358 pOut->resourceType = ADDR_RSRC_TEX_2D;
3359 }
3360 else
3361 {
3362 // Set format to INVALID will skip this conversion
3363 if (pIn->format != ADDR_FMT_INVALID)
3364 {
3365 UINT_32 expandX, expandY;
3366
3367 // Don't care for this case
3368 ElemMode elemMode = ADDR_UNCOMPRESSED;
3369
3370 // Get compression/expansion factors and element mode which indicates compression/expansion
3371 bpp = pElemLib->GetBitsPerPixel(pIn->format,
3372 &elemMode,
3373 &expandX,
3374 &expandY);
3375
3376 UINT_32 basePitch = 0;
3377 GetElemLib()->AdjustSurfaceInfo(elemMode,
3378 expandX,
3379 expandY,
3380 &bpp,
3381 &basePitch,
3382 &width,
3383 &height);
3384 }
3385
3386 // The output may get changed for volume(3D) texture resource in future
3387 pOut->resourceType = pIn->resourceType;
3388 }
3389
3390 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
3391 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3392 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
3393 const BOOL_32 displayRsrc = pIn->flags.display || pIn->flags.rotated;
3394
3395 // Pre sanity check on non swizzle mode parameters
3396 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3397 localIn.flags = pIn->flags;
3398 localIn.resourceType = pOut->resourceType;
3399 localIn.format = pIn->format;
3400 localIn.bpp = bpp;
3401 localIn.width = width;
3402 localIn.height = height;
3403 localIn.numSlices = numSlices;
3404 localIn.numMipLevels = numMipLevels;
3405 localIn.numSamples = numSamples;
3406 localIn.numFrags = numFrags;
3407
3408 if (ValidateNonSwModeParams(&localIn))
3409 {
3410 // Forbid swizzle mode(s) by client setting
3411 ADDR2_SWMODE_SET allowedSwModeSet = {};
3412 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3413 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask;
3414 allowedSwModeSet.value |=
3415 pIn->forbiddenBlock.macroThin4KB ? 0 :
3416 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
3417 allowedSwModeSet.value |=
3418 pIn->forbiddenBlock.macroThick4KB ? 0 :
3419 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
3420 allowedSwModeSet.value |=
3421 pIn->forbiddenBlock.macroThin64KB ? 0 :
3422 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
3423 allowedSwModeSet.value |=
3424 pIn->forbiddenBlock.macroThick64KB ? 0 :
3425 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);
3426
// A non-zero preferredSwSet keeps only the swizzle types (Z/S/D/R) the client selected.
3427 if (pIn->preferredSwSet.value != 0)
3428 {
3429 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3430 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3431 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3432 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3433 }
3434
3435 if (pIn->noXor)
3436 {
3437 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3438 }
3439
// A non-zero maxAlign removes every block-size class that maxAlign is too small for.
3440 if (pIn->maxAlign > 0)
3441 {
3442 if (pIn->maxAlign < Size64K)
3443 {
3444 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3445 }
3446
3447 if (pIn->maxAlign < Size4K)
3448 {
3449 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3450 }
3451
3452 if (pIn->maxAlign < Size256)
3453 {
3454 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3455 }
3456 }
3457
3458 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3459 switch (pOut->resourceType)
3460 {
3461 case ADDR_RSRC_TEX_1D:
3462 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3463 break;
3464
3465 case ADDR_RSRC_TEX_2D:
3466 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3467
3468 if (bpp > 64)
3469 {
3470 allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3471 }
3472 break;
3473
3474 case ADDR_RSRC_TEX_3D:
3475 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3476
3477 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3478 {
3479 // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3480 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3481 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3482 allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3483 }
3484
3485 if ((bpp == 128) && pIn->flags.color)
3486 {
3487 allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3488 }
3489
3490 if (pIn->flags.view3dAs2dArray)
3491 {
3492 allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3493 }
3494 break;
3495
3496 default:
3497 ADDR_ASSERT_ALWAYS();
3498 allowedSwModeSet.value = 0;
3499 break;
3500 }
3501
// ADDR_FMT_32_32_32 is restricted to the linear swizzle modes.
3502 if (pIn->format == ADDR_FMT_32_32_32)
3503 {
3504 allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3505 }
3506
// Block-compressed formats keep only S and D modes; linear stays available unless the surface is a texture.
3507 if (ElemLib::IsBlockCompressed(pIn->format))
3508 {
3509 if (pIn->flags.texture)
3510 {
3511 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3512 }
3513 else
3514 {
3515 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3516 }
3517 }
3518
3519 if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3520 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3521 {
3522 allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3523 }
3524
// FMASK, depth and stencil surfaces are limited to the Z swizzle modes.
3525 if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3526 {
3527 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3528
3529 if (pIn->flags.noMetadata == FALSE)
3530 {
3531 if (pIn->flags.depth &&
3532 pIn->flags.texture &&
3533 (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3534 {
3535 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3536 // equation from wrong address within memory range a tile covered and use the
3537 // garbage data for compressed Z reading which finally leads to corruption.
3538 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3539 }
3540
3541 if (m_settings.htileCacheRbConflict &&
3542 (pIn->flags.depth || pIn->flags.stencil) &&
3543 (numSlices > 1) &&
3544 (pIn->flags.metaRbUnaligned == FALSE) &&
3545 (pIn->flags.metaPipeUnaligned == FALSE))
3546 {
3547 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3548 allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3549 }
3550 }
3551 }
3552
3553 if (msaa)
3554 {
3555 allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3556 }
3557
3558 if ((numFrags > 1) &&
3559 (Size4K < (m_pipeInterleaveBytes * numFrags)))
3560 {
3561 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3562 allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3563 }
3564
// Mipmapped surfaces cannot use the 256B block modes.
3565 if (numMipLevels > 1)
3566 {
3567 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3568 }
3569
// Display/rotated surfaces are limited to the per-display-engine (DCE12 or DCN1) mode mask, selected by bpp.
3570 if (displayRsrc)
3571 {
3572 if (m_settings.isDce12)
3573 {
3574 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3575 }
3576 else if (m_settings.isDcn1)
3577 {
3578 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3579 }
3580 else
3581 {
3582 ADDR_NOT_IMPLEMENTED();
3583 }
3584 }
3585
3586 if (allowedSwModeSet.value != 0)
3587 {
3588 #if DEBUG
3589 // Post sanity check, at least AddrLib should accept the output generated by its own
3590 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3591
3592 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3593 {
3594 if (validateSwModeSet & 1)
3595 {
3596 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3597 ADDR_ASSERT(ValidateSwModeParams(&localIn));
3598 }
3599
3600 validateSwModeSet >>= 1;
3601 }
3602 #endif
3603
// Publish the filtered sets. These are written before the optional needEquation filter and before the
// single-mode selection below, so they describe everything that survived the mandatory filters.
3604 pOut->validSwModeSet = allowedSwModeSet;
3605 pOut->canXor = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3606 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3607 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3608
3609 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3610
3611 if (pOut->clientPreferredSwSet.value == 0)
3612 {
3613 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3614 }
3615
3616 // Apply optional restrictions
3617 if (pIn->flags.needEquation)
3618 {
// NOTE(review): this passes pIn->resourceType while the rest of the function uses pOut->resourceType; the two
// differ when flags.fmask forces pOut->resourceType to ADDR_RSRC_TEX_2D -- confirm this is intended.
// Presumably the call narrows allowedSwModeSet in place; verify against FilterInvalidEqSwizzleMode.
3619 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
3620 }
3621
3622 if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3623 {
3624 pOut->swizzleMode = ADDR_SW_LINEAR;
3625 }
3626 else
3627 {
3628 // Always ignore linear swizzle mode if there is other choice.
3629 allowedSwModeSet.swLinear = 0;
3630
3631 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3632
3633 // Determine block size if there is 2 or more block type candidates
3634 if (IsPow2(allowedBlockSet.value) == FALSE)
3635 {
// One representative swizzle mode per block type, used only to query that block type's dimensions.
3636 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = { ADDR_SW_LINEAR };
3637
3638 swMode[AddrBlockMicro] = ADDR_SW_256B_D;
3639 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_D;
3640 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;
3641
3642 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3643 {
3644 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
3645 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3646 }
3647
3648 Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
3649 Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
3650 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3651
// In the loop below a later candidate block replaces the current pick when
// padSize * ratioHi <= minSize * ratioLow; minimizeAlign makes that a plain size comparison (1:1).
3652 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
3653 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
3654 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3655 UINT_32 minSizeBlk = AddrBlockMicro;
3656 UINT_64 minSize = 0;
3657
3658 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3659 {
3660 if (allowedBlockSet.value & (1 << i))
3661 {
3662 ComputeBlockDimensionForSurf(&blkDim[i].w,
3663 &blkDim[i].h,
3664 &blkDim[i].d,
3665 bpp,
3666 numFrags,
3667 pOut->resourceType,
3668 swMode[i]);
3669
3670 if (displayRsrc)
3671 {
3672 blkDim[i].w = PowTwoAlign(blkDim[i].w, 32);
3673 }
3674
3675 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
3676 padSize[i] = PowTwoAlign(padSize[i] * numFrags, sizeAlignInElement);
3677
3678 if ((minSize == 0) ||
3679 ((padSize[i] * ratioHi) <= (minSize * ratioLow)))
3680 {
3681 minSize = padSize[i];
3682 minSizeBlk = i;
3683 }
3684 }
3685 }
3686
// A surface no wider and no taller than one micro block always takes the 256B block when it is allowed
// and minSizeAlign (rounded up to a power of two) does not exceed 256 bytes.
3687 if ((allowedBlockSet.micro == TRUE) &&
3688 (width <= blkDim[AddrBlockMicro].w) &&
3689 (height <= blkDim[AddrBlockMicro].h) &&
3690 (NextPow2(pIn->minSizeAlign) <= Size256))
3691 {
3692 minSizeBlk = AddrBlockMicro;
3693 }
3694
// Keep only the swizzle modes that belong to the chosen block type.
3695 if (minSizeBlk == AddrBlockMicro)
3696 {
3697 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3698 allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3699 }
3700 else if (minSizeBlk == AddrBlockThick4KB)
3701 {
3702 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3703 allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
3704 }
3705 else if (minSizeBlk == AddrBlockThin4KB)
3706 {
3707 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3708 Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
3709 }
3710 else if (minSizeBlk == AddrBlockThick64KB)
3711 {
3712 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3713 allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
3714 }
3715 else
3716 {
3717 ADDR_ASSERT(minSizeBlk == AddrBlockThin64KB);
3718 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3719 Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
3720 }
3721 }
3722
3723 // Block type should be determined.
3724 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3725
3726 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3727
3728 // Determine swizzle type if there is 2 or more swizzle type candidates
// Priority order used below: block-compressed D, S; macro-pixel-packed S, D, R;
// 3D resources D (color only), Z, S; everything else R (rotated only), D, S, Z.
3729 if (IsPow2(allowedSwSet.value) == FALSE)
3730 {
3731 if (ElemLib::IsBlockCompressed(pIn->format))
3732 {
3733 if (allowedSwSet.sw_D)
3734 {
3735 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3736 }
3737 else
3738 {
3739 ADDR_ASSERT(allowedSwSet.sw_S);
3740 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3741 }
3742 }
3743 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3744 {
3745 if (allowedSwSet.sw_S)
3746 {
3747 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3748 }
3749 else if (allowedSwSet.sw_D)
3750 {
3751 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3752 }
3753 else
3754 {
3755 ADDR_ASSERT(allowedSwSet.sw_R);
3756 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3757 }
3758 }
3759 else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3760 {
3761 if (pIn->flags.color && allowedSwSet.sw_D)
3762 {
3763 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3764 }
3765 else if (allowedSwSet.sw_Z)
3766 {
3767 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3768 }
3769 else
3770 {
3771 ADDR_ASSERT(allowedSwSet.sw_S);
3772 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3773 }
3774 }
3775 else
3776 {
3777 if (pIn->flags.rotated && allowedSwSet.sw_R)
3778 {
3779 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3780 }
3781 else if (allowedSwSet.sw_D)
3782 {
3783 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3784 }
3785 else if (allowedSwSet.sw_S)
3786 {
3787 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3788 }
3789 else
3790 {
3791 ADDR_ASSERT(allowedSwSet.sw_Z);
3792 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3793 }
3794 }
3795 }
3796
3797 // Swizzle type should be determined.
3798 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3799
3800 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
3801 // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3802 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3803 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3804 }
3805
3806 returnCode = ADDR_OK;
3807 }
3808 else
3809 {
3810 // Invalid combination...
3811 ADDR_ASSERT_ALWAYS();
3812 }
3813 }
3814 else
3815 {
3816 // Invalid combination...
3817 ADDR_ASSERT_ALWAYS();
3818 }
3819
3820 return returnCode;
3821 }
3822
3823 /**
3824 ************************************************************************************************************************
3825 * Gfx9Lib::ComputeStereoInfo
3826 *
3827 * @brief
3828 * Compute height alignment and right eye pipeBankXor for stereo surface
3829 *
3830 * @return
3831 * Error code
3832 *
3833 ************************************************************************************************************************
3834 */
3835 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3836 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3837 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
3838 UINT_32* pHeightAlign
3839 ) const
3840 {
3841 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3842
3843 UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3844
3845 if (eqIndex < m_numEquations)
3846 {
3847 if (IsXor(pIn->swizzleMode))
3848 {
3849 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3850 const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2);
3851 const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
3852 const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
3853 const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3854 const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
3855
3856 ADDR_ASSERT(maxYCoordBlock256 ==
3857 GetMaxValidChannelIndex(&pEqToCheck->addr[0], Log2Size256, 1));
3858
3859 const UINT_32 maxYCoordInBaseEquation =
3860 (blkSizeLog2 - Log2Size256) / 2 + maxYCoordBlock256;
3861
3862 ADDR_ASSERT(maxYCoordInBaseEquation ==
3863 GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3864
3865 const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3866
3867 ADDR_ASSERT(maxYCoordInPipeXor ==
3868 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3869
3870 const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3871 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3872
3873 ADDR_ASSERT(maxYCoordInBankXor ==
3874 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3875
3876 const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3877
3878 if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3879 {
3880 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3881
3882 if (pOut->pStereoInfo != NULL)
3883 {
3884 pOut->pStereoInfo->rightSwizzle = 0;
3885
3886 if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3887 {
3888 if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3889 {
3890 pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3891 }
3892
3893 if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3894 {
3895 pOut->pStereoInfo->rightSwizzle |=
3896 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3897 }
3898
3899 ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3900 GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3901 numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3902 }
3903 }
3904 }
3905 }
3906 }
3907 else
3908 {
3909 ADDR_ASSERT_ALWAYS();
3910 returnCode = ADDR_ERROR;
3911 }
3912
3913 return returnCode;
3914 }
3915
3916 /**
3917 ************************************************************************************************************************
3918 * Gfx9Lib::HwlComputeSurfaceInfoTiled
3919 *
3920 * @brief
3921 * Internal function to calculate alignment for tiled surface
3922 *
3923 * @return
3924 * ADDR_E_RETURNCODE
3925 ************************************************************************************************************************
3926 */
3927 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3928 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3929 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3930 ) const
3931 {
3932 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3933 &pOut->blockHeight,
3934 &pOut->blockSlices,
3935 pIn->bpp,
3936 pIn->numFrags,
3937 pIn->resourceType,
3938 pIn->swizzleMode);
3939
3940 if (returnCode == ADDR_OK)
3941 {
3942 UINT_32 pitchAlignInElement = pOut->blockWidth;
3943
3944 if ((IsTex2d(pIn->resourceType) == TRUE) &&
3945 (pIn->flags.display || pIn->flags.rotated) &&
3946 (pIn->numMipLevels <= 1) &&
3947 (pIn->numSamples <= 1) &&
3948 (pIn->numFrags <= 1))
3949 {
3950 // Display engine needs pitch align to be at least 32 pixels.
3951 pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3952 }
3953
3954 pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3955
3956 if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3957 {
3958 if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3959 {
3960 returnCode = ADDR_INVALIDPARAMS;
3961 }
3962 else if (pIn->pitchInElement < pOut->pitch)
3963 {
3964 returnCode = ADDR_INVALIDPARAMS;
3965 }
3966 else
3967 {
3968 pOut->pitch = pIn->pitchInElement;
3969 }
3970 }
3971
3972 UINT_32 heightAlign = 0;
3973
3974 if (pIn->flags.qbStereo)
3975 {
3976 returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3977 }
3978
3979 if (returnCode == ADDR_OK)
3980 {
3981 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3982
3983 if (heightAlign > 1)
3984 {
3985 pOut->height = PowTwoAlign(pOut->height, heightAlign);
3986 }
3987
3988 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3989
3990 pOut->epitchIsHeight = FALSE;
3991 pOut->mipChainInTail = FALSE;
3992 pOut->firstMipIdInTail = pIn->numMipLevels;
3993
3994 pOut->mipChainPitch = pOut->pitch;
3995 pOut->mipChainHeight = pOut->height;
3996 pOut->mipChainSlice = pOut->numSlices;
3997
3998 if (pIn->numMipLevels > 1)
3999 {
4000 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
4001 pIn->swizzleMode,
4002 pIn->bpp,
4003 pIn->width,
4004 pIn->height,
4005 pIn->numSlices,
4006 pOut->blockWidth,
4007 pOut->blockHeight,
4008 pOut->blockSlices,
4009 pIn->numMipLevels,
4010 pOut->pMipInfo);
4011
4012 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4013
4014 if (endingMipId == 0)
4015 {
4016 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4017 pIn->swizzleMode,
4018 pOut->blockWidth,
4019 pOut->blockHeight,
4020 pOut->blockSlices);
4021
4022 pOut->epitchIsHeight = TRUE;
4023 pOut->pitch = tailMaxDim.w;
4024 pOut->height = tailMaxDim.h;
4025 pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4026 tailMaxDim.d : pIn->numSlices;
4027 pOut->mipChainInTail = TRUE;
4028 }
4029 else
4030 {
4031 UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
4032 UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4033
4034 AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4035 pIn->swizzleMode,
4036 mip0WidthInBlk,
4037 mip0HeightInBlk,
4038 pOut->numSlices / pOut->blockSlices);
4039 if (majorMode == ADDR_MAJOR_Y)
4040 {
4041 UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4042
4043 if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4044 {
4045 mip1WidthInBlk++;
4046 }
4047
4048 pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4049
4050 pOut->epitchIsHeight = FALSE;
4051 }
4052 else
4053 {
4054 UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4055
4056 if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4057 {
4058 mip1HeightInBlk++;
4059 }
4060
4061 pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4062
4063 pOut->epitchIsHeight = TRUE;
4064 }
4065 }
4066
4067 if (pOut->pMipInfo != NULL)
4068 {
4069 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4070
4071 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4072 {
4073 Dim3d mipStartPos = {0};
4074 UINT_32 mipTailOffsetInBytes = 0;
4075
4076 mipStartPos = GetMipStartPos(pIn->resourceType,
4077 pIn->swizzleMode,
4078 pOut->pitch,
4079 pOut->height,
4080 pOut->numSlices,
4081 pOut->blockWidth,
4082 pOut->blockHeight,
4083 pOut->blockSlices,
4084 i,
4085 elementBytesLog2,
4086 &mipTailOffsetInBytes);
4087
4088 UINT_32 pitchInBlock =
4089 pOut->mipChainPitch / pOut->blockWidth;
4090 UINT_32 sliceInBlock =
4091 (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4092 UINT_64 blockIndex =
4093 mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4094 UINT_64 macroBlockOffset =
4095 blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4096
4097 pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4098 pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
4099 }
4100 }
4101 }
4102 else if (pOut->pMipInfo != NULL)
4103 {
4104 pOut->pMipInfo[0].pitch = pOut->pitch;
4105 pOut->pMipInfo[0].height = pOut->height;
4106 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4107 pOut->pMipInfo[0].offset = 0;
4108 }
4109
4110 pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4111 (pIn->bpp >> 3) * pIn->numFrags;
4112 pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
4113 pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4114
4115 if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4116 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4117 (pIn->flags.texture == TRUE) &&
4118 (pIn->flags.noMetadata == FALSE) &&
4119 (pIn->flags.metaPipeUnaligned == FALSE))
4120 {
4121 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4122 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4123 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4124 // them, which may cause invalid metadata to be fetched.
4125 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4126 }
4127
4128 if (pIn->flags.prt)
4129 {
4130 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4131 }
4132 }
4133 }
4134
4135 return returnCode;
4136 }
4137
4138 /**
4139 ************************************************************************************************************************
4140 * Gfx9Lib::HwlComputeSurfaceInfoLinear
4141 *
4142 * @brief
4143 * Internal function to calculate alignment for linear surface
4144 *
4145 * @return
4146 * ADDR_E_RETURNCODE
4147 ************************************************************************************************************************
4148 */
4149 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4150 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4151 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4152 ) const
4153 {
4154 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4155 UINT_32 pitch = 0;
4156 UINT_32 actualHeight = 0;
4157 UINT_32 elementBytes = pIn->bpp >> 3;
4158 const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256;
4159
4160 if (IsTex1d(pIn->resourceType))
4161 {
4162 if (pIn->height > 1)
4163 {
4164 returnCode = ADDR_INVALIDPARAMS;
4165 }
4166 else
4167 {
4168 const UINT_32 pitchAlignInElement = alignment / elementBytes;
4169
4170 pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4171 actualHeight = pIn->numMipLevels;
4172
4173 if (pIn->flags.prt == FALSE)
4174 {
4175 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4176 &pitch, &actualHeight);
4177 }
4178
4179 if (returnCode == ADDR_OK)
4180 {
4181 if (pOut->pMipInfo != NULL)
4182 {
4183 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4184 {
4185 pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4186 pOut->pMipInfo[i].pitch = pitch;
4187 pOut->pMipInfo[i].height = 1;
4188 pOut->pMipInfo[i].depth = 1;
4189 }
4190 }
4191 }
4192 }
4193 }
4194 else
4195 {
4196 returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4197 }
4198
4199 if ((pitch == 0) || (actualHeight == 0))
4200 {
4201 returnCode = ADDR_INVALIDPARAMS;
4202 }
4203
4204 if (returnCode == ADDR_OK)
4205 {
4206 pOut->pitch = pitch;
4207 pOut->height = pIn->height;
4208 pOut->numSlices = pIn->numSlices;
4209 pOut->mipChainPitch = pitch;
4210 pOut->mipChainHeight = actualHeight;
4211 pOut->mipChainSlice = pOut->numSlices;
4212 pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4213 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4214 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
4215 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4216 pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4217 pOut->blockHeight = 1;
4218 pOut->blockSlices = 1;
4219 }
4220
4221 // Post calculation validate
4222 ADDR_ASSERT(pOut->sliceSize > 0);
4223
4224 return returnCode;
4225 }
4226
4227 /**
4228 ************************************************************************************************************************
4229 * Gfx9Lib::GetMipChainInfo
4230 *
4231 * @brief
4232 * Internal function to get out information about mip chain
4233 *
4234 * @return
4235 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4236 ************************************************************************************************************************
4237 */
4238 UINT_32 Gfx9Lib::GetMipChainInfo(
4239 AddrResourceType resourceType,
4240 AddrSwizzleMode swizzleMode,
4241 UINT_32 bpp,
4242 UINT_32 mip0Width,
4243 UINT_32 mip0Height,
4244 UINT_32 mip0Depth,
4245 UINT_32 blockWidth,
4246 UINT_32 blockHeight,
4247 UINT_32 blockDepth,
4248 UINT_32 numMipLevel,
4249 ADDR2_MIP_INFO* pMipInfo) const
4250 {
4251 const Dim3d tailMaxDim =
4252 GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4253
4254 UINT_32 mipPitch = mip0Width;
4255 UINT_32 mipHeight = mip0Height;
4256 UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
4257 UINT_32 offset = 0;
4258 UINT_32 firstMipIdInTail = numMipLevel;
4259 BOOL_32 inTail = FALSE;
4260 BOOL_32 finalDim = FALSE;
4261 BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
4262 BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
4263
4264 for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4265 {
4266 if (inTail)
4267 {
4268 if (finalDim == FALSE)
4269 {
4270 UINT_32 mipSize;
4271
4272 if (is3dThick)
4273 {
4274 mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4275 }
4276 else
4277 {
4278 mipSize = mipPitch * mipHeight * (bpp >> 3);
4279 }
4280
4281 if (mipSize <= 256)
4282 {
4283 UINT_32 index = Log2(bpp >> 3);
4284
4285 if (is3dThick)
4286 {
4287 mipPitch = Block256_3dZ[index].w;
4288 mipHeight = Block256_3dZ[index].h;
4289 mipDepth = Block256_3dZ[index].d;
4290 }
4291 else
4292 {
4293 mipPitch = Block256_2d[index].w;
4294 mipHeight = Block256_2d[index].h;
4295 }
4296
4297 finalDim = TRUE;
4298 }
4299 }
4300 }
4301 else
4302 {
4303 inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4304 mipPitch, mipHeight, mipDepth);
4305
4306 if (inTail)
4307 {
4308 firstMipIdInTail = mipId;
4309 mipPitch = tailMaxDim.w;
4310 mipHeight = tailMaxDim.h;
4311
4312 if (is3dThick)
4313 {
4314 mipDepth = tailMaxDim.d;
4315 }
4316 }
4317 else
4318 {
4319 mipPitch = PowTwoAlign(mipPitch, blockWidth);
4320 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4321
4322 if (is3dThick)
4323 {
4324 mipDepth = PowTwoAlign(mipDepth, blockDepth);
4325 }
4326 }
4327 }
4328
4329 if (pMipInfo != NULL)
4330 {
4331 pMipInfo[mipId].pitch = mipPitch;
4332 pMipInfo[mipId].height = mipHeight;
4333 pMipInfo[mipId].depth = mipDepth;
4334 pMipInfo[mipId].offset = offset;
4335 }
4336
4337 offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4338
4339 if (finalDim)
4340 {
4341 if (is3dThin)
4342 {
4343 mipDepth = Max(mipDepth >> 1, 1u);
4344 }
4345 }
4346 else
4347 {
4348 mipPitch = Max(mipPitch >> 1, 1u);
4349 mipHeight = Max(mipHeight >> 1, 1u);
4350
4351 if (is3dThick || is3dThin)
4352 {
4353 mipDepth = Max(mipDepth >> 1, 1u);
4354 }
4355 }
4356 }
4357
4358 return firstMipIdInTail;
4359 }
4360
4361 /**
4362 ************************************************************************************************************************
4363 * Gfx9Lib::GetMetaMiptailInfo
4364 *
4365 * @brief
4366 * Get mip tail coordinate information.
4367 *
4368 * @return
4369 * N/A
4370 ************************************************************************************************************************
4371 */
4372 VOID Gfx9Lib::GetMetaMiptailInfo(
4373 ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord
4374 Dim3d mipCoord, ///< [in] mip tail base coord
4375 UINT_32 numMipInTail, ///< [in] number of mips in tail
4376 Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
4377 ) const
4378 {
4379 BOOL_32 isThick = (pMetaBlkDim->d > 1);
4380 UINT_32 mipWidth = pMetaBlkDim->w;
4381 UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4382 UINT_32 mipDepth = pMetaBlkDim->d;
4383 UINT_32 minInc;
4384
4385 if (isThick)
4386 {
4387 minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4388 }
4389 else if (pMetaBlkDim->h >= 1024)
4390 {
4391 minInc = 256;
4392 }
4393 else if (pMetaBlkDim->h == 512)
4394 {
4395 minInc = 128;
4396 }
4397 else
4398 {
4399 minInc = 64;
4400 }
4401
4402 UINT_32 blk32MipId = 0xFFFFFFFF;
4403
4404 for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4405 {
4406 pInfo[mip].inMiptail = TRUE;
4407 pInfo[mip].startX = mipCoord.w;
4408 pInfo[mip].startY = mipCoord.h;
4409 pInfo[mip].startZ = mipCoord.d;
4410 pInfo[mip].width = mipWidth;
4411 pInfo[mip].height = mipHeight;
4412 pInfo[mip].depth = mipDepth;
4413
4414 if (mipWidth <= 32)
4415 {
4416 if (blk32MipId == 0xFFFFFFFF)
4417 {
4418 blk32MipId = mip;
4419 }
4420
4421 mipCoord.w = pInfo[blk32MipId].startX;
4422 mipCoord.h = pInfo[blk32MipId].startY;
4423 mipCoord.d = pInfo[blk32MipId].startZ;
4424
4425 switch (mip - blk32MipId)
4426 {
4427 case 0:
4428 mipCoord.w += 32; // 16x16
4429 break;
4430 case 1:
4431 mipCoord.h += 32; // 8x8
4432 break;
4433 case 2:
4434 mipCoord.h += 32; // 4x4
4435 mipCoord.w += 16;
4436 break;
4437 case 3:
4438 mipCoord.h += 32; // 2x2
4439 mipCoord.w += 32;
4440 break;
4441 case 4:
4442 mipCoord.h += 32; // 1x1
4443 mipCoord.w += 48;
4444 break;
4445 // The following are for BC/ASTC formats
4446 case 5:
4447 mipCoord.h += 48; // 1/2 x 1/2
4448 break;
4449 case 6:
4450 mipCoord.h += 48; // 1/4 x 1/4
4451 mipCoord.w += 16;
4452 break;
4453 case 7:
4454 mipCoord.h += 48; // 1/8 x 1/8
4455 mipCoord.w += 32;
4456 break;
4457 case 8:
4458 mipCoord.h += 48; // 1/16 x 1/16
4459 mipCoord.w += 48;
4460 break;
4461 default:
4462 ADDR_ASSERT_ALWAYS();
4463 break;
4464 }
4465
4466 mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4467 mipHeight = mipWidth;
4468
4469 if (isThick)
4470 {
4471 mipDepth = mipWidth;
4472 }
4473 }
4474 else
4475 {
4476 if (mipWidth <= minInc)
4477 {
4478 // if we're below the minimal increment...
4479 if (isThick)
4480 {
4481 // For 3d, just go in z direction
4482 mipCoord.d += mipDepth;
4483 }
4484 else
4485 {
4486 // For 2d, first go across, then down
4487 if ((mipWidth * 2) == minInc)
4488 {
4489 // if we're 2 mips below, that's when we go back in x, and down in y
4490 mipCoord.w -= minInc;
4491 mipCoord.h += minInc;
4492 }
4493 else
4494 {
4495 // otherwise, just go across in x
4496 mipCoord.w += minInc;
4497 }
4498 }
4499 }
4500 else
4501 {
4502 // On even mip, go down, otherwise, go across
4503 if (mip & 1)
4504 {
4505 mipCoord.w += mipWidth;
4506 }
4507 else
4508 {
4509 mipCoord.h += mipHeight;
4510 }
4511 }
4512 // Divide the width by 2
4513 mipWidth >>= 1;
4514 // After the first mip in tail, the mip is always a square
4515 mipHeight = mipWidth;
4516 // ...or for 3d, a cube
4517 if (isThick)
4518 {
4519 mipDepth = mipWidth;
4520 }
4521 }
4522 }
4523 }
4524
4525 /**
4526 ************************************************************************************************************************
4527 * Gfx9Lib::GetMipStartPos
4528 *
4529 * @brief
4530 * Internal function to get out information about mip logical start position
4531 *
4532 * @return
4533 * logical start position in macro block width/heith/depth of one mip level within one slice
4534 ************************************************************************************************************************
4535 */
4536 Dim3d Gfx9Lib::GetMipStartPos(
4537 AddrResourceType resourceType,
4538 AddrSwizzleMode swizzleMode,
4539 UINT_32 width,
4540 UINT_32 height,
4541 UINT_32 depth,
4542 UINT_32 blockWidth,
4543 UINT_32 blockHeight,
4544 UINT_32 blockDepth,
4545 UINT_32 mipId,
4546 UINT_32 log2ElementBytes,
4547 UINT_32* pMipTailBytesOffset) const
4548 {
4549 Dim3d mipStartPos = {0};
4550 const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4551
4552 // Report mip in tail if Mip0 is already in mip tail
4553 BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4554 UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
4555 UINT_32 mipIndexInTail = mipId;
4556
4557 if (inMipTail == FALSE)
4558 {
4559 // Mip 0 dimension, unit in block
4560 UINT_32 mipWidthInBlk = width / blockWidth;
4561 UINT_32 mipHeightInBlk = height / blockHeight;
4562 UINT_32 mipDepthInBlk = depth / blockDepth;
4563 AddrMajorMode majorMode = GetMajorMode(resourceType,
4564 swizzleMode,
4565 mipWidthInBlk,
4566 mipHeightInBlk,
4567 mipDepthInBlk);
4568
4569 UINT_32 endingMip = mipId + 1;
4570
4571 for (UINT_32 i = 1; i <= mipId; i++)
4572 {
4573 if ((i == 1) || (i == 3))
4574 {
4575 if (majorMode == ADDR_MAJOR_Y)
4576 {
4577 mipStartPos.w += mipWidthInBlk;
4578 }
4579 else
4580 {
4581 mipStartPos.h += mipHeightInBlk;
4582 }
4583 }
4584 else
4585 {
4586 if (majorMode == ADDR_MAJOR_X)
4587 {
4588 mipStartPos.w += mipWidthInBlk;
4589 }
4590 else if (majorMode == ADDR_MAJOR_Y)
4591 {
4592 mipStartPos.h += mipHeightInBlk;
4593 }
4594 else
4595 {
4596 mipStartPos.d += mipDepthInBlk;
4597 }
4598 }
4599
4600 BOOL_32 inTail = FALSE;
4601
4602 if (IsThick(resourceType, swizzleMode))
4603 {
4604 UINT_32 dim = log2BlkSize % 3;
4605
4606 if (dim == 0)
4607 {
4608 inTail =
4609 (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4610 }
4611 else if (dim == 1)
4612 {
4613 inTail =
4614 (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4615 }
4616 else
4617 {
4618 inTail =
4619 (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4620 }
4621 }
4622 else
4623 {
4624 if (log2BlkSize & 1)
4625 {
4626 inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4627 }
4628 else
4629 {
4630 inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4631 }
4632 }
4633
4634 if (inTail)
4635 {
4636 endingMip = i;
4637 break;
4638 }
4639
4640 mipWidthInBlk = RoundHalf(mipWidthInBlk);
4641 mipHeightInBlk = RoundHalf(mipHeightInBlk);
4642 mipDepthInBlk = RoundHalf(mipDepthInBlk);
4643 }
4644
4645 if (mipId >= endingMip)
4646 {
4647 inMipTail = TRUE;
4648 mipIndexInTail = mipId - endingMip;
4649 }
4650 }
4651
4652 if (inMipTail)
4653 {
4654 UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
4655 ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4656 *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4657 }
4658
4659 return mipStartPos;
4660 }
4661
4662 /**
4663 ************************************************************************************************************************
4664 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4665 *
4666 * @brief
4667 * Internal function to calculate address from coord for tiled swizzle surface
4668 *
4669 * @return
4670 * ADDR_E_RETURNCODE
4671 ************************************************************************************************************************
4672 */
4673 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4674 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4675 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4676 ) const
4677 {
4678 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4679 localIn.swizzleMode = pIn->swizzleMode;
4680 localIn.flags = pIn->flags;
4681 localIn.resourceType = pIn->resourceType;
4682 localIn.bpp = pIn->bpp;
4683 localIn.width = Max(pIn->unalignedWidth, 1u);
4684 localIn.height = Max(pIn->unalignedHeight, 1u);
4685 localIn.numSlices = Max(pIn->numSlices, 1u);
4686 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4687 localIn.numSamples = Max(pIn->numSamples, 1u);
4688 localIn.numFrags = Max(pIn->numFrags, 1u);
4689 if (localIn.numMipLevels <= 1)
4690 {
4691 localIn.pitchInElement = pIn->pitchInElement;
4692 }
4693
4694 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4695 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4696
4697 BOOL_32 valid = (returnCode == ADDR_OK) &&
4698 (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4699 IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4700 ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4701
4702 if (valid)
4703 {
4704 UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3);
4705 Dim3d mipStartPos = {0};
4706 UINT_32 mipTailBytesOffset = 0;
4707
4708 if (pIn->numMipLevels > 1)
4709 {
4710 // Mip-map chain cannot be MSAA surface
4711 ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4712
4713 mipStartPos = GetMipStartPos(pIn->resourceType,
4714 pIn->swizzleMode,
4715 localOut.pitch,
4716 localOut.height,
4717 localOut.numSlices,
4718 localOut.blockWidth,
4719 localOut.blockHeight,
4720 localOut.blockSlices,
4721 pIn->mipId,
4722 log2ElementBytes,
4723 &mipTailBytesOffset);
4724 }
4725
4726 UINT_32 interleaveOffset = 0;
4727 UINT_32 pipeBits = 0;
4728 UINT_32 pipeXor = 0;
4729 UINT_32 bankBits = 0;
4730 UINT_32 bankXor = 0;
4731
4732 if (IsThin(pIn->resourceType, pIn->swizzleMode))
4733 {
4734 UINT_32 blockOffset = 0;
4735 UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4736
4737 if (IsZOrderSwizzle(pIn->swizzleMode))
4738 {
4739 // Morton generation
4740 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4741 {
4742 UINT_32 totalLowBits = 6 - log2ElementBytes;
4743 UINT_32 mortBits = totalLowBits / 2;
4744 UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4745 // Are 9 bits enough?
4746 UINT_32 highBitsValue =
4747 MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4748 blockOffset = lowBitsValue | highBitsValue;
4749 ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4750 }
4751 else
4752 {
4753 blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4754 }
4755
4756 // Fill LSBs with sample bits
4757 if (pIn->numSamples > 1)
4758 {
4759 blockOffset *= pIn->numSamples;
4760 blockOffset |= pIn->sample;
4761 }
4762
4763 // Shift according to BytesPP
4764 blockOffset <<= log2ElementBytes;
4765 }
4766 else
4767 {
4768 // Micro block offset
4769 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4770 blockOffset = microBlockOffset;
4771
4772 // Micro block dimension
4773 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4774 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4775 // Morton generation, does 12 bit enough?
4776 blockOffset |=
4777 MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4778
4779 // Sample bits start location
4780 UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
4781 // Join sample bits information to the highest Macro block bits
4782 if (IsNonPrtXor(pIn->swizzleMode))
4783 {
4784 // Non-prt-Xor : xor highest Macro block bits with sample bits
4785 blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4786 }
4787 else
4788 {
4789 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4790 // after this op, the blockOffset only contains log2 Macro block size bits
4791 blockOffset %= (1 << sampleStart);
4792 blockOffset |= (pIn->sample << sampleStart);
4793 ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
4794 }
4795 }
4796
4797 if (IsXor(pIn->swizzleMode))
4798 {
4799 // Mask off bits above Macro block bits to keep page synonyms working for prt
4800 if (IsPrt(pIn->swizzleMode))
4801 {
4802 blockOffset &= ((1 << log2BlkSize) - 1);
4803 }
4804
4805 // Preserve offset inside pipe interleave
4806 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4807 blockOffset >>= m_pipeInterleaveLog2;
4808
4809 // Pipe/Se xor bits
4810 pipeBits = GetPipeXorBits(log2BlkSize);
4811 // Pipe xor
4812 pipeXor = FoldXor2d(blockOffset, pipeBits);
4813 blockOffset >>= pipeBits;
4814
4815 // Bank xor bits
4816 bankBits = GetBankXorBits(log2BlkSize);
4817 // Bank Xor
4818 bankXor = FoldXor2d(blockOffset, bankBits);
4819 blockOffset >>= bankBits;
4820
4821 // Put all the part back together
4822 blockOffset <<= bankBits;
4823 blockOffset |= bankXor;
4824 blockOffset <<= pipeBits;
4825 blockOffset |= pipeXor;
4826 blockOffset <<= m_pipeInterleaveLog2;
4827 blockOffset |= interleaveOffset;
4828 }
4829
4830 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4831 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4832
4833 blockOffset |= mipTailBytesOffset;
4834
4835 if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4836 {
4837 // Apply slice xor if not MSAA/PRT
4838 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4839 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4840 (m_pipeInterleaveLog2 + pipeBits));
4841 }
4842
4843 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4844 bankBits, pipeBits, &blockOffset);
4845
4846 blockOffset %= (1 << log2BlkSize);
4847
4848 UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4849 UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4850 UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4851 UINT_64 macroBlockIndex =
4852 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4853 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4854 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4855
4856 pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
4857 }
4858 else
4859 {
4860 UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4861
4862 Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4863
4864 UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4865 (pIn->y / microBlockDim.h),
4866 (pIn->slice / microBlockDim.d),
4867 8);
4868
4869 blockOffset <<= 10;
4870 blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4871
4872 if (IsXor(pIn->swizzleMode))
4873 {
4874 // Mask off bits above Macro block bits to keep page synonyms working for prt
4875 if (IsPrt(pIn->swizzleMode))
4876 {
4877 blockOffset &= ((1 << log2BlkSize) - 1);
4878 }
4879
4880 // Preserve offset inside pipe interleave
4881 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4882 blockOffset >>= m_pipeInterleaveLog2;
4883
4884 // Pipe/Se xor bits
4885 pipeBits = GetPipeXorBits(log2BlkSize);
4886 // Pipe xor
4887 pipeXor = FoldXor3d(blockOffset, pipeBits);
4888 blockOffset >>= pipeBits;
4889
4890 // Bank xor bits
4891 bankBits = GetBankXorBits(log2BlkSize);
4892 // Bank Xor
4893 bankXor = FoldXor3d(blockOffset, bankBits);
4894 blockOffset >>= bankBits;
4895
4896 // Put all the part back together
4897 blockOffset <<= bankBits;
4898 blockOffset |= bankXor;
4899 blockOffset <<= pipeBits;
4900 blockOffset |= pipeXor;
4901 blockOffset <<= m_pipeInterleaveLog2;
4902 blockOffset |= interleaveOffset;
4903 }
4904
4905 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4906 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4907 blockOffset |= mipTailBytesOffset;
4908
4909 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4910 bankBits, pipeBits, &blockOffset);
4911
4912 blockOffset %= (1 << log2BlkSize);
4913
4914 UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
4915 UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4916 UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4917
4918 UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4919 UINT_32 sliceSizeInBlock =
4920 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4921 UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4922
4923 pOut->addr = blockOffset | (blockIndex << log2BlkSize);
4924 }
4925 }
4926 else
4927 {
4928 returnCode = ADDR_INVALIDPARAMS;
4929 }
4930
4931 return returnCode;
4932 }
4933
4934 /**
4935 ************************************************************************************************************************
4936 * Gfx9Lib::ComputeSurfaceInfoLinear
4937 *
4938 * @brief
4939 * Internal function to calculate padding for linear swizzle 2D/3D surface
4940 *
4941 * @return
4942 * N/A
4943 ************************************************************************************************************************
4944 */
4945 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
4946 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture
4947 UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element
4948 UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW
4949 ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information
4950 ) const
4951 {
4952 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4953
4954 UINT_32 elementBytes = pIn->bpp >> 3;
4955 UINT_32 pitchAlignInElement = 0;
4956
4957 if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
4958 {
4959 ADDR_ASSERT(pIn->numMipLevels <= 1);
4960 ADDR_ASSERT(pIn->numSlices <= 1);
4961 pitchAlignInElement = 1;
4962 }
4963 else
4964 {
4965 pitchAlignInElement = (256 / elementBytes);
4966 }
4967
4968 UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement);
4969 UINT_32 slice0PaddedHeight = pIn->height;
4970
4971 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4972 &mipChainWidth, &slice0PaddedHeight);
4973
4974 if (returnCode == ADDR_OK)
4975 {
4976 UINT_32 mipChainHeight = 0;
4977 UINT_32 mipHeight = pIn->height;
4978 UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4979
4980 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4981 {
4982 if (pMipInfo != NULL)
4983 {
4984 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
4985 pMipInfo[i].pitch = mipChainWidth;
4986 pMipInfo[i].height = mipHeight;
4987 pMipInfo[i].depth = mipDepth;
4988 }
4989
4990 mipChainHeight += mipHeight;
4991 mipHeight = RoundHalf(mipHeight);
4992 mipHeight = Max(mipHeight, 1u);
4993 }
4994
4995 *pMipmap0PaddedWidth = mipChainWidth;
4996 *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
4997 }
4998
4999 return returnCode;
5000 }
5001
5002 /**
5003 ************************************************************************************************************************
5004 * Gfx9Lib::ComputeThinBlockDimension
5005 *
5006 * @brief
5007 * Internal function to get thin block width/height/depth in element from surface input params.
5008 *
5009 * @return
5010 * N/A
5011 ************************************************************************************************************************
5012 */
5013 VOID Gfx9Lib::ComputeThinBlockDimension(
5014 UINT_32* pWidth,
5015 UINT_32* pHeight,
5016 UINT_32* pDepth,
5017 UINT_32 bpp,
5018 UINT_32 numSamples,
5019 AddrResourceType resourceType,
5020 AddrSwizzleMode swizzleMode) const
5021 {
5022 ADDR_ASSERT(IsThin(resourceType, swizzleMode));
5023
5024 const UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
5025 const UINT_32 eleBytes = bpp >> 3;
5026 const UINT_32 microBlockSizeTableIndex = Log2(eleBytes);
5027 const UINT_32 log2blkSizeIn256B = log2BlkSize - 8;
5028 const UINT_32 widthAmp = log2blkSizeIn256B / 2;
5029 const UINT_32 heightAmp = log2blkSizeIn256B - widthAmp;
5030
5031 ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));
5032
5033 *pWidth = (Block256_2d[microBlockSizeTableIndex].w << widthAmp);
5034 *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp);
5035 *pDepth = 1;
5036
5037 if (numSamples > 1)
5038 {
5039 const UINT_32 log2sample = Log2(numSamples);
5040 const UINT_32 q = log2sample >> 1;
5041 const UINT_32 r = log2sample & 1;
5042
5043 if (log2BlkSize & 1)
5044 {
5045 *pWidth >>= q;
5046 *pHeight >>= (q + r);
5047 }
5048 else
5049 {
5050 *pWidth >>= (q + r);
5051 *pHeight >>= q;
5052 }
5053 }
5054 }
5055
5056 } // V2
5057 } // Addr