amd/addrlib: update to the latest version
[mesa.git] / src / amd / addrlib / src / gfx9 / gfx9addrlib.cpp
1 /*
2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx9addrlib.h"
35
36 #include "gfx9_gb_reg.h"
37
38 #include "amdgpu_asic_addr.h"
39
40 #include "util/macros.h"
41
42 ////////////////////////////////////////////////////////////////////////////////////////////////////
43 ////////////////////////////////////////////////////////////////////////////////////////////////////
44
45 namespace Addr
46 {
47
48 /**
49 ************************************************************************************************************************
50 * Gfx9HwlInit
51 *
52 * @brief
53 * Creates an Gfx9Lib object.
54 *
55 * @return
56 * Returns an Gfx9Lib object pointer.
57 ************************************************************************************************************************
58 */
59 Addr::Lib* Gfx9HwlInit(const Client* pClient)
60 {
61 return V2::Gfx9Lib::CreateObj(pClient);
62 }
63
64 namespace V2
65 {
66
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68 // Static Const Member
69 ////////////////////////////////////////////////////////////////////////////////////////////////////
70
71 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
72 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
73 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
74 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
75 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
76 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_R
77
78 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
79 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
80 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
81 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_R
82
83 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
84 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
85 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
86 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_R
87
88 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
89 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
90 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
91 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
92
93 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_Z_T
94 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
95 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
96 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0}, // ADDR_SW_64KB_R_T
97
98 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_Z_x
99 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_x
100 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_x
101 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_4KB_R_x
102
103 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
104 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
105 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
106 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_64KB_R_X
107
108 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
109 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
110 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
111 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
112 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
113 };
114
115 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};
116
117 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
118
119 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
120
121 /**
122 ************************************************************************************************************************
123 * Gfx9Lib::Gfx9Lib
124 *
125 * @brief
126 * Constructor
127 *
128 ************************************************************************************************************************
129 */
130 Gfx9Lib::Gfx9Lib(const Client* pClient)
131 :
132 Lib(pClient)
133 {
134 m_class = AI_ADDRLIB;
135 memset(&m_settings, 0, sizeof(m_settings));
136 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
137 memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
138 m_metaEqOverrideIndex = 0;
139 }
140
141 /**
142 ************************************************************************************************************************
143 * Gfx9Lib::~Gfx9Lib
144 *
145 * @brief
146 * Destructor
147 ************************************************************************************************************************
148 */
149 Gfx9Lib::~Gfx9Lib()
150 {
151 }
152
153 /**
154 ************************************************************************************************************************
155 * Gfx9Lib::HwlComputeHtileInfo
156 *
157 * @brief
158 * Interface function stub of AddrComputeHtilenfo
159 *
160 * @return
161 * ADDR_E_RETURNCODE
162 ************************************************************************************************************************
163 */
164 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
165 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
166 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
167 ) const
168 {
169 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
170 pIn->swizzleMode);
171
172 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
173
174 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
175
176 if ((numPipeTotal == 1) && (numRbTotal == 1))
177 {
178 numCompressBlkPerMetaBlkLog2 = 10;
179 }
180 else
181 {
182 if (m_settings.applyAliasFix)
183 {
184 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
185 }
186 else
187 {
188 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
189 }
190 }
191
192 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
193
194 Dim3d metaBlkDim = {8, 8, 1};
195 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
196 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
197 UINT_32 heightAmp = totalAmpBits - widthAmp;
198 metaBlkDim.w <<= widthAmp;
199 metaBlkDim.h <<= heightAmp;
200
201 #if DEBUG
202 Dim3d metaBlkDimDbg = {8, 8, 1};
203 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
204 {
205 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
206 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
207 {
208 metaBlkDimDbg.h <<= 1;
209 }
210 else
211 {
212 metaBlkDimDbg.w <<= 1;
213 }
214 }
215 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
216 #endif
217
218 UINT_32 numMetaBlkX;
219 UINT_32 numMetaBlkY;
220 UINT_32 numMetaBlkZ;
221
222 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
223 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
224 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
225
226 const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
227 UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
228
229 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
230 {
231 align *= (numPipeTotal >> 1);
232 }
233
234 align = Max(align, metaBlkSize);
235
236 if (m_settings.metaBaseAlignFix)
237 {
238 align = Max(align, GetBlockSize(pIn->swizzleMode));
239 }
240
241 if (m_settings.htileAlignFix)
242 {
243 const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
244 const INT_32 htileCachelineSizeLog2 = 11;
245 const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
246
247 INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
248
249 align <<= rbMaskPadding;
250 }
251
252 pOut->pitch = numMetaBlkX * metaBlkDim.w;
253 pOut->height = numMetaBlkY * metaBlkDim.h;
254 pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
255
256 pOut->metaBlkWidth = metaBlkDim.w;
257 pOut->metaBlkHeight = metaBlkDim.h;
258 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
259
260 pOut->baseAlign = align;
261 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
262
263 return ADDR_OK;
264 }
265
266 /**
267 ************************************************************************************************************************
268 * Gfx9Lib::HwlComputeCmaskInfo
269 *
270 * @brief
271 * Interface function stub of AddrComputeCmaskInfo
272 *
273 * @return
274 * ADDR_E_RETURNCODE
275 ************************************************************************************************************************
276 */
277 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
278 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
279 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
280 ) const
281 {
282 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
283
284 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
285 pIn->swizzleMode);
286
287 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
288
289 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
290
291 if ((numPipeTotal == 1) && (numRbTotal == 1))
292 {
293 numCompressBlkPerMetaBlkLog2 = 13;
294 }
295 else
296 {
297 if (m_settings.applyAliasFix)
298 {
299 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
300 }
301 else
302 {
303 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
304 }
305
306 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
307 }
308
309 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
310
311 Dim2d metaBlkDim = {8, 8};
312 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
313 UINT_32 heightAmp = totalAmpBits >> 1;
314 UINT_32 widthAmp = totalAmpBits - heightAmp;
315 metaBlkDim.w <<= widthAmp;
316 metaBlkDim.h <<= heightAmp;
317
318 #if DEBUG
319 Dim2d metaBlkDimDbg = {8, 8};
320 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
321 {
322 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
323 {
324 metaBlkDimDbg.h <<= 1;
325 }
326 else
327 {
328 metaBlkDimDbg.w <<= 1;
329 }
330 }
331 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
332 #endif
333
334 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
335 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
336 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
337
338 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
339
340 if (m_settings.metaBaseAlignFix)
341 {
342 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
343 }
344
345 pOut->pitch = numMetaBlkX * metaBlkDim.w;
346 pOut->height = numMetaBlkY * metaBlkDim.h;
347 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
348 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
349 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
350
351 pOut->metaBlkWidth = metaBlkDim.w;
352 pOut->metaBlkHeight = metaBlkDim.h;
353
354 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
355
356 return ADDR_OK;
357 }
358
359 /**
360 ************************************************************************************************************************
361 * Gfx9Lib::GetMetaMipInfo
362 *
363 * @brief
364 * Get meta mip info
365 *
366 * @return
367 * N/A
368 ************************************************************************************************************************
369 */
370 VOID Gfx9Lib::GetMetaMipInfo(
371 UINT_32 numMipLevels, ///< [in] number of mip levels
372 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
373 BOOL_32 dataThick, ///< [in] data surface is thick
374 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
375 UINT_32 mip0Width, ///< [in] mip0 width
376 UINT_32 mip0Height, ///< [in] mip0 height
377 UINT_32 mip0Depth, ///< [in] mip0 depth
378 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
379 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
380 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
381 const
382 {
383 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
384 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
385 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
386 UINT_32 tailWidth = pMetaBlkDim->w;
387 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
388 UINT_32 tailDepth = pMetaBlkDim->d;
389 BOOL_32 inTail = FALSE;
390 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
391
392 if (numMipLevels > 1)
393 {
394 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
395 {
396 // Z major
397 major = ADDR_MAJOR_Z;
398 }
399 else if (numMetaBlkX >= numMetaBlkY)
400 {
401 // X major
402 major = ADDR_MAJOR_X;
403 }
404 else
405 {
406 // Y major
407 major = ADDR_MAJOR_Y;
408 }
409
410 inTail = ((mip0Width <= tailWidth) &&
411 (mip0Height <= tailHeight) &&
412 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
413
414 if (inTail == FALSE)
415 {
416 UINT_32 orderLimit;
417 UINT_32 *pMipDim;
418 UINT_32 *pOrderDim;
419
420 if (major == ADDR_MAJOR_Z)
421 {
422 // Z major
423 pMipDim = &numMetaBlkY;
424 pOrderDim = &numMetaBlkZ;
425 orderLimit = 4;
426 }
427 else if (major == ADDR_MAJOR_X)
428 {
429 // X major
430 pMipDim = &numMetaBlkY;
431 pOrderDim = &numMetaBlkX;
432 orderLimit = 4;
433 }
434 else
435 {
436 // Y major
437 pMipDim = &numMetaBlkX;
438 pOrderDim = &numMetaBlkY;
439 orderLimit = 2;
440 }
441
442 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
443 {
444 *pMipDim += 2;
445 }
446 else
447 {
448 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
449 }
450 }
451 }
452
453 if (pInfo != NULL)
454 {
455 UINT_32 mipWidth = mip0Width;
456 UINT_32 mipHeight = mip0Height;
457 UINT_32 mipDepth = mip0Depth;
458 Dim3d mipCoord = {0};
459
460 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
461 {
462 if (inTail)
463 {
464 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
465 pMetaBlkDim);
466 break;
467 }
468 else
469 {
470 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
471 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
472 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
473
474 pInfo[mip].inMiptail = FALSE;
475 pInfo[mip].startX = mipCoord.w;
476 pInfo[mip].startY = mipCoord.h;
477 pInfo[mip].startZ = mipCoord.d;
478 pInfo[mip].width = mipWidth;
479 pInfo[mip].height = mipHeight;
480 pInfo[mip].depth = dataThick ? mipDepth : 1;
481
482 if ((mip >= 3) || (mip & 1))
483 {
484 switch (major)
485 {
486 case ADDR_MAJOR_X:
487 mipCoord.w += mipWidth;
488 break;
489 case ADDR_MAJOR_Y:
490 mipCoord.h += mipHeight;
491 break;
492 case ADDR_MAJOR_Z:
493 mipCoord.d += mipDepth;
494 break;
495 default:
496 break;
497 }
498 }
499 else
500 {
501 switch (major)
502 {
503 case ADDR_MAJOR_X:
504 mipCoord.h += mipHeight;
505 break;
506 case ADDR_MAJOR_Y:
507 mipCoord.w += mipWidth;
508 break;
509 case ADDR_MAJOR_Z:
510 mipCoord.h += mipHeight;
511 break;
512 default:
513 break;
514 }
515 }
516
517 mipWidth = Max(mipWidth >> 1, 1u);
518 mipHeight = Max(mipHeight >> 1, 1u);
519 mipDepth = Max(mipDepth >> 1, 1u);
520
521 inTail = ((mipWidth <= tailWidth) &&
522 (mipHeight <= tailHeight) &&
523 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
524 }
525 }
526 }
527
528 *pNumMetaBlkX = numMetaBlkX;
529 *pNumMetaBlkY = numMetaBlkY;
530 *pNumMetaBlkZ = numMetaBlkZ;
531 }
532
533 /**
534 ************************************************************************************************************************
535 * Gfx9Lib::HwlComputeDccInfo
536 *
537 * @brief
538 * Interface function to compute DCC key info
539 *
540 * @return
541 * ADDR_E_RETURNCODE
542 ************************************************************************************************************************
543 */
544 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
545 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
546 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
547 ) const
548 {
549 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
550 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
551 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
552
553 if (dataLinear)
554 {
555 metaLinear = TRUE;
556 }
557 else if (metaLinear == TRUE)
558 {
559 pipeAligned = FALSE;
560 }
561
562 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
563
564 if (metaLinear)
565 {
566 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
567 ADDR_ASSERT_ALWAYS();
568
569 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
570 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
571 }
572 else
573 {
574 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
575
576 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
577
578 UINT_32 numFrags = Max(pIn->numFrags, 1u);
579 UINT_32 numSlices = Max(pIn->numSlices, 1u);
580
581 minMetaBlkSize /= numFrags;
582
583 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
584
585 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
586
587 if ((numPipeTotal > 1) || (numRbTotal > 1))
588 {
589 const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
590
591 numCompressBlkPerMetaBlk =
592 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
593
594 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
595 {
596 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
597 }
598 }
599
600 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
601 Dim3d metaBlkDim = compressBlkDim;
602
603 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
604 {
605 if ((metaBlkDim.h < metaBlkDim.w) ||
606 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
607 {
608 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
609 {
610 metaBlkDim.h <<= 1;
611 }
612 else
613 {
614 metaBlkDim.d <<= 1;
615 }
616 }
617 else
618 {
619 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
620 {
621 metaBlkDim.w <<= 1;
622 }
623 else
624 {
625 metaBlkDim.d <<= 1;
626 }
627 }
628 }
629
630 UINT_32 numMetaBlkX;
631 UINT_32 numMetaBlkY;
632 UINT_32 numMetaBlkZ;
633
634 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
635 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
636 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
637
638 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
639
640 if (numFrags > m_maxCompFrag)
641 {
642 sizeAlign *= (numFrags / m_maxCompFrag);
643 }
644
645 if (m_settings.metaBaseAlignFix)
646 {
647 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
648 }
649
650 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
651 numCompressBlkPerMetaBlk * numFrags;
652 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
653 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
654
655 pOut->pitch = numMetaBlkX * metaBlkDim.w;
656 pOut->height = numMetaBlkY * metaBlkDim.h;
657 pOut->depth = numMetaBlkZ * metaBlkDim.d;
658
659 pOut->compressBlkWidth = compressBlkDim.w;
660 pOut->compressBlkHeight = compressBlkDim.h;
661 pOut->compressBlkDepth = compressBlkDim.d;
662
663 pOut->metaBlkWidth = metaBlkDim.w;
664 pOut->metaBlkHeight = metaBlkDim.h;
665 pOut->metaBlkDepth = metaBlkDim.d;
666
667 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
668 pOut->fastClearSizePerSlice =
669 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
670 }
671
672 return ADDR_OK;
673 }
674
675 /**
676 ************************************************************************************************************************
677 * Gfx9Lib::HwlComputeMaxBaseAlignments
678 *
679 * @brief
680 * Gets maximum alignments
681 * @return
682 * maximum alignments
683 ************************************************************************************************************************
684 */
685 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
686 {
687 return Size64K;
688 }
689
690 /**
691 ************************************************************************************************************************
692 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
693 *
694 * @brief
695 * Gets maximum alignments for metadata
696 * @return
697 * maximum alignments for metadata
698 ************************************************************************************************************************
699 */
700 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
701 {
702 // Max base alignment for Htile
703 const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
704 const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
705
706 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
707 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
708 ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
709 const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
710
711 UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
712
713 if (maxNumPipeTotal > 2)
714 {
715 maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
716 }
717
718 maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
719
720 if (m_settings.metaBaseAlignFix)
721 {
722 maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
723 }
724
725 if (m_settings.htileAlignFix)
726 {
727 maxBaseAlignHtile *= maxNumPipeTotal;
728 }
729
730 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
731
732 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
733 UINT_32 maxBaseAlignDcc3D = 65536;
734
735 if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
736 {
737 maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
738 }
739
740 // Max base alignment for Msaa Dcc
741 UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
742
743 if (m_settings.metaBaseAlignFix)
744 {
745 maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
746 }
747
748 return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
749 }
750
751 /**
752 ************************************************************************************************************************
753 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
754 *
755 * @brief
756 * Interface function stub of AddrComputeCmaskAddrFromCoord
757 *
758 * @return
759 * ADDR_E_RETURNCODE
760 ************************************************************************************************************************
761 */
762 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
763 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
764 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
765 {
766 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
767 input.size = sizeof(input);
768 input.cMaskFlags = pIn->cMaskFlags;
769 input.colorFlags = pIn->colorFlags;
770 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
771 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
772 input.numSlices = Max(pIn->numSlices, 1u);
773 input.swizzleMode = pIn->swizzleMode;
774 input.resourceType = pIn->resourceType;
775
776 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
777 output.size = sizeof(output);
778
779 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
780
781 if (returnCode == ADDR_OK)
782 {
783 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
784 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
785 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
786 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
787
788 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
789 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
790 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
791
792 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
793
794 UINT_32 xb = pIn->x / output.metaBlkWidth;
795 UINT_32 yb = pIn->y / output.metaBlkHeight;
796 UINT_32 zb = pIn->slice;
797
798 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
799 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
800 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
801
802 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
803
804 pOut->addr = address >> 1;
805 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
806
807 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
808 pIn->swizzleMode);
809
810 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
811
812 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
813 }
814
815 return returnCode;
816 }
817
818 /**
819 ************************************************************************************************************************
820 * Gfx9Lib::HwlComputeHtileAddrFromCoord
821 *
822 * @brief
823 * Interface function stub of AddrComputeHtileAddrFromCoord
824 *
825 * @return
826 * ADDR_E_RETURNCODE
827 ************************************************************************************************************************
828 */
829 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
830 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
831 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
832 {
833 ADDR_E_RETURNCODE returnCode = ADDR_OK;
834
835 if (pIn->numMipLevels > 1)
836 {
837 returnCode = ADDR_NOTIMPLEMENTED;
838 }
839 else
840 {
841 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
842 input.size = sizeof(input);
843 input.hTileFlags = pIn->hTileFlags;
844 input.depthFlags = pIn->depthflags;
845 input.swizzleMode = pIn->swizzleMode;
846 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
847 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
848 input.numSlices = Max(pIn->numSlices, 1u);
849 input.numMipLevels = Max(pIn->numMipLevels, 1u);
850
851 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
852 output.size = sizeof(output);
853
854 returnCode = ComputeHtileInfo(&input, &output);
855
856 if (returnCode == ADDR_OK)
857 {
858 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
859 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
860 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
861 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
862
863 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
864 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
865 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
866
867 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
868
869 UINT_32 xb = pIn->x / output.metaBlkWidth;
870 UINT_32 yb = pIn->y / output.metaBlkHeight;
871 UINT_32 zb = pIn->slice;
872
873 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
874 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
875 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
876
877 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
878
879 pOut->addr = address >> 1;
880
881 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
882 pIn->swizzleMode);
883
884 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
885
886 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
887 }
888 }
889
890 return returnCode;
891 }
892
893 /**
894 ************************************************************************************************************************
895 * Gfx9Lib::HwlComputeHtileCoordFromAddr
896 *
897 * @brief
898 * Interface function stub of AddrComputeHtileCoordFromAddr
899 *
900 * @return
901 * ADDR_E_RETURNCODE
902 ************************************************************************************************************************
903 */
904 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
905 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
906 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
907 {
908 ADDR_E_RETURNCODE returnCode = ADDR_OK;
909
910 if (pIn->numMipLevels > 1)
911 {
912 returnCode = ADDR_NOTIMPLEMENTED;
913 }
914 else
915 {
916 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
917 input.size = sizeof(input);
918 input.hTileFlags = pIn->hTileFlags;
919 input.swizzleMode = pIn->swizzleMode;
920 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
921 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
922 input.numSlices = Max(pIn->numSlices, 1u);
923 input.numMipLevels = Max(pIn->numMipLevels, 1u);
924
925 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
926 output.size = sizeof(output);
927
928 returnCode = ComputeHtileInfo(&input, &output);
929
930 if (returnCode == ADDR_OK)
931 {
932 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
933 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
934 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
935 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
936
937 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
938 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
939 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
940
941 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
942
943 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
944 pIn->swizzleMode);
945
946 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
947
948 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
949
950 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
951 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
952
953 UINT_32 x, y, z, s, m;
954 pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
955
956 pOut->slice = m / sliceSizeInBlock;
957 pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
958 pOut->x = (m % pitchInBlock) * output.metaBlkWidth + x;
959 }
960 }
961
962 return returnCode;
963 }
964
965 /**
966 ************************************************************************************************************************
967 * Gfx9Lib::HwlComputeDccAddrFromCoord
968 *
969 * @brief
970 * Interface function stub of AddrComputeDccAddrFromCoord
971 *
972 * @return
973 * ADDR_E_RETURNCODE
974 ************************************************************************************************************************
975 */
976 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
977 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
978 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
979 {
980 ADDR_E_RETURNCODE returnCode = ADDR_OK;
981
982 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
983 {
984 returnCode = ADDR_NOTIMPLEMENTED;
985 }
986 else
987 {
988 ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
989 input.size = sizeof(input);
990 input.dccKeyFlags = pIn->dccKeyFlags;
991 input.colorFlags = pIn->colorFlags;
992 input.swizzleMode = pIn->swizzleMode;
993 input.resourceType = pIn->resourceType;
994 input.bpp = pIn->bpp;
995 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
996 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
997 input.numSlices = Max(pIn->numSlices, 1u);
998 input.numFrags = Max(pIn->numFrags, 1u);
999 input.numMipLevels = Max(pIn->numMipLevels, 1u);
1000
1001 ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
1002 output.size = sizeof(output);
1003
1004 returnCode = ComputeDccInfo(&input, &output);
1005
1006 if (returnCode == ADDR_OK)
1007 {
1008 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
1009 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
1010 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
1011 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1012 UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth);
1013 UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth);
1014 UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
1015 UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
1016
1017 MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1018 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1019 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1020 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1021
1022 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1023
1024 UINT_32 xb = pIn->x / output.metaBlkWidth;
1025 UINT_32 yb = pIn->y / output.metaBlkHeight;
1026 UINT_32 zb = pIn->slice / output.metaBlkDepth;
1027
1028 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
1029 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1030 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1031
1032 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
1033
1034 pOut->addr = address >> 1;
1035
1036 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1037 pIn->swizzleMode);
1038
1039 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1040
1041 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1042 }
1043 }
1044
1045 return returnCode;
1046 }
1047
1048 /**
1049 ************************************************************************************************************************
1050 * Gfx9Lib::HwlInitGlobalParams
1051 *
1052 * @brief
1053 * Initializes global parameters
1054 *
1055 * @return
1056 * TRUE if all settings are valid
1057 *
1058 ************************************************************************************************************************
1059 */
1060 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1061 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1062 {
1063 BOOL_32 valid = TRUE;
1064
1065 if (m_settings.isArcticIsland)
1066 {
1067 GB_ADDR_CONFIG gbAddrConfig;
1068
1069 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1070
1071 // These values are copied from CModel code
1072 switch (gbAddrConfig.bits.NUM_PIPES)
1073 {
1074 case ADDR_CONFIG_1_PIPE:
1075 m_pipes = 1;
1076 m_pipesLog2 = 0;
1077 break;
1078 case ADDR_CONFIG_2_PIPE:
1079 m_pipes = 2;
1080 m_pipesLog2 = 1;
1081 break;
1082 case ADDR_CONFIG_4_PIPE:
1083 m_pipes = 4;
1084 m_pipesLog2 = 2;
1085 break;
1086 case ADDR_CONFIG_8_PIPE:
1087 m_pipes = 8;
1088 m_pipesLog2 = 3;
1089 break;
1090 case ADDR_CONFIG_16_PIPE:
1091 m_pipes = 16;
1092 m_pipesLog2 = 4;
1093 break;
1094 case ADDR_CONFIG_32_PIPE:
1095 m_pipes = 32;
1096 m_pipesLog2 = 5;
1097 break;
1098 default:
1099 ADDR_ASSERT_ALWAYS();
1100 break;
1101 }
1102
1103 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1104 {
1105 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1106 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1107 m_pipeInterleaveLog2 = 8;
1108 break;
1109 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1110 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1111 m_pipeInterleaveLog2 = 9;
1112 break;
1113 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1114 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1115 m_pipeInterleaveLog2 = 10;
1116 break;
1117 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1118 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1119 m_pipeInterleaveLog2 = 11;
1120 break;
1121 default:
1122 ADDR_ASSERT_ALWAYS();
1123 break;
1124 }
1125
1126 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1127 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1128 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1129
1130 switch (gbAddrConfig.bits.NUM_BANKS)
1131 {
1132 case ADDR_CONFIG_1_BANK:
1133 m_banks = 1;
1134 m_banksLog2 = 0;
1135 break;
1136 case ADDR_CONFIG_2_BANK:
1137 m_banks = 2;
1138 m_banksLog2 = 1;
1139 break;
1140 case ADDR_CONFIG_4_BANK:
1141 m_banks = 4;
1142 m_banksLog2 = 2;
1143 break;
1144 case ADDR_CONFIG_8_BANK:
1145 m_banks = 8;
1146 m_banksLog2 = 3;
1147 break;
1148 case ADDR_CONFIG_16_BANK:
1149 m_banks = 16;
1150 m_banksLog2 = 4;
1151 break;
1152 default:
1153 ADDR_ASSERT_ALWAYS();
1154 break;
1155 }
1156
1157 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1158 {
1159 case ADDR_CONFIG_1_SHADER_ENGINE:
1160 m_se = 1;
1161 m_seLog2 = 0;
1162 break;
1163 case ADDR_CONFIG_2_SHADER_ENGINE:
1164 m_se = 2;
1165 m_seLog2 = 1;
1166 break;
1167 case ADDR_CONFIG_4_SHADER_ENGINE:
1168 m_se = 4;
1169 m_seLog2 = 2;
1170 break;
1171 case ADDR_CONFIG_8_SHADER_ENGINE:
1172 m_se = 8;
1173 m_seLog2 = 3;
1174 break;
1175 default:
1176 ADDR_ASSERT_ALWAYS();
1177 break;
1178 }
1179
1180 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1181 {
1182 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1183 m_rbPerSe = 1;
1184 m_rbPerSeLog2 = 0;
1185 break;
1186 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1187 m_rbPerSe = 2;
1188 m_rbPerSeLog2 = 1;
1189 break;
1190 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1191 m_rbPerSe = 4;
1192 m_rbPerSeLog2 = 2;
1193 break;
1194 default:
1195 ADDR_ASSERT_ALWAYS();
1196 break;
1197 }
1198
1199 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1200 {
1201 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1202 m_maxCompFrag = 1;
1203 m_maxCompFragLog2 = 0;
1204 break;
1205 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1206 m_maxCompFrag = 2;
1207 m_maxCompFragLog2 = 1;
1208 break;
1209 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1210 m_maxCompFrag = 4;
1211 m_maxCompFragLog2 = 2;
1212 break;
1213 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1214 m_maxCompFrag = 8;
1215 m_maxCompFragLog2 = 3;
1216 break;
1217 default:
1218 ADDR_ASSERT_ALWAYS();
1219 break;
1220 }
1221
1222 if ((m_rbPerSeLog2 == 1) &&
1223 (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1224 ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1225 {
1226 ADDR_ASSERT(m_settings.isVega10 == FALSE);
1227 ADDR_ASSERT(m_settings.isRaven == FALSE);
1228
1229 ADDR_ASSERT(m_settings.isVega20 == FALSE);
1230
1231 if (m_settings.isVega12)
1232 {
1233 m_settings.htileCacheRbConflict = 1;
1234 }
1235 }
1236
1237 // For simplicity we never allow VAR swizzle mode for GFX9, the actural value is 18 on GFX9
1238 m_blockVarSizeLog2 = 0;
1239 }
1240 else
1241 {
1242 valid = FALSE;
1243 ADDR_NOT_IMPLEMENTED();
1244 }
1245
1246 if (valid)
1247 {
1248 InitEquationTable();
1249 }
1250
1251 return valid;
1252 }
1253
1254 /**
1255 ************************************************************************************************************************
1256 * Gfx9Lib::HwlConvertChipFamily
1257 *
1258 * @brief
1259 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1260 * @return
1261 * ChipFamily
1262 ************************************************************************************************************************
1263 */
1264 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1265 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1266 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1267 {
1268 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1269
1270 switch (uChipFamily)
1271 {
1272 case FAMILY_AI:
1273 m_settings.isArcticIsland = 1;
1274 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1275 m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1276 m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1277 m_settings.isDce12 = 1;
1278
1279 if (m_settings.isVega10 == 0)
1280 {
1281 m_settings.htileAlignFix = 1;
1282 m_settings.applyAliasFix = 1;
1283 }
1284
1285 m_settings.metaBaseAlignFix = 1;
1286
1287 m_settings.depthPipeXorDisable = 1;
1288 break;
1289 case FAMILY_RV:
1290 m_settings.isArcticIsland = 1;
1291
1292 if (ASICREV_IS_RAVEN(uChipRevision))
1293 {
1294 m_settings.isRaven = 1;
1295
1296 m_settings.depthPipeXorDisable = 1;
1297 }
1298
1299 if (ASICREV_IS_RAVEN2(uChipRevision))
1300 {
1301 m_settings.isRaven = 1;
1302 }
1303
1304 if (m_settings.isRaven == 0)
1305 {
1306 m_settings.htileAlignFix = 1;
1307 m_settings.applyAliasFix = 1;
1308 }
1309
1310 if (ASICREV_IS_RENOIR(uChipRevision))
1311 {
1312 m_settings.isRaven = 1;
1313 }
1314
1315 m_settings.isDcn1 = m_settings.isRaven;
1316
1317 m_settings.metaBaseAlignFix = 1;
1318 break;
1319
1320 default:
1321 ADDR_ASSERT(!"This should be a Fusion");
1322 break;
1323 }
1324
1325 return family;
1326 }
1327
1328 /**
1329 ************************************************************************************************************************
1330 * Gfx9Lib::GetRbEquation
1331 *
1332 * @brief
1333 * Get RB equation
1334 * @return
1335 * N/A
1336 ************************************************************************************************************************
1337 */
1338 VOID Gfx9Lib::GetRbEquation(
1339 CoordEq* pRbEq, ///< [out] rb equation
1340 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1341 UINT_32 numSeLog2) ///< [in] number of shader engine
1342 const
1343 {
1344 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1345 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1346 Coordinate cx('x', rbRegion);
1347 Coordinate cy('y', rbRegion);
1348
1349 UINT_32 start = 0;
1350 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1351
1352 // Clear the rb equation
1353 pRbEq->resize(0);
1354 pRbEq->resize(numRbTotalLog2);
1355
1356 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1357 {
1358 // Special case when more than 1 SE, and 2 RB per SE
1359 (*pRbEq)[0].add(cx);
1360 (*pRbEq)[0].add(cy);
1361 cx++;
1362 cy++;
1363
1364 if (m_settings.applyAliasFix == false)
1365 {
1366 (*pRbEq)[0].add(cy);
1367 }
1368
1369 (*pRbEq)[0].add(cy);
1370 start++;
1371 }
1372
1373 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1374
1375 for (UINT_32 i = 0; i < numBits; i++)
1376 {
1377 UINT_32 idx =
1378 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1379
1380 if ((i % 2) == 1)
1381 {
1382 (*pRbEq)[idx].add(cx);
1383 cx++;
1384 }
1385 else
1386 {
1387 (*pRbEq)[idx].add(cy);
1388 cy++;
1389 }
1390 }
1391 }
1392
1393 /**
1394 ************************************************************************************************************************
1395 * Gfx9Lib::GetDataEquation
1396 *
1397 * @brief
1398 * Get data equation for fmask and Z
1399 * @return
1400 * N/A
1401 ************************************************************************************************************************
1402 */
1403 VOID Gfx9Lib::GetDataEquation(
1404 CoordEq* pDataEq, ///< [out] data surface equation
1405 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1406 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1407 AddrResourceType resourceType, ///< [in] data surface resource type
1408 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1409 UINT_32 numSamplesLog2) ///< [in] data surface sample count
1410 const
1411 {
1412 Coordinate cx('x', 0);
1413 Coordinate cy('y', 0);
1414 Coordinate cz('z', 0);
1415 Coordinate cs('s', 0);
1416
1417 // Clear the equation
1418 pDataEq->resize(0);
1419 pDataEq->resize(27);
1420
1421 if (dataSurfaceType == Gfx9DataColor)
1422 {
1423 if (IsLinear(swizzleMode))
1424 {
1425 Coordinate cm('m', 0);
1426
1427 pDataEq->resize(49);
1428
1429 for (UINT_32 i = 0; i < 49; i++)
1430 {
1431 (*pDataEq)[i].add(cm);
1432 cm++;
1433 }
1434 }
1435 else if (IsThick(resourceType, swizzleMode))
1436 {
1437 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1438 UINT_32 i;
1439 if (IsStandardSwizzle(resourceType, swizzleMode))
1440 {
1441 // Standard 3d swizzle
1442 // Fill in bottom x bits
1443 for (i = elementBytesLog2; i < 4; i++)
1444 {
1445 (*pDataEq)[i].add(cx);
1446 cx++;
1447 }
1448 // Fill in 2 bits of y and then z
1449 for (i = 4; i < 6; i++)
1450 {
1451 (*pDataEq)[i].add(cy);
1452 cy++;
1453 }
1454 for (i = 6; i < 8; i++)
1455 {
1456 (*pDataEq)[i].add(cz);
1457 cz++;
1458 }
1459 if (elementBytesLog2 < 2)
1460 {
1461 // fill in z & y bit
1462 (*pDataEq)[8].add(cz);
1463 (*pDataEq)[9].add(cy);
1464 cz++;
1465 cy++;
1466 }
1467 else if (elementBytesLog2 == 2)
1468 {
1469 // fill in y and x bit
1470 (*pDataEq)[8].add(cy);
1471 (*pDataEq)[9].add(cx);
1472 cy++;
1473 cx++;
1474 }
1475 else
1476 {
1477 // fill in 2 x bits
1478 (*pDataEq)[8].add(cx);
1479 cx++;
1480 (*pDataEq)[9].add(cx);
1481 cx++;
1482 }
1483 }
1484 else
1485 {
1486 // Z 3d swizzle
1487 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1488 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1489 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1490 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1491 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1492 {
1493 (*pDataEq)[i].add(cz);
1494 cz++;
1495 }
1496 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1497 {
1498 // add an x and z
1499 (*pDataEq)[6].add(cx);
1500 (*pDataEq)[7].add(cz);
1501 cx++;
1502 cz++;
1503 }
1504 else if (elementBytesLog2 == 2)
1505 {
1506 // add a y and z
1507 (*pDataEq)[6].add(cy);
1508 (*pDataEq)[7].add(cz);
1509 cy++;
1510 cz++;
1511 }
1512 // add y and x
1513 (*pDataEq)[8].add(cy);
1514 (*pDataEq)[9].add(cx);
1515 cy++;
1516 cx++;
1517 }
1518 // Fill in bit 10 and up
1519 pDataEq->mort3d( cz, cy, cx, 10 );
1520 }
1521 else if (IsThin(resourceType, swizzleMode))
1522 {
1523 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1524 // Color 2D
1525 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1526 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1527 UINT_32 i;
1528 // Fill in bottom x bits
1529 for (i = elementBytesLog2; i < 4; i++)
1530 {
1531 (*pDataEq)[i].add(cx);
1532 cx++;
1533 }
1534 // Fill in bottom y bits
1535 for (i = 4; i < 4 + microYBits; i++)
1536 {
1537 (*pDataEq)[i].add(cy);
1538 cy++;
1539 }
1540 // Fill in last of the micro_x bits
1541 for (i = 4 + microYBits; i < 8; i++)
1542 {
1543 (*pDataEq)[i].add(cx);
1544 cx++;
1545 }
1546 // Fill in x/y bits below sample split
1547 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1548 // Fill in sample bits
1549 for (i = 0; i < numSamplesLog2; i++)
1550 {
1551 cs.set('s', i);
1552 (*pDataEq)[tileSplitStart + i].add(cs);
1553 }
1554 // Fill in x/y bits above sample split
1555 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1556 {
1557 pDataEq->mort2d(cx, cy, blockSizeLog2);
1558 }
1559 else
1560 {
1561 pDataEq->mort2d(cy, cx, blockSizeLog2);
1562 }
1563 }
1564 else
1565 {
1566 ADDR_ASSERT_ALWAYS();
1567 }
1568 }
1569 else
1570 {
1571 // Fmask or depth
1572 UINT_32 sampleStart = elementBytesLog2;
1573 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1574 UINT_32 ymajStart = 6 + numSamplesLog2;
1575
1576 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1577 {
1578 cs.set('s', s);
1579 (*pDataEq)[sampleStart + s].add(cs);
1580 }
1581
1582 // Put in the x-major order pixel bits
1583 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1584 // Put in the y-major order pixel bits
1585 pDataEq->mort2d(cy, cx, ymajStart);
1586 }
1587 }
1588
1589 /**
1590 ************************************************************************************************************************
1591 * Gfx9Lib::GetPipeEquation
1592 *
1593 * @brief
1594 * Get pipe equation
1595 * @return
1596 * N/A
1597 ************************************************************************************************************************
1598 */
1599 VOID Gfx9Lib::GetPipeEquation(
1600 CoordEq* pPipeEq, ///< [out] pipe equation
1601 CoordEq* pDataEq, ///< [in] data equation
1602 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1603 UINT_32 numPipeLog2, ///< [in] number of pipes
1604 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1605 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1606 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1607 AddrResourceType resourceType ///< [in] data surface resource type
1608 ) const
1609 {
1610 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1611 CoordEq dataEq;
1612
1613 pDataEq->copy(dataEq);
1614
1615 if (dataSurfaceType == Gfx9DataColor)
1616 {
1617 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1618 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1619 }
1620
1621 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1622
1623 // This section should only apply to z/stencil, maybe fmask
1624 // If the pipe bit is below the comp block size,
1625 // then keep moving up the address until we find a bit that is above
1626 UINT_32 pipeStart = 0;
1627
1628 if (dataSurfaceType != Gfx9DataColor)
1629 {
1630 Coordinate tileMin('x', 3);
1631
1632 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1633 {
1634 pipeStart++;
1635 }
1636
1637 // if pipe is 0, then the first pipe bit is above the comp block size,
1638 // so we don't need to do anything
1639 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1640 // we will get the same pipe equation
1641 if (pipeStart != 0)
1642 {
1643 for (UINT_32 i = 0; i < numPipeLog2; i++)
1644 {
1645 // Copy the jth bit above pipe interleave to the current pipe equation bit
1646 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1647 }
1648 }
1649 }
1650
1651 if (IsPrt(swizzleMode))
1652 {
1653 // Clear out bits above the block size if prt's are enabled
1654 dataEq.resize(blockSizeLog2);
1655 dataEq.resize(48);
1656 }
1657
1658 if (IsXor(swizzleMode))
1659 {
1660 CoordEq xorMask;
1661
1662 if (IsThick(resourceType, swizzleMode))
1663 {
1664 CoordEq xorMask2;
1665
1666 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1667
1668 xorMask.resize(numPipeLog2);
1669
1670 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1671 {
1672 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1673 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1674 }
1675 }
1676 else
1677 {
1678 // Xor in the bits above the pipe+gpu bits
1679 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1680
1681 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1682 {
1683 Coordinate co;
1684 CoordEq xorMask2;
1685 // if 1xaa and not prt, then xor in the z bits
1686 xorMask2.resize(0);
1687 xorMask2.resize(numPipeLog2);
1688 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1689 {
1690 co.set('z', numPipeLog2 - 1 - pipeIdx);
1691 xorMask2[pipeIdx].add(co);
1692 }
1693
1694 pPipeEq->xorin(xorMask2);
1695 }
1696 }
1697
1698 xorMask.reverse();
1699 pPipeEq->xorin(xorMask);
1700 }
1701 }
1702 /**
1703 ************************************************************************************************************************
1704 * Gfx9Lib::GetMetaEquation
1705 *
1706 * @brief
1707 * Get meta equation for cmask/htile/DCC
1708 * @return
1709 * Pointer to a calculated meta equation
1710 ************************************************************************************************************************
1711 */
1712 const CoordEq* Gfx9Lib::GetMetaEquation(
1713 const MetaEqParams& metaEqParams)
1714 {
1715 UINT_32 cachedMetaEqIndex;
1716
1717 for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1718 {
1719 if (memcmp(&metaEqParams,
1720 &m_cachedMetaEqKey[cachedMetaEqIndex],
1721 static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1722 {
1723 break;
1724 }
1725 }
1726
1727 CoordEq* pMetaEq = NULL;
1728
1729 if (cachedMetaEqIndex < MaxCachedMetaEq)
1730 {
1731 pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1732 }
1733 else
1734 {
1735 m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1736
1737 pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1738
1739 m_metaEqOverrideIndex %= MaxCachedMetaEq;
1740
1741 GenMetaEquation(pMetaEq,
1742 metaEqParams.maxMip,
1743 metaEqParams.elementBytesLog2,
1744 metaEqParams.numSamplesLog2,
1745 metaEqParams.metaFlag,
1746 metaEqParams.dataSurfaceType,
1747 metaEqParams.swizzleMode,
1748 metaEqParams.resourceType,
1749 metaEqParams.metaBlkWidthLog2,
1750 metaEqParams.metaBlkHeightLog2,
1751 metaEqParams.metaBlkDepthLog2,
1752 metaEqParams.compBlkWidthLog2,
1753 metaEqParams.compBlkHeightLog2,
1754 metaEqParams.compBlkDepthLog2);
1755 }
1756
1757 return pMetaEq;
1758 }
1759
1760 /**
1761 ************************************************************************************************************************
1762 * Gfx9Lib::GenMetaEquation
1763 *
1764 * @brief
1765 * Get meta equation for cmask/htile/DCC
1766 * @return
1767 * N/A
1768 ************************************************************************************************************************
1769 */
1770 VOID Gfx9Lib::GenMetaEquation(
1771 CoordEq* pMetaEq, ///< [out] meta equation
1772 UINT_32 maxMip, ///< [in] max mip Id
1773 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1774 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1775 ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
1776 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1777 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1778 AddrResourceType resourceType, ///< [in] data surface resource type
1779 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1780 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1781 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1782 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1783 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1784 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1785 const
1786 {
1787 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1788 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1789
1790 // Get the correct data address and rb equation
1791 CoordEq dataEq;
1792 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1793 elementBytesLog2, numSamplesLog2);
1794
1795 // Get pipe and rb equations
1796 CoordEq pipeEquation;
1797 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1798 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1799 numPipeTotalLog2 = pipeEquation.getsize();
1800
1801 if (metaFlag.linear)
1802 {
1803 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1804 ADDR_ASSERT_ALWAYS();
1805
1806 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1807
1808 dataEq.copy(*pMetaEq);
1809
1810 if (IsLinear(swizzleMode))
1811 {
1812 if (metaFlag.pipeAligned)
1813 {
1814 // Remove the pipe bits
1815 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1816 pMetaEq->shift(-shift, pipeInterleaveLog2);
1817 }
1818 // Divide by comp block size, which for linear (which is always color) is 256 B
1819 pMetaEq->shift(-8);
1820
1821 if (metaFlag.pipeAligned)
1822 {
1823 // Put pipe bits back in
1824 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1825
1826 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1827 {
1828 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1829 }
1830 }
1831 }
1832
1833 pMetaEq->shift(1);
1834 }
1835 else
1836 {
1837 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1838 UINT_32 compFragLog2 =
1839 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1840 maxCompFragLog2 : numSamplesLog2;
1841
1842 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1843
1844 // Make sure the metaaddr is cleared
1845 pMetaEq->resize(0);
1846 pMetaEq->resize(27);
1847
1848 if (IsThick(resourceType, swizzleMode))
1849 {
1850 Coordinate cx('x', 0);
1851 Coordinate cy('y', 0);
1852 Coordinate cz('z', 0);
1853
1854 if (maxMip > 0)
1855 {
1856 pMetaEq->mort3d(cy, cx, cz);
1857 }
1858 else
1859 {
1860 pMetaEq->mort3d(cx, cy, cz);
1861 }
1862 }
1863 else
1864 {
1865 Coordinate cx('x', 0);
1866 Coordinate cy('y', 0);
1867 Coordinate cs;
1868
1869 if (maxMip > 0)
1870 {
1871 pMetaEq->mort2d(cy, cx, compFragLog2);
1872 }
1873 else
1874 {
1875 pMetaEq->mort2d(cx, cy, compFragLog2);
1876 }
1877
1878 //------------------------------------------------------------------------------------------------------------------------
1879 // Put the compressible fragments at the lsb
1880 // the uncompressible frags will be at the msb of the micro address
1881 //------------------------------------------------------------------------------------------------------------------------
1882 for (UINT_32 s = 0; s < compFragLog2; s++)
1883 {
1884 cs.set('s', s);
1885 (*pMetaEq)[s].add(cs);
1886 }
1887 }
1888
1889 // Keep a copy of the pipe equations
1890 CoordEq origPipeEquation;
1891 pipeEquation.copy(origPipeEquation);
1892
1893 Coordinate co;
1894 // filter out everything under the compressed block size
1895 co.set('x', compBlkWidthLog2);
1896 pMetaEq->Filter('<', co, 0, 'x');
1897 co.set('y', compBlkHeightLog2);
1898 pMetaEq->Filter('<', co, 0, 'y');
1899 co.set('z', compBlkDepthLog2);
1900 pMetaEq->Filter('<', co, 0, 'z');
1901
1902 // For non-color, filter out sample bits
1903 if (dataSurfaceType != Gfx9DataColor)
1904 {
1905 co.set('x', 0);
1906 pMetaEq->Filter('<', co, 0, 's');
1907 }
1908
1909 // filter out everything above the metablock size
1910 co.set('x', metaBlkWidthLog2 - 1);
1911 pMetaEq->Filter('>', co, 0, 'x');
1912 co.set('y', metaBlkHeightLog2 - 1);
1913 pMetaEq->Filter('>', co, 0, 'y');
1914 co.set('z', metaBlkDepthLog2 - 1);
1915 pMetaEq->Filter('>', co, 0, 'z');
1916
1917 // filter out everything above the metablock size for the channel bits
1918 co.set('x', metaBlkWidthLog2 - 1);
1919 pipeEquation.Filter('>', co, 0, 'x');
1920 co.set('y', metaBlkHeightLog2 - 1);
1921 pipeEquation.Filter('>', co, 0, 'y');
1922 co.set('z', metaBlkDepthLog2 - 1);
1923 pipeEquation.Filter('>', co, 0, 'z');
1924
1925 // Make sure we still have the same number of channel bits
1926 if (pipeEquation.getsize() != numPipeTotalLog2)
1927 {
1928 ADDR_ASSERT_ALWAYS();
1929 }
1930
1931 // Loop through all channel and rb bits,
1932 // and make sure these components exist in the metadata address
1933 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1934 {
1935 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1936 {
1937 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1938 {
1939 ADDR_ASSERT_ALWAYS();
1940 }
1941 }
1942 }
1943
1944 const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
1945 const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1946 const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1947 CoordEq origRbEquation;
1948
1949 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1950
1951 CoordEq rbEquation = origRbEquation;
1952
1953 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1954 {
1955 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1956 {
1957 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1958 {
1959 ADDR_ASSERT_ALWAYS();
1960 }
1961 }
1962 }
1963
1964 if (m_settings.applyAliasFix)
1965 {
1966 co.set('z', -1);
1967 }
1968
1969 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1970 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1971 {
1972 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1973 {
1974 BOOL_32 isRbEquationInPipeEquation = FALSE;
1975
1976 if (m_settings.applyAliasFix)
1977 {
1978 CoordTerm filteredPipeEq;
1979 filteredPipeEq = pipeEquation[j];
1980
1981 filteredPipeEq.Filter('>', co, 0, 'z');
1982
1983 isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1984 }
1985 else
1986 {
1987 isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1988 }
1989
1990 if (isRbEquationInPipeEquation)
1991 {
1992 rbEquation[i].Clear();
1993 }
1994 }
1995 }
1996
1997 bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
1998
1999 // Loop through each bit of the channel, get the smallest coordinate,
2000 // and remove it from the metaaddr, and rb_equation
2001 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2002 {
2003 pipeEquation[i].getsmallest(co);
2004
2005 UINT_32 old_size = pMetaEq->getsize();
2006 pMetaEq->Filter('=', co);
2007 UINT_32 new_size = pMetaEq->getsize();
2008 if (new_size != old_size-1)
2009 {
2010 ADDR_ASSERT_ALWAYS();
2011 }
2012 pipeEquation.remove(co);
2013 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2014 {
2015 if (rbEquation[j].remove(co))
2016 {
2017 // if we actually removed something from this bit, then add the remaining
2018 // channel bits, as these can be removed for this bit
2019 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2020 {
2021 if (pipeEquation[i][k] != co)
2022 {
2023 rbEquation[j].add(pipeEquation[i][k]);
2024 rbAppendedWithPipeBits[j] = true;
2025 }
2026 }
2027 }
2028 }
2029 }
2030
2031 // Loop through the rb bits and see what remain;
2032 // filter out the smallest coordinate if it remains
2033 UINT_32 rbBitsLeft = 0;
2034 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2035 {
2036 BOOL_32 isRbEqAppended = FALSE;
2037
2038 if (m_settings.applyAliasFix)
2039 {
2040 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2041 }
2042 else
2043 {
2044 isRbEqAppended = (rbEquation[i].getsize() > 0);
2045 }
2046
2047 if (isRbEqAppended)
2048 {
2049 rbBitsLeft++;
2050 rbEquation[i].getsmallest(co);
2051 UINT_32 old_size = pMetaEq->getsize();
2052 pMetaEq->Filter('=', co);
2053 UINT_32 new_size = pMetaEq->getsize();
2054 if (new_size != old_size - 1)
2055 {
2056 // assert warning
2057 }
2058 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2059 {
2060 if (rbEquation[j].remove(co))
2061 {
2062 // if we actually removed something from this bit, then add the remaining
2063 // rb bits, as these can be removed for this bit
2064 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2065 {
2066 if (rbEquation[i][k] != co)
2067 {
2068 rbEquation[j].add(rbEquation[i][k]);
2069 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2070 }
2071 }
2072 }
2073 }
2074 }
2075 }
2076
2077 // capture the size of the metaaddr
2078 UINT_32 metaSize = pMetaEq->getsize();
2079 // resize to 49 bits...make this a nibble address
2080 pMetaEq->resize(49);
2081 // Concatenate the macro address above the current address
2082 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2083 {
2084 co.set('m', j);
2085 (*pMetaEq)[i].add(co);
2086 }
2087
2088 // Multiply by meta element size (in nibbles)
2089 if (dataSurfaceType == Gfx9DataColor)
2090 {
2091 pMetaEq->shift(1);
2092 }
2093 else if (dataSurfaceType == Gfx9DataDepthStencil)
2094 {
2095 pMetaEq->shift(3);
2096 }
2097
2098 //------------------------------------------------------------------------------------------
2099 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2100 // Shift up from pipe interleave number of channel
2101 // and rb bits left, and uncompressed fragments
2102 //------------------------------------------------------------------------------------------
2103
2104 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2105
2106 // Put in the channel bits
2107 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2108 {
2109 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2110 }
2111
2112 // Put in remaining rb bits
2113 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2114 {
2115 BOOL_32 isRbEqAppended = FALSE;
2116
2117 if (m_settings.applyAliasFix)
2118 {
2119 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2120 }
2121 else
2122 {
2123 isRbEqAppended = (rbEquation[i].getsize() > 0);
2124 }
2125
2126 if (isRbEqAppended)
2127 {
2128 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2129 // Mark any rb bit we add in to the rb mask
2130 j++;
2131 }
2132 }
2133
2134 //------------------------------------------------------------------------------------------
2135 // Put in the uncompressed fragment bits
2136 //------------------------------------------------------------------------------------------
2137 for (UINT_32 i = 0; i < uncompFragLog2; i++)
2138 {
2139 co.set('s', compFragLog2 + i);
2140 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2141 }
2142 }
2143 }
2144
2145 /**
2146 ************************************************************************************************************************
2147 * Gfx9Lib::IsEquationSupported
2148 *
2149 * @brief
2150 * Check if equation is supported for given swizzle mode and resource type.
2151 *
2152 * @return
2153 * TRUE if supported
2154 ************************************************************************************************************************
2155 */
2156 BOOL_32 Gfx9Lib::IsEquationSupported(
2157 AddrResourceType rsrcType,
2158 AddrSwizzleMode swMode,
2159 UINT_32 elementBytesLog2) const
2160 {
2161 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2162 (IsValidSwMode(swMode) == TRUE) &&
2163 (IsLinear(swMode) == FALSE) &&
2164 (((IsTex2d(rsrcType) == TRUE) &&
2165 ((elementBytesLog2 < 4) ||
2166 ((IsRotateSwizzle(swMode) == FALSE) &&
2167 (IsZOrderSwizzle(swMode) == FALSE)))) ||
2168 ((IsTex3d(rsrcType) == TRUE) &&
2169 (IsRotateSwizzle(swMode) == FALSE) &&
2170 (IsBlock256b(swMode) == FALSE)));
2171
2172 return supported;
2173 }
2174
2175 /**
2176 ************************************************************************************************************************
2177 * Gfx9Lib::InitEquationTable
2178 *
2179 * @brief
2180 * Initialize Equation table.
2181 *
2182 * @return
2183 * N/A
2184 ************************************************************************************************************************
2185 */
2186 VOID Gfx9Lib::InitEquationTable()
2187 {
2188 memset(m_equationTable, 0, sizeof(m_equationTable));
2189
2190 // Loop all possible resource type (2D/3D)
2191 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2192 {
2193 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2194
2195 // Loop all possible swizzle mode
2196 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
2197 {
2198 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2199
2200 // Loop all possible bpp
2201 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2202 {
2203 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2204
2205 // Check if the input is supported
2206 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2207 {
2208 ADDR_EQUATION equation;
2209 ADDR_E_RETURNCODE retCode;
2210
2211 memset(&equation, 0, sizeof(ADDR_EQUATION));
2212
2213 // Generate the equation
2214 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2215 {
2216 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2217 }
2218 else if (IsThin(rsrcType, swMode))
2219 {
2220 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2221 }
2222 else
2223 {
2224 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2225 }
2226
2227 // Only fill the equation into the table if the return code is ADDR_OK,
2228 // otherwise if the return code is not ADDR_OK, it indicates this is not
2229 // a valid input, we do nothing but just fill invalid equation index
2230 // into the lookup table.
2231 if (retCode == ADDR_OK)
2232 {
2233 equationIndex = m_numEquations;
2234 ADDR_ASSERT(equationIndex < EquationTableSize);
2235
2236 m_equationTable[equationIndex] = equation;
2237
2238 m_numEquations++;
2239 }
2240 else
2241 {
2242 ADDR_ASSERT_ALWAYS();
2243 }
2244 }
2245
2246 // Fill the index into the lookup table, if the combination is not supported
2247 // fill the invalid equation index
2248 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2249 }
2250 }
2251 }
2252 }
2253
2254 /**
2255 ************************************************************************************************************************
2256 * Gfx9Lib::HwlGetEquationIndex
2257 *
2258 * @brief
2259 * Interface function stub of GetEquationIndex
2260 *
2261 * @return
2262 * ADDR_E_RETURNCODE
2263 ************************************************************************************************************************
2264 */
2265 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2266 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2267 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2268 ) const
2269 {
2270 AddrResourceType rsrcType = pIn->resourceType;
2271 AddrSwizzleMode swMode = pIn->swizzleMode;
2272 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2273 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2274
2275 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2276 {
2277 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2278 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2279
2280 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2281 }
2282
2283 if (pOut->pMipInfo != NULL)
2284 {
2285 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2286 {
2287 pOut->pMipInfo[i].equationIndex = index;
2288 }
2289 }
2290
2291 return index;
2292 }
2293
2294 /**
2295 ************************************************************************************************************************
2296 * Gfx9Lib::HwlComputeBlock256Equation
2297 *
2298 * @brief
2299 * Interface function stub of ComputeBlock256Equation
2300 *
2301 * @return
2302 * ADDR_E_RETURNCODE
2303 ************************************************************************************************************************
2304 */
2305 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2306 AddrResourceType rsrcType,
2307 AddrSwizzleMode swMode,
2308 UINT_32 elementBytesLog2,
2309 ADDR_EQUATION* pEquation) const
2310 {
2311 ADDR_E_RETURNCODE ret = ADDR_OK;
2312
2313 pEquation->numBits = 8;
2314
2315 UINT_32 i = 0;
2316 for (; i < elementBytesLog2; i++)
2317 {
2318 InitChannel(1, 0 , i, &pEquation->addr[i]);
2319 }
2320
2321 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2322
2323 const UINT_32 maxBitsUsed = 4;
2324 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2325 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2326
2327 for (i = 0; i < maxBitsUsed; i++)
2328 {
2329 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2330 InitChannel(1, 1, i, &y[i]);
2331 }
2332
2333 if (IsStandardSwizzle(rsrcType, swMode))
2334 {
2335 switch (elementBytesLog2)
2336 {
2337 case 0:
2338 pixelBit[0] = x[0];
2339 pixelBit[1] = x[1];
2340 pixelBit[2] = x[2];
2341 pixelBit[3] = x[3];
2342 pixelBit[4] = y[0];
2343 pixelBit[5] = y[1];
2344 pixelBit[6] = y[2];
2345 pixelBit[7] = y[3];
2346 break;
2347 case 1:
2348 pixelBit[0] = x[0];
2349 pixelBit[1] = x[1];
2350 pixelBit[2] = x[2];
2351 pixelBit[3] = y[0];
2352 pixelBit[4] = y[1];
2353 pixelBit[5] = y[2];
2354 pixelBit[6] = x[3];
2355 break;
2356 case 2:
2357 pixelBit[0] = x[0];
2358 pixelBit[1] = x[1];
2359 pixelBit[2] = y[0];
2360 pixelBit[3] = y[1];
2361 pixelBit[4] = y[2];
2362 pixelBit[5] = x[2];
2363 break;
2364 case 3:
2365 pixelBit[0] = x[0];
2366 pixelBit[1] = y[0];
2367 pixelBit[2] = y[1];
2368 pixelBit[3] = x[1];
2369 pixelBit[4] = x[2];
2370 break;
2371 case 4:
2372 pixelBit[0] = y[0];
2373 pixelBit[1] = y[1];
2374 pixelBit[2] = x[0];
2375 pixelBit[3] = x[1];
2376 break;
2377 default:
2378 ADDR_ASSERT_ALWAYS();
2379 ret = ADDR_INVALIDPARAMS;
2380 break;
2381 }
2382 }
2383 else if (IsDisplaySwizzle(rsrcType, swMode))
2384 {
2385 switch (elementBytesLog2)
2386 {
2387 case 0:
2388 pixelBit[0] = x[0];
2389 pixelBit[1] = x[1];
2390 pixelBit[2] = x[2];
2391 pixelBit[3] = y[1];
2392 pixelBit[4] = y[0];
2393 pixelBit[5] = y[2];
2394 pixelBit[6] = x[3];
2395 pixelBit[7] = y[3];
2396 break;
2397 case 1:
2398 pixelBit[0] = x[0];
2399 pixelBit[1] = x[1];
2400 pixelBit[2] = x[2];
2401 pixelBit[3] = y[0];
2402 pixelBit[4] = y[1];
2403 pixelBit[5] = y[2];
2404 pixelBit[6] = x[3];
2405 break;
2406 case 2:
2407 pixelBit[0] = x[0];
2408 pixelBit[1] = x[1];
2409 pixelBit[2] = y[0];
2410 pixelBit[3] = x[2];
2411 pixelBit[4] = y[1];
2412 pixelBit[5] = y[2];
2413 break;
2414 case 3:
2415 pixelBit[0] = x[0];
2416 pixelBit[1] = y[0];
2417 pixelBit[2] = x[1];
2418 pixelBit[3] = x[2];
2419 pixelBit[4] = y[1];
2420 break;
2421 case 4:
2422 pixelBit[0] = x[0];
2423 pixelBit[1] = y[0];
2424 pixelBit[2] = x[1];
2425 pixelBit[3] = y[1];
2426 break;
2427 default:
2428 ADDR_ASSERT_ALWAYS();
2429 ret = ADDR_INVALIDPARAMS;
2430 break;
2431 }
2432 }
2433 else if (IsRotateSwizzle(swMode))
2434 {
2435 switch (elementBytesLog2)
2436 {
2437 case 0:
2438 pixelBit[0] = y[0];
2439 pixelBit[1] = y[1];
2440 pixelBit[2] = y[2];
2441 pixelBit[3] = x[1];
2442 pixelBit[4] = x[0];
2443 pixelBit[5] = x[2];
2444 pixelBit[6] = x[3];
2445 pixelBit[7] = y[3];
2446 break;
2447 case 1:
2448 pixelBit[0] = y[0];
2449 pixelBit[1] = y[1];
2450 pixelBit[2] = y[2];
2451 pixelBit[3] = x[0];
2452 pixelBit[4] = x[1];
2453 pixelBit[5] = x[2];
2454 pixelBit[6] = x[3];
2455 break;
2456 case 2:
2457 pixelBit[0] = y[0];
2458 pixelBit[1] = y[1];
2459 pixelBit[2] = x[0];
2460 pixelBit[3] = y[2];
2461 pixelBit[4] = x[1];
2462 pixelBit[5] = x[2];
2463 break;
2464 case 3:
2465 pixelBit[0] = y[0];
2466 pixelBit[1] = x[0];
2467 pixelBit[2] = y[1];
2468 pixelBit[3] = x[1];
2469 pixelBit[4] = x[2];
2470 break;
2471 default:
2472 ADDR_ASSERT_ALWAYS();
2473 case 4:
2474 ret = ADDR_INVALIDPARAMS;
2475 break;
2476 }
2477 }
2478 else
2479 {
2480 ADDR_ASSERT_ALWAYS();
2481 ret = ADDR_INVALIDPARAMS;
2482 }
2483
2484 // Post validation
2485 if (ret == ADDR_OK)
2486 {
2487 ASSERTED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2488 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2489 (microBlockDim.w * (1 << elementBytesLog2)));
2490 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2491 }
2492
2493 return ret;
2494 }
2495
2496 /**
2497 ************************************************************************************************************************
2498 * Gfx9Lib::HwlComputeThinEquation
2499 *
2500 * @brief
2501 * Interface function stub of ComputeThinEquation
2502 *
2503 * @return
2504 * ADDR_E_RETURNCODE
2505 ************************************************************************************************************************
2506 */
2507 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2508 AddrResourceType rsrcType,
2509 AddrSwizzleMode swMode,
2510 UINT_32 elementBytesLog2,
2511 ADDR_EQUATION* pEquation) const
2512 {
2513 ADDR_E_RETURNCODE ret = ADDR_OK;
2514
2515 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2516
2517 UINT_32 maxXorBits = blockSizeLog2;
2518 if (IsNonPrtXor(swMode))
2519 {
2520 // For non-prt-xor, maybe need to initialize some more bits for xor
2521 // The highest xor bit used in equation will be max the following 3 items:
2522 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2523 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2524 // 3. blockSizeLog2
2525
2526 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2527 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2528 GetPipeXorBits(blockSizeLog2) +
2529 2 * GetBankXorBits(blockSizeLog2));
2530 }
2531
2532 const UINT_32 maxBitsUsed = 14;
2533 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2534 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2535 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2536
2537 const UINT_32 extraXorBits = 16;
2538 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2539 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2540
2541 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2542 {
2543 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2544 InitChannel(1, 1, i, &y[i]);
2545 }
2546
2547 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2548
2549 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2550 {
2551 InitChannel(1, 0 , i, &pixelBit[i]);
2552 }
2553
2554 UINT_32 xIdx = 0;
2555 UINT_32 yIdx = 0;
2556 UINT_32 lowBits = 0;
2557
2558 if (IsZOrderSwizzle(swMode))
2559 {
2560 if (elementBytesLog2 <= 3)
2561 {
2562 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2563 {
2564 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2565 }
2566
2567 lowBits = 6;
2568 }
2569 else
2570 {
2571 ret = ADDR_INVALIDPARAMS;
2572 }
2573 }
2574 else
2575 {
2576 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2577
2578 if (ret == ADDR_OK)
2579 {
2580 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2581 xIdx = Log2(microBlockDim.w);
2582 yIdx = Log2(microBlockDim.h);
2583 lowBits = 8;
2584 }
2585 }
2586
2587 if (ret == ADDR_OK)
2588 {
2589 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2590 {
2591 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2592 }
2593
2594 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2595 {
2596 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2597 }
2598
2599 if (IsXor(swMode))
2600 {
2601 // Fill XOR bits
2602 UINT_32 pipeStart = m_pipeInterleaveLog2;
2603 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2604
2605 UINT_32 bankStart = pipeStart + pipeXorBits;
2606 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2607
2608 for (UINT_32 i = 0; i < pipeXorBits; i++)
2609 {
2610 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2611 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2612 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2613
2614 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2615 }
2616
2617 for (UINT_32 i = 0; i < bankXorBits; i++)
2618 {
2619 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2620 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2621 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2622
2623 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2624 }
2625
2626 if (IsPrt(swMode) == FALSE)
2627 {
2628 for (UINT_32 i = 0; i < pipeXorBits; i++)
2629 {
2630 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2631 }
2632
2633 for (UINT_32 i = 0; i < bankXorBits; i++)
2634 {
2635 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2636 }
2637 }
2638 }
2639
2640 pEquation->numBits = blockSizeLog2;
2641 }
2642
2643 return ret;
2644 }
2645
2646 /**
2647 ************************************************************************************************************************
2648 * Gfx9Lib::HwlComputeThickEquation
2649 *
2650 * @brief
2651 * Interface function stub of ComputeThickEquation
2652 *
2653 * @return
2654 * ADDR_E_RETURNCODE
2655 ************************************************************************************************************************
2656 */
2657 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2658 AddrResourceType rsrcType,
2659 AddrSwizzleMode swMode,
2660 UINT_32 elementBytesLog2,
2661 ADDR_EQUATION* pEquation) const
2662 {
2663 ADDR_E_RETURNCODE ret = ADDR_OK;
2664
2665 ADDR_ASSERT(IsTex3d(rsrcType));
2666
2667 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2668
2669 UINT_32 maxXorBits = blockSizeLog2;
2670 if (IsNonPrtXor(swMode))
2671 {
2672 // For non-prt-xor, maybe need to initialize some more bits for xor
2673 // The highest xor bit used in equation will be max the following 3:
2674 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2675 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2676 // 3. blockSizeLog2
2677
2678 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2679 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2680 GetPipeXorBits(blockSizeLog2) +
2681 3 * GetBankXorBits(blockSizeLog2));
2682 }
2683
2684 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2685 {
2686 InitChannel(1, 0 , i, &pEquation->addr[i]);
2687 }
2688
2689 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2690
2691 const UINT_32 maxBitsUsed = 12;
2692 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2693 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2694 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2695 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2696
2697 const UINT_32 extraXorBits = 24;
2698 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2699 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2700
2701 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2702 {
2703 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2704 InitChannel(1, 1, i, &y[i]);
2705 InitChannel(1, 2, i, &z[i]);
2706 }
2707
2708 if (IsZOrderSwizzle(swMode))
2709 {
2710 switch (elementBytesLog2)
2711 {
2712 case 0:
2713 pixelBit[0] = x[0];
2714 pixelBit[1] = y[0];
2715 pixelBit[2] = x[1];
2716 pixelBit[3] = y[1];
2717 pixelBit[4] = z[0];
2718 pixelBit[5] = z[1];
2719 pixelBit[6] = x[2];
2720 pixelBit[7] = z[2];
2721 pixelBit[8] = y[2];
2722 pixelBit[9] = x[3];
2723 break;
2724 case 1:
2725 pixelBit[0] = x[0];
2726 pixelBit[1] = y[0];
2727 pixelBit[2] = x[1];
2728 pixelBit[3] = y[1];
2729 pixelBit[4] = z[0];
2730 pixelBit[5] = z[1];
2731 pixelBit[6] = z[2];
2732 pixelBit[7] = y[2];
2733 pixelBit[8] = x[2];
2734 break;
2735 case 2:
2736 pixelBit[0] = x[0];
2737 pixelBit[1] = y[0];
2738 pixelBit[2] = x[1];
2739 pixelBit[3] = z[0];
2740 pixelBit[4] = y[1];
2741 pixelBit[5] = z[1];
2742 pixelBit[6] = y[2];
2743 pixelBit[7] = x[2];
2744 break;
2745 case 3:
2746 pixelBit[0] = x[0];
2747 pixelBit[1] = y[0];
2748 pixelBit[2] = z[0];
2749 pixelBit[3] = x[1];
2750 pixelBit[4] = z[1];
2751 pixelBit[5] = y[1];
2752 pixelBit[6] = x[2];
2753 break;
2754 case 4:
2755 pixelBit[0] = x[0];
2756 pixelBit[1] = y[0];
2757 pixelBit[2] = z[0];
2758 pixelBit[3] = z[1];
2759 pixelBit[4] = y[1];
2760 pixelBit[5] = x[1];
2761 break;
2762 default:
2763 ADDR_ASSERT_ALWAYS();
2764 ret = ADDR_INVALIDPARAMS;
2765 break;
2766 }
2767 }
2768 else if (IsStandardSwizzle(rsrcType, swMode))
2769 {
2770 switch (elementBytesLog2)
2771 {
2772 case 0:
2773 pixelBit[0] = x[0];
2774 pixelBit[1] = x[1];
2775 pixelBit[2] = x[2];
2776 pixelBit[3] = x[3];
2777 pixelBit[4] = y[0];
2778 pixelBit[5] = y[1];
2779 pixelBit[6] = z[0];
2780 pixelBit[7] = z[1];
2781 pixelBit[8] = z[2];
2782 pixelBit[9] = y[2];
2783 break;
2784 case 1:
2785 pixelBit[0] = x[0];
2786 pixelBit[1] = x[1];
2787 pixelBit[2] = x[2];
2788 pixelBit[3] = y[0];
2789 pixelBit[4] = y[1];
2790 pixelBit[5] = z[0];
2791 pixelBit[6] = z[1];
2792 pixelBit[7] = z[2];
2793 pixelBit[8] = y[2];
2794 break;
2795 case 2:
2796 pixelBit[0] = x[0];
2797 pixelBit[1] = x[1];
2798 pixelBit[2] = y[0];
2799 pixelBit[3] = y[1];
2800 pixelBit[4] = z[0];
2801 pixelBit[5] = z[1];
2802 pixelBit[6] = y[2];
2803 pixelBit[7] = x[2];
2804 break;
2805 case 3:
2806 pixelBit[0] = x[0];
2807 pixelBit[1] = y[0];
2808 pixelBit[2] = y[1];
2809 pixelBit[3] = z[0];
2810 pixelBit[4] = z[1];
2811 pixelBit[5] = x[1];
2812 pixelBit[6] = x[2];
2813 break;
2814 case 4:
2815 pixelBit[0] = y[0];
2816 pixelBit[1] = y[1];
2817 pixelBit[2] = z[0];
2818 pixelBit[3] = z[1];
2819 pixelBit[4] = x[0];
2820 pixelBit[5] = x[1];
2821 break;
2822 default:
2823 ADDR_ASSERT_ALWAYS();
2824 ret = ADDR_INVALIDPARAMS;
2825 break;
2826 }
2827 }
2828 else
2829 {
2830 ADDR_ASSERT_ALWAYS();
2831 ret = ADDR_INVALIDPARAMS;
2832 }
2833
2834 if (ret == ADDR_OK)
2835 {
2836 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2837 UINT_32 xIdx = Log2(microBlockDim.w);
2838 UINT_32 yIdx = Log2(microBlockDim.h);
2839 UINT_32 zIdx = Log2(microBlockDim.d);
2840
2841 pixelBit = pEquation->addr;
2842
2843 const UINT_32 lowBits = 10;
2844 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2845 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2846
2847 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2848 {
2849 if ((i % 3) == 0)
2850 {
2851 pixelBit[i] = x[xIdx++];
2852 }
2853 else if ((i % 3) == 1)
2854 {
2855 pixelBit[i] = z[zIdx++];
2856 }
2857 else
2858 {
2859 pixelBit[i] = y[yIdx++];
2860 }
2861 }
2862
2863 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2864 {
2865 if ((i % 3) == 0)
2866 {
2867 xorExtra[i - blockSizeLog2] = x[xIdx++];
2868 }
2869 else if ((i % 3) == 1)
2870 {
2871 xorExtra[i - blockSizeLog2] = z[zIdx++];
2872 }
2873 else
2874 {
2875 xorExtra[i - blockSizeLog2] = y[yIdx++];
2876 }
2877 }
2878
2879 if (IsXor(swMode))
2880 {
2881 // Fill XOR bits
2882 UINT_32 pipeStart = m_pipeInterleaveLog2;
2883 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2884 for (UINT_32 i = 0; i < pipeXorBits; i++)
2885 {
2886 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2887 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2888 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2889
2890 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2891
2892 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2893 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2894 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2895
2896 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2897 }
2898
2899 UINT_32 bankStart = pipeStart + pipeXorBits;
2900 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2901 for (UINT_32 i = 0; i < bankXorBits; i++)
2902 {
2903 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2904 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2905 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2906
2907 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2908
2909 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2910 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2911 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2912
2913 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2914 }
2915 }
2916
2917 pEquation->numBits = blockSizeLog2;
2918 }
2919
2920 return ret;
2921 }
2922
2923 /**
2924 ************************************************************************************************************************
2925 * Gfx9Lib::IsValidDisplaySwizzleMode
2926 *
2927 * @brief
2928 * Check if a swizzle mode is supported by display engine
2929 *
2930 * @return
2931 * TRUE is swizzle mode is supported by display engine
2932 ************************************************************************************************************************
2933 */
2934 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2935 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2936 {
2937 BOOL_32 support = FALSE;
2938
2939 if (m_settings.isDce12)
2940 {
2941 switch (pIn->swizzleMode)
2942 {
2943 case ADDR_SW_256B_D:
2944 case ADDR_SW_256B_R:
2945 support = (pIn->bpp == 32);
2946 break;
2947
2948 case ADDR_SW_LINEAR:
2949 case ADDR_SW_4KB_D:
2950 case ADDR_SW_4KB_R:
2951 case ADDR_SW_64KB_D:
2952 case ADDR_SW_64KB_R:
2953 case ADDR_SW_4KB_D_X:
2954 case ADDR_SW_4KB_R_X:
2955 case ADDR_SW_64KB_D_X:
2956 case ADDR_SW_64KB_R_X:
2957 support = (pIn->bpp <= 64);
2958 break;
2959
2960 default:
2961 break;
2962 }
2963 }
2964 else if (m_settings.isDcn1)
2965 {
2966 switch (pIn->swizzleMode)
2967 {
2968 case ADDR_SW_4KB_D:
2969 case ADDR_SW_64KB_D:
2970 case ADDR_SW_64KB_D_T:
2971 case ADDR_SW_4KB_D_X:
2972 case ADDR_SW_64KB_D_X:
2973 support = (pIn->bpp == 64);
2974 break;
2975
2976 case ADDR_SW_LINEAR:
2977 case ADDR_SW_4KB_S:
2978 case ADDR_SW_64KB_S:
2979 case ADDR_SW_64KB_S_T:
2980 case ADDR_SW_4KB_S_X:
2981 case ADDR_SW_64KB_S_X:
2982 support = (pIn->bpp <= 64);
2983 break;
2984
2985 default:
2986 break;
2987 }
2988 }
2989 else
2990 {
2991 ADDR_NOT_IMPLEMENTED();
2992 }
2993
2994 return support;
2995 }
2996
2997 /**
2998 ************************************************************************************************************************
2999 * Gfx9Lib::HwlComputePipeBankXor
3000 *
3001 * @brief
3002 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3003 *
3004 * @return
3005 * PipeBankXor value
3006 ************************************************************************************************************************
3007 */
3008 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3009 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3010 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
3011 {
3012 if (IsXor(pIn->swizzleMode))
3013 {
3014 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3015 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3016 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3017
3018 UINT_32 pipeXor = 0;
3019 UINT_32 bankXor = 0;
3020
3021 const UINT_32 bankMask = (1 << bankBits) - 1;
3022 const UINT_32 index = pIn->surfIndex & bankMask;
3023
3024 const UINT_32 bpp = pIn->flags.fmask ?
3025 GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3026 if (bankBits == 4)
3027 {
3028 static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3029 static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3030
3031 bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3032 }
3033 else if (bankBits > 0)
3034 {
3035 UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3036 bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3037 bankXor = (index * bankIncrease) & bankMask;
3038 }
3039
3040 pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3041 }
3042 else
3043 {
3044 pOut->pipeBankXor = 0;
3045 }
3046
3047 return ADDR_OK;
3048 }
3049
3050 /**
3051 ************************************************************************************************************************
3052 * Gfx9Lib::HwlComputeSlicePipeBankXor
3053 *
3054 * @brief
3055 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3056 *
3057 * @return
3058 * PipeBankXor value
3059 ************************************************************************************************************************
3060 */
3061 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3062 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3063 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const
3064 {
3065 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3066 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3067 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3068
3069 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3070 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3071
3072 pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3073
3074 return ADDR_OK;
3075 }
3076
3077 /**
3078 ************************************************************************************************************************
3079 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3080 *
3081 * @brief
3082 * Compute sub resource offset to support swizzle pattern
3083 *
3084 * @return
3085 * Offset
3086 ************************************************************************************************************************
3087 */
3088 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3089 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3090 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const
3091 {
3092 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3093
3094 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3095 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3096 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3097 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3098 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3099 UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3100
3101 pOut->offset = pIn->slice * pIn->sliceSize +
3102 pIn->macroBlockOffset +
3103 (pIn->mipTailOffset ^ pipeBankXor) -
3104 static_cast<UINT_64>(pipeBankXor);
3105 return ADDR_OK;
3106 }
3107
3108 /**
3109 ************************************************************************************************************************
3110 * Gfx9Lib::ValidateNonSwModeParams
3111 *
3112 * @brief
3113 * Validate compute surface info params except swizzle mode
3114 *
3115 * @return
3116 * TRUE if parameters are valid, FALSE otherwise
3117 ************************************************************************************************************************
3118 */
3119 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3120 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3121 {
3122 BOOL_32 valid = TRUE;
3123
3124 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3125 {
3126 ADDR_ASSERT_ALWAYS();
3127 valid = FALSE;
3128 }
3129
3130 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3131 {
3132 ADDR_ASSERT_ALWAYS();
3133 valid = FALSE;
3134 }
3135
3136 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3137 const BOOL_32 msaa = (pIn->numFrags > 1);
3138 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3139
3140 const AddrResourceType rsrcType = pIn->resourceType;
3141 const BOOL_32 tex3d = IsTex3d(rsrcType);
3142 const BOOL_32 tex2d = IsTex2d(rsrcType);
3143 const BOOL_32 tex1d = IsTex1d(rsrcType);
3144
3145 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3146 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3147 const BOOL_32 display = flags.display || flags.rotated;
3148 const BOOL_32 stereo = flags.qbStereo;
3149 const BOOL_32 fmask = flags.fmask;
3150
3151 // Resource type check
3152 if (tex1d)
3153 {
3154 if (msaa || zbuffer || display || stereo || isBc || fmask)
3155 {
3156 ADDR_ASSERT_ALWAYS();
3157 valid = FALSE;
3158 }
3159 }
3160 else if (tex2d)
3161 {
3162 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
3163 {
3164 ADDR_ASSERT_ALWAYS();
3165 valid = FALSE;
3166 }
3167 }
3168 else if (tex3d)
3169 {
3170 if (msaa || zbuffer || display || stereo || fmask)
3171 {
3172 ADDR_ASSERT_ALWAYS();
3173 valid = FALSE;
3174 }
3175 }
3176 else
3177 {
3178 ADDR_ASSERT_ALWAYS();
3179 valid = FALSE;
3180 }
3181
3182 return valid;
3183 }
3184
3185 /**
3186 ************************************************************************************************************************
3187 * Gfx9Lib::ValidateSwModeParams
3188 *
3189 * @brief
3190 * Validate compute surface info related to swizzle mode
3191 *
3192 * @return
3193 * TRUE if parameters are valid, FALSE otherwise
3194 ************************************************************************************************************************
3195 */
3196 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3197 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3198 {
3199 BOOL_32 valid = TRUE;
3200
3201 if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
3202 {
3203 ADDR_ASSERT_ALWAYS();
3204 valid = FALSE;
3205 }
3206
3207 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3208 const BOOL_32 msaa = (pIn->numFrags > 1);
3209 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3210 const BOOL_32 is422 = ElemLib::IsMacroPixelPacked(pIn->format);
3211
3212 const AddrResourceType rsrcType = pIn->resourceType;
3213 const BOOL_32 tex3d = IsTex3d(rsrcType);
3214 const BOOL_32 tex2d = IsTex2d(rsrcType);
3215 const BOOL_32 tex1d = IsTex1d(rsrcType);
3216
3217 const AddrSwizzleMode swizzle = pIn->swizzleMode;
3218 const BOOL_32 linear = IsLinear(swizzle);
3219 const BOOL_32 blk256B = IsBlock256b(swizzle);
3220 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
3221
3222 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3223 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3224 const BOOL_32 color = flags.color;
3225 const BOOL_32 texture = flags.texture;
3226 const BOOL_32 display = flags.display || flags.rotated;
3227 const BOOL_32 prt = flags.prt;
3228 const BOOL_32 fmask = flags.fmask;
3229
3230 const BOOL_32 thin3d = tex3d && flags.view3dAs2dArray;
3231 const BOOL_32 zMaxMip = tex3d && mipmap &&
3232 (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3233
3234 // Misc check
3235 if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3236 {
3237 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3238 ADDR_ASSERT_ALWAYS();
3239 valid = FALSE;
3240 }
3241
3242 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3243 {
3244 ADDR_ASSERT_ALWAYS();
3245 valid = FALSE;
3246 }
3247
3248 if ((pIn->bpp == 96) && (linear == FALSE))
3249 {
3250 ADDR_ASSERT_ALWAYS();
3251 valid = FALSE;
3252 }
3253
3254 if (prt && isNonPrtXor)
3255 {
3256 ADDR_ASSERT_ALWAYS();
3257 valid = FALSE;
3258 }
3259
3260 // Resource type check
3261 if (tex1d)
3262 {
3263 if (linear == FALSE)
3264 {
3265 ADDR_ASSERT_ALWAYS();
3266 valid = FALSE;
3267 }
3268 }
3269
3270 // Swizzle type check
3271 if (linear)
3272 {
3273 if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3274 ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3275 {
3276 ADDR_ASSERT_ALWAYS();
3277 valid = FALSE;
3278 }
3279 }
3280 else if (IsZOrderSwizzle(swizzle))
3281 {
3282 if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3283 {
3284 ADDR_ASSERT_ALWAYS();
3285 valid = FALSE;
3286 }
3287 }
3288 else if (IsStandardSwizzle(swizzle))
3289 {
3290 if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3291 {
3292 ADDR_ASSERT_ALWAYS();
3293 valid = FALSE;
3294 }
3295 }
3296 else if (IsDisplaySwizzle(swizzle))
3297 {
3298 if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3299 {
3300 ADDR_ASSERT_ALWAYS();
3301 valid = FALSE;
3302 }
3303 }
3304 else if (IsRotateSwizzle(swizzle))
3305 {
3306 if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3307 {
3308 ADDR_ASSERT_ALWAYS();
3309 valid = FALSE;
3310 }
3311 }
3312 else
3313 {
3314 ADDR_ASSERT_ALWAYS();
3315 valid = FALSE;
3316 }
3317
3318 // Block type check
3319 if (blk256B)
3320 {
3321 if (prt || zbuffer || tex3d || mipmap || msaa)
3322 {
3323 ADDR_ASSERT_ALWAYS();
3324 valid = FALSE;
3325 }
3326 }
3327
3328 return valid;
3329 }
3330
3331 /**
3332 ************************************************************************************************************************
3333 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3334 *
3335 * @brief
3336 * Compute surface info sanity check
3337 *
3338 * @return
3339 * ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3340 ************************************************************************************************************************
3341 */
3342 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3343 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3344 {
3345 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3346 }
3347
3348 /**
3349 ************************************************************************************************************************
3350 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3351 *
3352 * @brief
3353 * Internal function to get suggested surface information for cliet to use
3354 *
3355 * @return
3356 * ADDR_E_RETURNCODE
3357 ************************************************************************************************************************
3358 */
3359 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3360 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3361 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const
3362 {
3363 ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3364 ElemLib* pElemLib = GetElemLib();
3365
3366 UINT_32 bpp = pIn->bpp;
3367 UINT_32 width = Max(pIn->width, 1u);
3368 UINT_32 height = Max(pIn->height, 1u);
3369 UINT_32 numSamples = Max(pIn->numSamples, 1u);
3370 UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3371
3372 if (pIn->flags.fmask)
3373 {
3374 bpp = GetFmaskBpp(numSamples, numFrags);
3375 numFrags = 1;
3376 numSamples = 1;
3377 pOut->resourceType = ADDR_RSRC_TEX_2D;
3378 }
3379 else
3380 {
3381 // Set format to INVALID will skip this conversion
3382 if (pIn->format != ADDR_FMT_INVALID)
3383 {
3384 UINT_32 expandX, expandY;
3385
3386 // Don't care for this case
3387 ElemMode elemMode = ADDR_UNCOMPRESSED;
3388
3389 // Get compression/expansion factors and element mode which indicates compression/expansion
3390 bpp = pElemLib->GetBitsPerPixel(pIn->format,
3391 &elemMode,
3392 &expandX,
3393 &expandY);
3394
3395 UINT_32 basePitch = 0;
3396 GetElemLib()->AdjustSurfaceInfo(elemMode,
3397 expandX,
3398 expandY,
3399 &bpp,
3400 &basePitch,
3401 &width,
3402 &height);
3403 }
3404
3405 // The output may get changed for volume(3D) texture resource in future
3406 pOut->resourceType = pIn->resourceType;
3407 }
3408
3409 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
3410 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3411 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
3412 const BOOL_32 displayRsrc = pIn->flags.display || pIn->flags.rotated;
3413
3414 // Pre sanity check on non swizzle mode parameters
3415 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3416 localIn.flags = pIn->flags;
3417 localIn.resourceType = pOut->resourceType;
3418 localIn.format = pIn->format;
3419 localIn.bpp = bpp;
3420 localIn.width = width;
3421 localIn.height = height;
3422 localIn.numSlices = numSlices;
3423 localIn.numMipLevels = numMipLevels;
3424 localIn.numSamples = numSamples;
3425 localIn.numFrags = numFrags;
3426
3427 if (ValidateNonSwModeParams(&localIn))
3428 {
3429 // Forbid swizzle mode(s) by client setting
3430 ADDR2_SWMODE_SET allowedSwModeSet = {};
3431 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3432 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask;
3433 allowedSwModeSet.value |=
3434 pIn->forbiddenBlock.macroThin4KB ? 0 :
3435 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
3436 allowedSwModeSet.value |=
3437 pIn->forbiddenBlock.macroThick4KB ? 0 :
3438 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
3439 allowedSwModeSet.value |=
3440 pIn->forbiddenBlock.macroThin64KB ? 0 :
3441 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
3442 allowedSwModeSet.value |=
3443 pIn->forbiddenBlock.macroThick64KB ? 0 :
3444 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);
3445
3446 if (pIn->preferredSwSet.value != 0)
3447 {
3448 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3449 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3450 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3451 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3452 }
3453
3454 if (pIn->noXor)
3455 {
3456 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3457 }
3458
3459 if (pIn->maxAlign > 0)
3460 {
3461 if (pIn->maxAlign < Size64K)
3462 {
3463 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3464 }
3465
3466 if (pIn->maxAlign < Size4K)
3467 {
3468 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3469 }
3470
3471 if (pIn->maxAlign < Size256)
3472 {
3473 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3474 }
3475 }
3476
3477 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3478 switch (pOut->resourceType)
3479 {
3480 case ADDR_RSRC_TEX_1D:
3481 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3482 break;
3483
3484 case ADDR_RSRC_TEX_2D:
3485 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3486
3487 if (bpp > 64)
3488 {
3489 allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3490 }
3491 break;
3492
3493 case ADDR_RSRC_TEX_3D:
3494 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3495
3496 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3497 {
3498 // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3499 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3500 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3501 allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3502 }
3503
3504 if ((bpp == 128) && pIn->flags.color)
3505 {
3506 allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3507 }
3508
3509 if (pIn->flags.view3dAs2dArray)
3510 {
3511 allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3512 }
3513 break;
3514
3515 default:
3516 ADDR_ASSERT_ALWAYS();
3517 allowedSwModeSet.value = 0;
3518 break;
3519 }
3520
3521 if (pIn->format == ADDR_FMT_32_32_32)
3522 {
3523 allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3524 }
3525
3526 if (ElemLib::IsBlockCompressed(pIn->format))
3527 {
3528 if (pIn->flags.texture)
3529 {
3530 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3531 }
3532 else
3533 {
3534 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3535 }
3536 }
3537
3538 if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3539 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3540 {
3541 allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3542 }
3543
3544 if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3545 {
3546 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3547
3548 if (pIn->flags.noMetadata == FALSE)
3549 {
3550 if (pIn->flags.depth &&
3551 pIn->flags.texture &&
3552 (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3553 {
3554 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3555 // equation from wrong address within memory range a tile covered and use the
3556 // garbage data for compressed Z reading which finally leads to corruption.
3557 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3558 }
3559
3560 if (m_settings.htileCacheRbConflict &&
3561 (pIn->flags.depth || pIn->flags.stencil) &&
3562 (numSlices > 1) &&
3563 (pIn->flags.metaRbUnaligned == FALSE) &&
3564 (pIn->flags.metaPipeUnaligned == FALSE))
3565 {
3566 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3567 allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3568 }
3569 }
3570 }
3571
3572 if (msaa)
3573 {
3574 allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3575 }
3576
3577 if ((numFrags > 1) &&
3578 (Size4K < (m_pipeInterleaveBytes * numFrags)))
3579 {
3580 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3581 allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3582 }
3583
3584 if (numMipLevels > 1)
3585 {
3586 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3587 }
3588
3589 if (displayRsrc)
3590 {
3591 if (m_settings.isDce12)
3592 {
3593 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3594 }
3595 else if (m_settings.isDcn1)
3596 {
3597 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3598 }
3599 else
3600 {
3601 ADDR_NOT_IMPLEMENTED();
3602 }
3603 }
3604
3605 if (allowedSwModeSet.value != 0)
3606 {
3607 #if DEBUG
3608 // Post sanity check, at least AddrLib should accept the output generated by its own
3609 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3610
3611 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3612 {
3613 if (validateSwModeSet & 1)
3614 {
3615 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3616 ADDR_ASSERT(ValidateSwModeParams(&localIn));
3617 }
3618
3619 validateSwModeSet >>= 1;
3620 }
3621 #endif
3622
3623 pOut->validSwModeSet = allowedSwModeSet;
3624 pOut->canXor = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3625 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3626 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3627
3628 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3629
3630 if (pOut->clientPreferredSwSet.value == 0)
3631 {
3632 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3633 }
3634
3635 // Apply optional restrictions
3636 if (pIn->flags.needEquation)
3637 {
3638 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
3639 }
3640
3641 if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3642 {
3643 pOut->swizzleMode = ADDR_SW_LINEAR;
3644 }
3645 else
3646 {
3647 // Always ignore linear swizzle mode if there is other choice.
3648 allowedSwModeSet.swLinear = 0;
3649
3650 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3651
3652 // Determine block size if there is 2 or more block type candidates
3653 if (IsPow2(allowedBlockSet.value) == FALSE)
3654 {
3655 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = { ADDR_SW_LINEAR };
3656
3657 swMode[AddrBlockMicro] = ADDR_SW_256B_D;
3658 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_D;
3659 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;
3660
3661 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3662 {
3663 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
3664 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3665 }
3666
3667 Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
3668 Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
3669 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3670
3671 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
3672 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
3673 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3674 UINT_32 minSizeBlk = AddrBlockMicro;
3675 UINT_64 minSize = 0;
3676
3677 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3678 {
3679 if (allowedBlockSet.value & (1 << i))
3680 {
3681 ComputeBlockDimensionForSurf(&blkDim[i].w,
3682 &blkDim[i].h,
3683 &blkDim[i].d,
3684 bpp,
3685 numFrags,
3686 pOut->resourceType,
3687 swMode[i]);
3688
3689 if (displayRsrc)
3690 {
3691 blkDim[i].w = PowTwoAlign(blkDim[i].w, 32);
3692 }
3693
3694 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
3695 padSize[i] = PowTwoAlign(padSize[i] * numFrags, sizeAlignInElement);
3696
3697 if ((minSize == 0) ||
3698 ((padSize[i] * ratioHi) <= (minSize * ratioLow)))
3699 {
3700 minSize = padSize[i];
3701 minSizeBlk = i;
3702 }
3703 }
3704 }
3705
3706 if ((allowedBlockSet.micro == TRUE) &&
3707 (width <= blkDim[AddrBlockMicro].w) &&
3708 (height <= blkDim[AddrBlockMicro].h) &&
3709 (NextPow2(pIn->minSizeAlign) <= Size256))
3710 {
3711 minSizeBlk = AddrBlockMicro;
3712 }
3713
3714 if (minSizeBlk == AddrBlockMicro)
3715 {
3716 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3717 allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3718 }
3719 else if (minSizeBlk == AddrBlockThick4KB)
3720 {
3721 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3722 allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
3723 }
3724 else if (minSizeBlk == AddrBlockThin4KB)
3725 {
3726 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3727 Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
3728 }
3729 else if (minSizeBlk == AddrBlockThick64KB)
3730 {
3731 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3732 allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
3733 }
3734 else
3735 {
3736 ADDR_ASSERT(minSizeBlk == AddrBlockThin64KB);
3737 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3738 Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
3739 }
3740 }
3741
3742 // Block type should be determined.
3743 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3744
3745 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3746
3747 // Determine swizzle type if there is 2 or more swizzle type candidates
3748 if (IsPow2(allowedSwSet.value) == FALSE)
3749 {
3750 if (ElemLib::IsBlockCompressed(pIn->format))
3751 {
3752 if (allowedSwSet.sw_D)
3753 {
3754 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3755 }
3756 else
3757 {
3758 ADDR_ASSERT(allowedSwSet.sw_S);
3759 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3760 }
3761 }
3762 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3763 {
3764 if (allowedSwSet.sw_S)
3765 {
3766 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3767 }
3768 else if (allowedSwSet.sw_D)
3769 {
3770 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3771 }
3772 else
3773 {
3774 ADDR_ASSERT(allowedSwSet.sw_R);
3775 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3776 }
3777 }
3778 else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3779 {
3780 if (pIn->flags.color && allowedSwSet.sw_D)
3781 {
3782 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3783 }
3784 else if (allowedSwSet.sw_Z)
3785 {
3786 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3787 }
3788 else
3789 {
3790 ADDR_ASSERT(allowedSwSet.sw_S);
3791 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3792 }
3793 }
3794 else
3795 {
3796 if (pIn->flags.rotated && allowedSwSet.sw_R)
3797 {
3798 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3799 }
3800 else if (allowedSwSet.sw_D)
3801 {
3802 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3803 }
3804 else if (allowedSwSet.sw_S)
3805 {
3806 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3807 }
3808 else
3809 {
3810 ADDR_ASSERT(allowedSwSet.sw_Z);
3811 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3812 }
3813 }
3814 }
3815
3816 // Swizzle type should be determined.
3817 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3818
3819 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
3820 // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3821 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3822 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3823 }
3824
3825 returnCode = ADDR_OK;
3826 }
3827 else
3828 {
3829 // Invalid combination...
3830 ADDR_ASSERT_ALWAYS();
3831 }
3832 }
3833 else
3834 {
3835 // Invalid combination...
3836 ADDR_ASSERT_ALWAYS();
3837 }
3838
3839 return returnCode;
3840 }
3841
3842 /**
3843 ************************************************************************************************************************
3844 * Gfx9Lib::ComputeStereoInfo
3845 *
3846 * @brief
3847 * Compute height alignment and right eye pipeBankXor for stereo surface
3848 *
3849 * @return
3850 * Error code
3851 *
3852 ************************************************************************************************************************
3853 */
3854 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3855 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3856 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
3857 UINT_32* pHeightAlign
3858 ) const
3859 {
3860 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3861
3862 UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3863
3864 if (eqIndex < m_numEquations)
3865 {
3866 if (IsXor(pIn->swizzleMode))
3867 {
3868 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3869 const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2);
3870 const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
3871 const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
3872 const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3873 const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
3874
3875 ADDR_ASSERT(maxYCoordBlock256 ==
3876 GetMaxValidChannelIndex(&pEqToCheck->addr[0], Log2Size256, 1));
3877
3878 const UINT_32 maxYCoordInBaseEquation =
3879 (blkSizeLog2 - Log2Size256) / 2 + maxYCoordBlock256;
3880
3881 ADDR_ASSERT(maxYCoordInBaseEquation ==
3882 GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3883
3884 const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3885
3886 ADDR_ASSERT(maxYCoordInPipeXor ==
3887 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3888
3889 const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3890 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3891
3892 ADDR_ASSERT(maxYCoordInBankXor ==
3893 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3894
3895 const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3896
3897 if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3898 {
3899 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3900
3901 if (pOut->pStereoInfo != NULL)
3902 {
3903 pOut->pStereoInfo->rightSwizzle = 0;
3904
3905 if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3906 {
3907 if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3908 {
3909 pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3910 }
3911
3912 if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3913 {
3914 pOut->pStereoInfo->rightSwizzle |=
3915 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3916 }
3917
3918 ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3919 GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3920 numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3921 }
3922 }
3923 }
3924 }
3925 }
3926 else
3927 {
3928 ADDR_ASSERT_ALWAYS();
3929 returnCode = ADDR_ERROR;
3930 }
3931
3932 return returnCode;
3933 }
3934
3935 /**
3936 ************************************************************************************************************************
3937 * Gfx9Lib::HwlComputeSurfaceInfoTiled
3938 *
3939 * @brief
3940 * Internal function to calculate alignment for tiled surface
3941 *
3942 * @return
3943 * ADDR_E_RETURNCODE
3944 ************************************************************************************************************************
3945 */
3946 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3947 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3948 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3949 ) const
3950 {
3951 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3952 &pOut->blockHeight,
3953 &pOut->blockSlices,
3954 pIn->bpp,
3955 pIn->numFrags,
3956 pIn->resourceType,
3957 pIn->swizzleMode);
3958
3959 if (returnCode == ADDR_OK)
3960 {
3961 UINT_32 pitchAlignInElement = pOut->blockWidth;
3962
3963 if ((IsTex2d(pIn->resourceType) == TRUE) &&
3964 (pIn->flags.display || pIn->flags.rotated) &&
3965 (pIn->numMipLevels <= 1) &&
3966 (pIn->numSamples <= 1) &&
3967 (pIn->numFrags <= 1))
3968 {
3969 // Display engine needs pitch align to be at least 32 pixels.
3970 pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3971 }
3972
3973 pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3974
3975 if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3976 {
3977 if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3978 {
3979 returnCode = ADDR_INVALIDPARAMS;
3980 }
3981 else if (pIn->pitchInElement < pOut->pitch)
3982 {
3983 returnCode = ADDR_INVALIDPARAMS;
3984 }
3985 else
3986 {
3987 pOut->pitch = pIn->pitchInElement;
3988 }
3989 }
3990
3991 UINT_32 heightAlign = 0;
3992
3993 if (pIn->flags.qbStereo)
3994 {
3995 returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3996 }
3997
3998 if (returnCode == ADDR_OK)
3999 {
4000 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
4001
4002 if (heightAlign > 1)
4003 {
4004 pOut->height = PowTwoAlign(pOut->height, heightAlign);
4005 }
4006
4007 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
4008
4009 pOut->epitchIsHeight = FALSE;
4010 pOut->mipChainInTail = FALSE;
4011 pOut->firstMipIdInTail = pIn->numMipLevels;
4012
4013 pOut->mipChainPitch = pOut->pitch;
4014 pOut->mipChainHeight = pOut->height;
4015 pOut->mipChainSlice = pOut->numSlices;
4016
4017 if (pIn->numMipLevels > 1)
4018 {
4019 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
4020 pIn->swizzleMode,
4021 pIn->bpp,
4022 pIn->width,
4023 pIn->height,
4024 pIn->numSlices,
4025 pOut->blockWidth,
4026 pOut->blockHeight,
4027 pOut->blockSlices,
4028 pIn->numMipLevels,
4029 pOut->pMipInfo);
4030
4031 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4032
4033 if (endingMipId == 0)
4034 {
4035 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4036 pIn->swizzleMode,
4037 pOut->blockWidth,
4038 pOut->blockHeight,
4039 pOut->blockSlices);
4040
4041 pOut->epitchIsHeight = TRUE;
4042 pOut->pitch = tailMaxDim.w;
4043 pOut->height = tailMaxDim.h;
4044 pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4045 tailMaxDim.d : pIn->numSlices;
4046 pOut->mipChainInTail = TRUE;
4047 }
4048 else
4049 {
4050 UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
4051 UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4052
4053 AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4054 pIn->swizzleMode,
4055 mip0WidthInBlk,
4056 mip0HeightInBlk,
4057 pOut->numSlices / pOut->blockSlices);
4058 if (majorMode == ADDR_MAJOR_Y)
4059 {
4060 UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4061
4062 if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4063 {
4064 mip1WidthInBlk++;
4065 }
4066
4067 pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4068
4069 pOut->epitchIsHeight = FALSE;
4070 }
4071 else
4072 {
4073 UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4074
4075 if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4076 {
4077 mip1HeightInBlk++;
4078 }
4079
4080 pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4081
4082 pOut->epitchIsHeight = TRUE;
4083 }
4084 }
4085
4086 if (pOut->pMipInfo != NULL)
4087 {
4088 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4089
4090 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4091 {
4092 Dim3d mipStartPos = {0};
4093 UINT_32 mipTailOffsetInBytes = 0;
4094
4095 mipStartPos = GetMipStartPos(pIn->resourceType,
4096 pIn->swizzleMode,
4097 pOut->pitch,
4098 pOut->height,
4099 pOut->numSlices,
4100 pOut->blockWidth,
4101 pOut->blockHeight,
4102 pOut->blockSlices,
4103 i,
4104 elementBytesLog2,
4105 &mipTailOffsetInBytes);
4106
4107 UINT_32 pitchInBlock =
4108 pOut->mipChainPitch / pOut->blockWidth;
4109 UINT_32 sliceInBlock =
4110 (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4111 UINT_64 blockIndex =
4112 mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4113 UINT_64 macroBlockOffset =
4114 blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4115
4116 pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4117 pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
4118 }
4119 }
4120 }
4121 else if (pOut->pMipInfo != NULL)
4122 {
4123 pOut->pMipInfo[0].pitch = pOut->pitch;
4124 pOut->pMipInfo[0].height = pOut->height;
4125 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4126 pOut->pMipInfo[0].offset = 0;
4127 }
4128
4129 pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4130 (pIn->bpp >> 3) * pIn->numFrags;
4131 pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
4132 pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4133
4134 if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4135 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4136 (pIn->flags.texture == TRUE) &&
4137 (pIn->flags.noMetadata == FALSE) &&
4138 (pIn->flags.metaPipeUnaligned == FALSE))
4139 {
4140 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4141 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4142 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4143 // them, which may cause invalid metadata to be fetched.
4144 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4145 }
4146
4147 if (pIn->flags.prt)
4148 {
4149 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4150 }
4151 }
4152 }
4153
4154 return returnCode;
4155 }
4156
4157 /**
4158 ************************************************************************************************************************
4159 * Gfx9Lib::HwlComputeSurfaceInfoLinear
4160 *
4161 * @brief
4162 * Internal function to calculate alignment for linear surface
4163 *
4164 * @return
4165 * ADDR_E_RETURNCODE
4166 ************************************************************************************************************************
4167 */
4168 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4169 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4170 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4171 ) const
4172 {
4173 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4174 UINT_32 pitch = 0;
4175 UINT_32 actualHeight = 0;
4176 UINT_32 elementBytes = pIn->bpp >> 3;
4177 const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256;
4178
4179 if (IsTex1d(pIn->resourceType))
4180 {
4181 if (pIn->height > 1)
4182 {
4183 returnCode = ADDR_INVALIDPARAMS;
4184 }
4185 else
4186 {
4187 const UINT_32 pitchAlignInElement = alignment / elementBytes;
4188
4189 pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4190 actualHeight = pIn->numMipLevels;
4191
4192 if (pIn->flags.prt == FALSE)
4193 {
4194 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4195 &pitch, &actualHeight);
4196 }
4197
4198 if (returnCode == ADDR_OK)
4199 {
4200 if (pOut->pMipInfo != NULL)
4201 {
4202 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4203 {
4204 pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4205 pOut->pMipInfo[i].pitch = pitch;
4206 pOut->pMipInfo[i].height = 1;
4207 pOut->pMipInfo[i].depth = 1;
4208 }
4209 }
4210 }
4211 }
4212 }
4213 else
4214 {
4215 returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4216 }
4217
4218 if ((pitch == 0) || (actualHeight == 0))
4219 {
4220 returnCode = ADDR_INVALIDPARAMS;
4221 }
4222
4223 if (returnCode == ADDR_OK)
4224 {
4225 pOut->pitch = pitch;
4226 pOut->height = pIn->height;
4227 pOut->numSlices = pIn->numSlices;
4228 pOut->mipChainPitch = pitch;
4229 pOut->mipChainHeight = actualHeight;
4230 pOut->mipChainSlice = pOut->numSlices;
4231 pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4232 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4233 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
4234 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4235 pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4236 pOut->blockHeight = 1;
4237 pOut->blockSlices = 1;
4238 }
4239
4240 // Post calculation validate
4241 ADDR_ASSERT(pOut->sliceSize > 0);
4242
4243 return returnCode;
4244 }
4245
4246 /**
4247 ************************************************************************************************************************
4248 * Gfx9Lib::GetMipChainInfo
4249 *
4250 * @brief
4251 * Internal function to get out information about mip chain
4252 *
4253 * @return
4254 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4255 ************************************************************************************************************************
4256 */
4257 UINT_32 Gfx9Lib::GetMipChainInfo(
4258 AddrResourceType resourceType,
4259 AddrSwizzleMode swizzleMode,
4260 UINT_32 bpp,
4261 UINT_32 mip0Width,
4262 UINT_32 mip0Height,
4263 UINT_32 mip0Depth,
4264 UINT_32 blockWidth,
4265 UINT_32 blockHeight,
4266 UINT_32 blockDepth,
4267 UINT_32 numMipLevel,
4268 ADDR2_MIP_INFO* pMipInfo) const
4269 {
4270 const Dim3d tailMaxDim =
4271 GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4272
4273 UINT_32 mipPitch = mip0Width;
4274 UINT_32 mipHeight = mip0Height;
4275 UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
4276 UINT_32 offset = 0;
4277 UINT_32 firstMipIdInTail = numMipLevel;
4278 BOOL_32 inTail = FALSE;
4279 BOOL_32 finalDim = FALSE;
4280 BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
4281 BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
4282
4283 for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4284 {
4285 if (inTail)
4286 {
4287 if (finalDim == FALSE)
4288 {
4289 UINT_32 mipSize;
4290
4291 if (is3dThick)
4292 {
4293 mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4294 }
4295 else
4296 {
4297 mipSize = mipPitch * mipHeight * (bpp >> 3);
4298 }
4299
4300 if (mipSize <= 256)
4301 {
4302 UINT_32 index = Log2(bpp >> 3);
4303
4304 if (is3dThick)
4305 {
4306 mipPitch = Block256_3dZ[index].w;
4307 mipHeight = Block256_3dZ[index].h;
4308 mipDepth = Block256_3dZ[index].d;
4309 }
4310 else
4311 {
4312 mipPitch = Block256_2d[index].w;
4313 mipHeight = Block256_2d[index].h;
4314 }
4315
4316 finalDim = TRUE;
4317 }
4318 }
4319 }
4320 else
4321 {
4322 inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4323 mipPitch, mipHeight, mipDepth);
4324
4325 if (inTail)
4326 {
4327 firstMipIdInTail = mipId;
4328 mipPitch = tailMaxDim.w;
4329 mipHeight = tailMaxDim.h;
4330
4331 if (is3dThick)
4332 {
4333 mipDepth = tailMaxDim.d;
4334 }
4335 }
4336 else
4337 {
4338 mipPitch = PowTwoAlign(mipPitch, blockWidth);
4339 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4340
4341 if (is3dThick)
4342 {
4343 mipDepth = PowTwoAlign(mipDepth, blockDepth);
4344 }
4345 }
4346 }
4347
4348 if (pMipInfo != NULL)
4349 {
4350 pMipInfo[mipId].pitch = mipPitch;
4351 pMipInfo[mipId].height = mipHeight;
4352 pMipInfo[mipId].depth = mipDepth;
4353 pMipInfo[mipId].offset = offset;
4354 }
4355
4356 offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4357
4358 if (finalDim)
4359 {
4360 if (is3dThin)
4361 {
4362 mipDepth = Max(mipDepth >> 1, 1u);
4363 }
4364 }
4365 else
4366 {
4367 mipPitch = Max(mipPitch >> 1, 1u);
4368 mipHeight = Max(mipHeight >> 1, 1u);
4369
4370 if (is3dThick || is3dThin)
4371 {
4372 mipDepth = Max(mipDepth >> 1, 1u);
4373 }
4374 }
4375 }
4376
4377 return firstMipIdInTail;
4378 }
4379
4380 /**
4381 ************************************************************************************************************************
4382 * Gfx9Lib::GetMetaMiptailInfo
4383 *
4384 * @brief
4385 * Get mip tail coordinate information.
4386 *
4387 * @return
4388 * N/A
4389 ************************************************************************************************************************
4390 */
4391 VOID Gfx9Lib::GetMetaMiptailInfo(
4392 ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord
4393 Dim3d mipCoord, ///< [in] mip tail base coord
4394 UINT_32 numMipInTail, ///< [in] number of mips in tail
4395 Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
4396 ) const
4397 {
4398 BOOL_32 isThick = (pMetaBlkDim->d > 1);
4399 UINT_32 mipWidth = pMetaBlkDim->w;
4400 UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4401 UINT_32 mipDepth = pMetaBlkDim->d;
4402 UINT_32 minInc;
4403
4404 if (isThick)
4405 {
4406 minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4407 }
4408 else if (pMetaBlkDim->h >= 1024)
4409 {
4410 minInc = 256;
4411 }
4412 else if (pMetaBlkDim->h == 512)
4413 {
4414 minInc = 128;
4415 }
4416 else
4417 {
4418 minInc = 64;
4419 }
4420
4421 UINT_32 blk32MipId = 0xFFFFFFFF;
4422
4423 for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4424 {
4425 pInfo[mip].inMiptail = TRUE;
4426 pInfo[mip].startX = mipCoord.w;
4427 pInfo[mip].startY = mipCoord.h;
4428 pInfo[mip].startZ = mipCoord.d;
4429 pInfo[mip].width = mipWidth;
4430 pInfo[mip].height = mipHeight;
4431 pInfo[mip].depth = mipDepth;
4432
4433 if (mipWidth <= 32)
4434 {
4435 if (blk32MipId == 0xFFFFFFFF)
4436 {
4437 blk32MipId = mip;
4438 }
4439
4440 mipCoord.w = pInfo[blk32MipId].startX;
4441 mipCoord.h = pInfo[blk32MipId].startY;
4442 mipCoord.d = pInfo[blk32MipId].startZ;
4443
4444 switch (mip - blk32MipId)
4445 {
4446 case 0:
4447 mipCoord.w += 32; // 16x16
4448 break;
4449 case 1:
4450 mipCoord.h += 32; // 8x8
4451 break;
4452 case 2:
4453 mipCoord.h += 32; // 4x4
4454 mipCoord.w += 16;
4455 break;
4456 case 3:
4457 mipCoord.h += 32; // 2x2
4458 mipCoord.w += 32;
4459 break;
4460 case 4:
4461 mipCoord.h += 32; // 1x1
4462 mipCoord.w += 48;
4463 break;
4464 // The following are for BC/ASTC formats
4465 case 5:
4466 mipCoord.h += 48; // 1/2 x 1/2
4467 break;
4468 case 6:
4469 mipCoord.h += 48; // 1/4 x 1/4
4470 mipCoord.w += 16;
4471 break;
4472 case 7:
4473 mipCoord.h += 48; // 1/8 x 1/8
4474 mipCoord.w += 32;
4475 break;
4476 case 8:
4477 mipCoord.h += 48; // 1/16 x 1/16
4478 mipCoord.w += 48;
4479 break;
4480 default:
4481 ADDR_ASSERT_ALWAYS();
4482 break;
4483 }
4484
4485 mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4486 mipHeight = mipWidth;
4487
4488 if (isThick)
4489 {
4490 mipDepth = mipWidth;
4491 }
4492 }
4493 else
4494 {
4495 if (mipWidth <= minInc)
4496 {
4497 // if we're below the minimal increment...
4498 if (isThick)
4499 {
4500 // For 3d, just go in z direction
4501 mipCoord.d += mipDepth;
4502 }
4503 else
4504 {
4505 // For 2d, first go across, then down
4506 if ((mipWidth * 2) == minInc)
4507 {
4508 // if we're 2 mips below, that's when we go back in x, and down in y
4509 mipCoord.w -= minInc;
4510 mipCoord.h += minInc;
4511 }
4512 else
4513 {
4514 // otherwise, just go across in x
4515 mipCoord.w += minInc;
4516 }
4517 }
4518 }
4519 else
4520 {
4521 // On even mip, go down, otherwise, go across
4522 if (mip & 1)
4523 {
4524 mipCoord.w += mipWidth;
4525 }
4526 else
4527 {
4528 mipCoord.h += mipHeight;
4529 }
4530 }
4531 // Divide the width by 2
4532 mipWidth >>= 1;
4533 // After the first mip in tail, the mip is always a square
4534 mipHeight = mipWidth;
4535 // ...or for 3d, a cube
4536 if (isThick)
4537 {
4538 mipDepth = mipWidth;
4539 }
4540 }
4541 }
4542 }
4543
4544 /**
4545 ************************************************************************************************************************
4546 * Gfx9Lib::GetMipStartPos
4547 *
4548 * @brief
4549 * Internal function to get out information about mip logical start position
4550 *
4551 * @return
4552 * logical start position in macro block width/height/depth of one mip level within one slice
4553 ************************************************************************************************************************
4554 */
4555 Dim3d Gfx9Lib::GetMipStartPos(
4556 AddrResourceType resourceType,
4557 AddrSwizzleMode swizzleMode,
4558 UINT_32 width,
4559 UINT_32 height,
4560 UINT_32 depth,
4561 UINT_32 blockWidth,
4562 UINT_32 blockHeight,
4563 UINT_32 blockDepth,
4564 UINT_32 mipId,
4565 UINT_32 log2ElementBytes,
4566 UINT_32* pMipTailBytesOffset) const
4567 {
4568 Dim3d mipStartPos = {0};
4569 const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4570
4571 // Report mip in tail if Mip0 is already in mip tail
4572 BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4573 UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
4574 UINT_32 mipIndexInTail = mipId;
4575
4576 if (inMipTail == FALSE)
4577 {
4578 // Mip 0 dimension, unit in block
4579 UINT_32 mipWidthInBlk = width / blockWidth;
4580 UINT_32 mipHeightInBlk = height / blockHeight;
4581 UINT_32 mipDepthInBlk = depth / blockDepth;
4582 AddrMajorMode majorMode = GetMajorMode(resourceType,
4583 swizzleMode,
4584 mipWidthInBlk,
4585 mipHeightInBlk,
4586 mipDepthInBlk);
4587
4588 UINT_32 endingMip = mipId + 1;
4589
4590 for (UINT_32 i = 1; i <= mipId; i++)
4591 {
4592 if ((i == 1) || (i == 3))
4593 {
4594 if (majorMode == ADDR_MAJOR_Y)
4595 {
4596 mipStartPos.w += mipWidthInBlk;
4597 }
4598 else
4599 {
4600 mipStartPos.h += mipHeightInBlk;
4601 }
4602 }
4603 else
4604 {
4605 if (majorMode == ADDR_MAJOR_X)
4606 {
4607 mipStartPos.w += mipWidthInBlk;
4608 }
4609 else if (majorMode == ADDR_MAJOR_Y)
4610 {
4611 mipStartPos.h += mipHeightInBlk;
4612 }
4613 else
4614 {
4615 mipStartPos.d += mipDepthInBlk;
4616 }
4617 }
4618
4619 BOOL_32 inTail = FALSE;
4620
4621 if (IsThick(resourceType, swizzleMode))
4622 {
4623 UINT_32 dim = log2BlkSize % 3;
4624
4625 if (dim == 0)
4626 {
4627 inTail =
4628 (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4629 }
4630 else if (dim == 1)
4631 {
4632 inTail =
4633 (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4634 }
4635 else
4636 {
4637 inTail =
4638 (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4639 }
4640 }
4641 else
4642 {
4643 if (log2BlkSize & 1)
4644 {
4645 inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4646 }
4647 else
4648 {
4649 inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4650 }
4651 }
4652
4653 if (inTail)
4654 {
4655 endingMip = i;
4656 break;
4657 }
4658
4659 mipWidthInBlk = RoundHalf(mipWidthInBlk);
4660 mipHeightInBlk = RoundHalf(mipHeightInBlk);
4661 mipDepthInBlk = RoundHalf(mipDepthInBlk);
4662 }
4663
4664 if (mipId >= endingMip)
4665 {
4666 inMipTail = TRUE;
4667 mipIndexInTail = mipId - endingMip;
4668 }
4669 }
4670
4671 if (inMipTail)
4672 {
4673 UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
4674 ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4675 *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4676 }
4677
4678 return mipStartPos;
4679 }
4680
4681 /**
4682 ************************************************************************************************************************
4683 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4684 *
4685 * @brief
4686 * Internal function to calculate address from coord for tiled swizzle surface
4687 *
4688 * @return
4689 * ADDR_E_RETURNCODE
4690 ************************************************************************************************************************
4691 */
4692 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4693 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4694 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4695 ) const
4696 {
4697 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4698 localIn.swizzleMode = pIn->swizzleMode;
4699 localIn.flags = pIn->flags;
4700 localIn.resourceType = pIn->resourceType;
4701 localIn.bpp = pIn->bpp;
4702 localIn.width = Max(pIn->unalignedWidth, 1u);
4703 localIn.height = Max(pIn->unalignedHeight, 1u);
4704 localIn.numSlices = Max(pIn->numSlices, 1u);
4705 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4706 localIn.numSamples = Max(pIn->numSamples, 1u);
4707 localIn.numFrags = Max(pIn->numFrags, 1u);
4708 if (localIn.numMipLevels <= 1)
4709 {
4710 localIn.pitchInElement = pIn->pitchInElement;
4711 }
4712
4713 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4714 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4715
4716 BOOL_32 valid = (returnCode == ADDR_OK) &&
4717 (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4718 IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4719 ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4720
4721 if (valid)
4722 {
4723 UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3);
4724 Dim3d mipStartPos = {0};
4725 UINT_32 mipTailBytesOffset = 0;
4726
4727 if (pIn->numMipLevels > 1)
4728 {
4729 // Mip-map chain cannot be MSAA surface
4730 ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4731
4732 mipStartPos = GetMipStartPos(pIn->resourceType,
4733 pIn->swizzleMode,
4734 localOut.pitch,
4735 localOut.height,
4736 localOut.numSlices,
4737 localOut.blockWidth,
4738 localOut.blockHeight,
4739 localOut.blockSlices,
4740 pIn->mipId,
4741 log2ElementBytes,
4742 &mipTailBytesOffset);
4743 }
4744
4745 UINT_32 interleaveOffset = 0;
4746 UINT_32 pipeBits = 0;
4747 UINT_32 pipeXor = 0;
4748 UINT_32 bankBits = 0;
4749 UINT_32 bankXor = 0;
4750
4751 if (IsThin(pIn->resourceType, pIn->swizzleMode))
4752 {
4753 UINT_32 blockOffset = 0;
4754 UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4755
4756 if (IsZOrderSwizzle(pIn->swizzleMode))
4757 {
4758 // Morton generation
4759 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4760 {
4761 UINT_32 totalLowBits = 6 - log2ElementBytes;
4762 UINT_32 mortBits = totalLowBits / 2;
4763 UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4764 // Are 9 bits enough?
4765 UINT_32 highBitsValue =
4766 MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4767 blockOffset = lowBitsValue | highBitsValue;
4768 ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4769 }
4770 else
4771 {
4772 blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4773 }
4774
4775 // Fill LSBs with sample bits
4776 if (pIn->numSamples > 1)
4777 {
4778 blockOffset *= pIn->numSamples;
4779 blockOffset |= pIn->sample;
4780 }
4781
4782 // Shift according to BytesPP
4783 blockOffset <<= log2ElementBytes;
4784 }
4785 else
4786 {
4787 // Micro block offset
4788 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4789 blockOffset = microBlockOffset;
4790
4791 // Micro block dimension
4792 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4793 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4794 // Morton generation, does 12 bit enough?
4795 blockOffset |=
4796 MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4797
4798 // Sample bits start location
4799 UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
4800 // Join sample bits information to the highest Macro block bits
4801 if (IsNonPrtXor(pIn->swizzleMode))
4802 {
4803 // Non-prt-Xor : xor highest Macro block bits with sample bits
4804 blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4805 }
4806 else
4807 {
4808 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4809 // after this op, the blockOffset only contains log2 Macro block size bits
4810 blockOffset %= (1 << sampleStart);
4811 blockOffset |= (pIn->sample << sampleStart);
4812 ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
4813 }
4814 }
4815
4816 if (IsXor(pIn->swizzleMode))
4817 {
4818 // Mask off bits above Macro block bits to keep page synonyms working for prt
4819 if (IsPrt(pIn->swizzleMode))
4820 {
4821 blockOffset &= ((1 << log2BlkSize) - 1);
4822 }
4823
4824 // Preserve offset inside pipe interleave
4825 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4826 blockOffset >>= m_pipeInterleaveLog2;
4827
4828 // Pipe/Se xor bits
4829 pipeBits = GetPipeXorBits(log2BlkSize);
4830 // Pipe xor
4831 pipeXor = FoldXor2d(blockOffset, pipeBits);
4832 blockOffset >>= pipeBits;
4833
4834 // Bank xor bits
4835 bankBits = GetBankXorBits(log2BlkSize);
4836 // Bank Xor
4837 bankXor = FoldXor2d(blockOffset, bankBits);
4838 blockOffset >>= bankBits;
4839
4840 // Put all the part back together
4841 blockOffset <<= bankBits;
4842 blockOffset |= bankXor;
4843 blockOffset <<= pipeBits;
4844 blockOffset |= pipeXor;
4845 blockOffset <<= m_pipeInterleaveLog2;
4846 blockOffset |= interleaveOffset;
4847 }
4848
4849 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4850 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4851
4852 blockOffset |= mipTailBytesOffset;
4853
4854 if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4855 {
4856 // Apply slice xor if not MSAA/PRT
4857 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4858 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4859 (m_pipeInterleaveLog2 + pipeBits));
4860 }
4861
4862 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4863 bankBits, pipeBits, &blockOffset);
4864
4865 blockOffset %= (1 << log2BlkSize);
4866
4867 UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4868 UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4869 UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4870 UINT_64 macroBlockIndex =
4871 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4872 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4873 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4874
4875 pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
4876 }
4877 else
4878 {
4879 UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4880
4881 Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4882
4883 UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4884 (pIn->y / microBlockDim.h),
4885 (pIn->slice / microBlockDim.d),
4886 8);
4887
4888 blockOffset <<= 10;
4889 blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4890
4891 if (IsXor(pIn->swizzleMode))
4892 {
4893 // Mask off bits above Macro block bits to keep page synonyms working for prt
4894 if (IsPrt(pIn->swizzleMode))
4895 {
4896 blockOffset &= ((1 << log2BlkSize) - 1);
4897 }
4898
4899 // Preserve offset inside pipe interleave
4900 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4901 blockOffset >>= m_pipeInterleaveLog2;
4902
4903 // Pipe/Se xor bits
4904 pipeBits = GetPipeXorBits(log2BlkSize);
4905 // Pipe xor
4906 pipeXor = FoldXor3d(blockOffset, pipeBits);
4907 blockOffset >>= pipeBits;
4908
4909 // Bank xor bits
4910 bankBits = GetBankXorBits(log2BlkSize);
4911 // Bank Xor
4912 bankXor = FoldXor3d(blockOffset, bankBits);
4913 blockOffset >>= bankBits;
4914
4915 // Put all the part back together
4916 blockOffset <<= bankBits;
4917 blockOffset |= bankXor;
4918 blockOffset <<= pipeBits;
4919 blockOffset |= pipeXor;
4920 blockOffset <<= m_pipeInterleaveLog2;
4921 blockOffset |= interleaveOffset;
4922 }
4923
4924 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4925 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4926 blockOffset |= mipTailBytesOffset;
4927
4928 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4929 bankBits, pipeBits, &blockOffset);
4930
4931 blockOffset %= (1 << log2BlkSize);
4932
4933 UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
4934 UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4935 UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4936
4937 UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4938 UINT_32 sliceSizeInBlock =
4939 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4940 UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4941
4942 pOut->addr = blockOffset | (blockIndex << log2BlkSize);
4943 }
4944 }
4945 else
4946 {
4947 returnCode = ADDR_INVALIDPARAMS;
4948 }
4949
4950 return returnCode;
4951 }
4952
4953 /**
4954 ************************************************************************************************************************
4955 * Gfx9Lib::ComputeSurfaceLinearPadding
4956 *
4957 * @brief
4958 * Internal function to calculate padding for linear swizzle 2D/3D surface
4959 *
4960 * @return
4961 * N/A
4962 ************************************************************************************************************************
4963 */
4964 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
4965 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture
4966 UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element
4967 UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW
4968 ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information
4969 ) const
4970 {
4971 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4972
4973 UINT_32 elementBytes = pIn->bpp >> 3;
4974 UINT_32 pitchAlignInElement = 0;
4975
4976 if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
4977 {
4978 ADDR_ASSERT(pIn->numMipLevels <= 1);
4979 ADDR_ASSERT(pIn->numSlices <= 1);
4980 pitchAlignInElement = 1;
4981 }
4982 else
4983 {
4984 pitchAlignInElement = (256 / elementBytes);
4985 }
4986
4987 UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement);
4988 UINT_32 slice0PaddedHeight = pIn->height;
4989
4990 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4991 &mipChainWidth, &slice0PaddedHeight);
4992
4993 if (returnCode == ADDR_OK)
4994 {
4995 UINT_32 mipChainHeight = 0;
4996 UINT_32 mipHeight = pIn->height;
4997 UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4998
4999 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
5000 {
5001 if (pMipInfo != NULL)
5002 {
5003 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
5004 pMipInfo[i].pitch = mipChainWidth;
5005 pMipInfo[i].height = mipHeight;
5006 pMipInfo[i].depth = mipDepth;
5007 }
5008
5009 mipChainHeight += mipHeight;
5010 mipHeight = RoundHalf(mipHeight);
5011 mipHeight = Max(mipHeight, 1u);
5012 }
5013
5014 *pMipmap0PaddedWidth = mipChainWidth;
5015 *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
5016 }
5017
5018 return returnCode;
5019 }
5020
5021 /**
5022 ************************************************************************************************************************
5023 * Gfx9Lib::ComputeThinBlockDimension
5024 *
5025 * @brief
5026 * Internal function to get thin block width/height/depth in element from surface input params.
5027 *
5028 * @return
5029 * N/A
5030 ************************************************************************************************************************
5031 */
5032 VOID Gfx9Lib::ComputeThinBlockDimension(
5033 UINT_32* pWidth,
5034 UINT_32* pHeight,
5035 UINT_32* pDepth,
5036 UINT_32 bpp,
5037 UINT_32 numSamples,
5038 AddrResourceType resourceType,
5039 AddrSwizzleMode swizzleMode) const
5040 {
5041 ADDR_ASSERT(IsThin(resourceType, swizzleMode));
5042
5043 const UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
5044 const UINT_32 eleBytes = bpp >> 3;
5045 const UINT_32 microBlockSizeTableIndex = Log2(eleBytes);
5046 const UINT_32 log2blkSizeIn256B = log2BlkSize - 8;
5047 const UINT_32 widthAmp = log2blkSizeIn256B / 2;
5048 const UINT_32 heightAmp = log2blkSizeIn256B - widthAmp;
5049
5050 ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));
5051
5052 *pWidth = (Block256_2d[microBlockSizeTableIndex].w << widthAmp);
5053 *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp);
5054 *pDepth = 1;
5055
5056 if (numSamples > 1)
5057 {
5058 const UINT_32 log2sample = Log2(numSamples);
5059 const UINT_32 q = log2sample >> 1;
5060 const UINT_32 r = log2sample & 1;
5061
5062 if (log2BlkSize & 1)
5063 {
5064 *pWidth >>= q;
5065 *pHeight >>= (q + r);
5066 }
5067 else
5068 {
5069 *pWidth >>= (q + r);
5070 *pHeight >>= q;
5071 }
5072 }
5073 }
5074
5075 } // V2
5076 } // Addr