ece83592fc98ea4d60809a6f5e39f72b2f3100f2
[mesa.git] / src / amd / addrlib / src / gfx9 / gfx9addrlib.cpp
1 /*
2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx9addrlib.h"
35
36 #include "gfx9_gb_reg.h"
37
38 #include "amdgpu_asic_addr.h"
39
40 #include "util/macros.h"
41
42 ////////////////////////////////////////////////////////////////////////////////////////////////////
43 ////////////////////////////////////////////////////////////////////////////////////////////////////
44
45 namespace Addr
46 {
47
48 /**
49 ************************************************************************************************************************
50 * Gfx9HwlInit
51 *
52 * @brief
53 * Creates an Gfx9Lib object.
54 *
55 * @return
56 * Returns an Gfx9Lib object pointer.
57 ************************************************************************************************************************
58 */
59 Addr::Lib* Gfx9HwlInit(const Client* pClient)
60 {
61 return V2::Gfx9Lib::CreateObj(pClient);
62 }
63
64 namespace V2
65 {
66
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68 // Static Const Member
69 ////////////////////////////////////////////////////////////////////////////////////////////////////
70
71 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
72 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
73 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
74 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
75 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
76 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_R
77
78 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
79 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
80 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
81 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_R
82
83 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
84 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
85 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
86 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_R
87
88 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
89 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
90 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
91 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
92
93 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_Z_T
94 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
95 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
96 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0}, // ADDR_SW_64KB_R_T
97
98 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_Z_x
99 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_x
100 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_x
101 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_4KB_R_x
102
103 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
104 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
105 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
106 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_64KB_R_X
107
108 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
109 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
110 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
111 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
112 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
113 };
114
115 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};
116
117 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
118
119 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
120
121 /**
122 ************************************************************************************************************************
123 * Gfx9Lib::Gfx9Lib
124 *
125 * @brief
126 * Constructor
127 *
128 ************************************************************************************************************************
129 */
130 Gfx9Lib::Gfx9Lib(const Client* pClient)
131 :
132 Lib(pClient)
133 {
134 m_class = AI_ADDRLIB;
135 memset(&m_settings, 0, sizeof(m_settings));
136 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
137 memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
138 m_metaEqOverrideIndex = 0;
139 }
140
141 /**
142 ************************************************************************************************************************
143 * Gfx9Lib::~Gfx9Lib
144 *
145 * @brief
146 * Destructor
147 ************************************************************************************************************************
148 */
149 Gfx9Lib::~Gfx9Lib()
150 {
151 }
152
153 /**
154 ************************************************************************************************************************
155 * Gfx9Lib::HwlComputeHtileInfo
156 *
157 * @brief
158 * Interface function stub of AddrComputeHtilenfo
159 *
160 * @return
161 * ADDR_E_RETURNCODE
162 ************************************************************************************************************************
163 */
164 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
165 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
166 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
167 ) const
168 {
169 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
170 pIn->swizzleMode);
171
172 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
173
174 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
175
176 if ((numPipeTotal == 1) && (numRbTotal == 1))
177 {
178 numCompressBlkPerMetaBlkLog2 = 10;
179 }
180 else
181 {
182 if (m_settings.applyAliasFix)
183 {
184 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
185 }
186 else
187 {
188 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
189 }
190 }
191
192 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
193
194 Dim3d metaBlkDim = {8, 8, 1};
195 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
196 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
197 UINT_32 heightAmp = totalAmpBits - widthAmp;
198 metaBlkDim.w <<= widthAmp;
199 metaBlkDim.h <<= heightAmp;
200
201 #if DEBUG
202 Dim3d metaBlkDimDbg = {8, 8, 1};
203 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
204 {
205 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
206 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
207 {
208 metaBlkDimDbg.h <<= 1;
209 }
210 else
211 {
212 metaBlkDimDbg.w <<= 1;
213 }
214 }
215 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
216 #endif
217
218 UINT_32 numMetaBlkX;
219 UINT_32 numMetaBlkY;
220 UINT_32 numMetaBlkZ;
221
222 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
223 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
224 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
225
226 const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
227 UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
228
229 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
230 {
231 align *= (numPipeTotal >> 1);
232 }
233
234 align = Max(align, metaBlkSize);
235
236 if (m_settings.metaBaseAlignFix)
237 {
238 align = Max(align, GetBlockSize(pIn->swizzleMode));
239 }
240
241 if (m_settings.htileAlignFix)
242 {
243 const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
244 const INT_32 htileCachelineSizeLog2 = 11;
245 const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
246
247 INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
248
249 align <<= rbMaskPadding;
250 }
251
252 pOut->pitch = numMetaBlkX * metaBlkDim.w;
253 pOut->height = numMetaBlkY * metaBlkDim.h;
254 pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
255
256 pOut->metaBlkWidth = metaBlkDim.w;
257 pOut->metaBlkHeight = metaBlkDim.h;
258 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
259
260 pOut->baseAlign = align;
261 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
262
263 return ADDR_OK;
264 }
265
266 /**
267 ************************************************************************************************************************
268 * Gfx9Lib::HwlComputeCmaskInfo
269 *
270 * @brief
271 * Interface function stub of AddrComputeCmaskInfo
272 *
273 * @return
274 * ADDR_E_RETURNCODE
275 ************************************************************************************************************************
276 */
277 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
278 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
279 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
280 ) const
281 {
282 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
283
284 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
285 pIn->swizzleMode);
286
287 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
288
289 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
290
291 if ((numPipeTotal == 1) && (numRbTotal == 1))
292 {
293 numCompressBlkPerMetaBlkLog2 = 13;
294 }
295 else
296 {
297 if (m_settings.applyAliasFix)
298 {
299 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
300 }
301 else
302 {
303 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
304 }
305
306 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
307 }
308
309 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
310
311 Dim2d metaBlkDim = {8, 8};
312 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
313 UINT_32 heightAmp = totalAmpBits >> 1;
314 UINT_32 widthAmp = totalAmpBits - heightAmp;
315 metaBlkDim.w <<= widthAmp;
316 metaBlkDim.h <<= heightAmp;
317
318 #if DEBUG
319 Dim2d metaBlkDimDbg = {8, 8};
320 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
321 {
322 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
323 {
324 metaBlkDimDbg.h <<= 1;
325 }
326 else
327 {
328 metaBlkDimDbg.w <<= 1;
329 }
330 }
331 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
332 #endif
333
334 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
335 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
336 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
337
338 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
339
340 if (m_settings.metaBaseAlignFix)
341 {
342 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
343 }
344
345 pOut->pitch = numMetaBlkX * metaBlkDim.w;
346 pOut->height = numMetaBlkY * metaBlkDim.h;
347 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
348 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
349 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
350
351 pOut->metaBlkWidth = metaBlkDim.w;
352 pOut->metaBlkHeight = metaBlkDim.h;
353
354 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
355
356 return ADDR_OK;
357 }
358
359 /**
360 ************************************************************************************************************************
361 * Gfx9Lib::GetMetaMipInfo
362 *
363 * @brief
364 * Get meta mip info
365 *
366 * @return
367 * N/A
368 ************************************************************************************************************************
369 */
370 VOID Gfx9Lib::GetMetaMipInfo(
371 UINT_32 numMipLevels, ///< [in] number of mip levels
372 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
373 BOOL_32 dataThick, ///< [in] data surface is thick
374 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
375 UINT_32 mip0Width, ///< [in] mip0 width
376 UINT_32 mip0Height, ///< [in] mip0 height
377 UINT_32 mip0Depth, ///< [in] mip0 depth
378 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
379 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
380 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
381 const
382 {
383 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
384 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
385 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
386 UINT_32 tailWidth = pMetaBlkDim->w;
387 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
388 UINT_32 tailDepth = pMetaBlkDim->d;
389 BOOL_32 inTail = FALSE;
390 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
391
392 if (numMipLevels > 1)
393 {
394 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
395 {
396 // Z major
397 major = ADDR_MAJOR_Z;
398 }
399 else if (numMetaBlkX >= numMetaBlkY)
400 {
401 // X major
402 major = ADDR_MAJOR_X;
403 }
404 else
405 {
406 // Y major
407 major = ADDR_MAJOR_Y;
408 }
409
410 inTail = ((mip0Width <= tailWidth) &&
411 (mip0Height <= tailHeight) &&
412 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
413
414 if (inTail == FALSE)
415 {
416 UINT_32 orderLimit;
417 UINT_32 *pMipDim;
418 UINT_32 *pOrderDim;
419
420 if (major == ADDR_MAJOR_Z)
421 {
422 // Z major
423 pMipDim = &numMetaBlkY;
424 pOrderDim = &numMetaBlkZ;
425 orderLimit = 4;
426 }
427 else if (major == ADDR_MAJOR_X)
428 {
429 // X major
430 pMipDim = &numMetaBlkY;
431 pOrderDim = &numMetaBlkX;
432 orderLimit = 4;
433 }
434 else
435 {
436 // Y major
437 pMipDim = &numMetaBlkX;
438 pOrderDim = &numMetaBlkY;
439 orderLimit = 2;
440 }
441
442 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
443 {
444 *pMipDim += 2;
445 }
446 else
447 {
448 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
449 }
450 }
451 }
452
453 if (pInfo != NULL)
454 {
455 UINT_32 mipWidth = mip0Width;
456 UINT_32 mipHeight = mip0Height;
457 UINT_32 mipDepth = mip0Depth;
458 Dim3d mipCoord = {0};
459
460 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
461 {
462 if (inTail)
463 {
464 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
465 pMetaBlkDim);
466 break;
467 }
468 else
469 {
470 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
471 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
472 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
473
474 pInfo[mip].inMiptail = FALSE;
475 pInfo[mip].startX = mipCoord.w;
476 pInfo[mip].startY = mipCoord.h;
477 pInfo[mip].startZ = mipCoord.d;
478 pInfo[mip].width = mipWidth;
479 pInfo[mip].height = mipHeight;
480 pInfo[mip].depth = dataThick ? mipDepth : 1;
481
482 if ((mip >= 3) || (mip & 1))
483 {
484 switch (major)
485 {
486 case ADDR_MAJOR_X:
487 mipCoord.w += mipWidth;
488 break;
489 case ADDR_MAJOR_Y:
490 mipCoord.h += mipHeight;
491 break;
492 case ADDR_MAJOR_Z:
493 mipCoord.d += mipDepth;
494 break;
495 default:
496 break;
497 }
498 }
499 else
500 {
501 switch (major)
502 {
503 case ADDR_MAJOR_X:
504 mipCoord.h += mipHeight;
505 break;
506 case ADDR_MAJOR_Y:
507 mipCoord.w += mipWidth;
508 break;
509 case ADDR_MAJOR_Z:
510 mipCoord.h += mipHeight;
511 break;
512 default:
513 break;
514 }
515 }
516
517 mipWidth = Max(mipWidth >> 1, 1u);
518 mipHeight = Max(mipHeight >> 1, 1u);
519 mipDepth = Max(mipDepth >> 1, 1u);
520
521 inTail = ((mipWidth <= tailWidth) &&
522 (mipHeight <= tailHeight) &&
523 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
524 }
525 }
526 }
527
528 *pNumMetaBlkX = numMetaBlkX;
529 *pNumMetaBlkY = numMetaBlkY;
530 *pNumMetaBlkZ = numMetaBlkZ;
531 }
532
533 /**
534 ************************************************************************************************************************
535 * Gfx9Lib::HwlComputeDccInfo
536 *
537 * @brief
538 * Interface function to compute DCC key info
539 *
540 * @return
541 * ADDR_E_RETURNCODE
542 ************************************************************************************************************************
543 */
544 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
545 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
546 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
547 ) const
548 {
549 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
550 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
551 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
552
553 if (dataLinear)
554 {
555 metaLinear = TRUE;
556 }
557 else if (metaLinear == TRUE)
558 {
559 pipeAligned = FALSE;
560 }
561
562 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
563
564 if (metaLinear)
565 {
566 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
567 ADDR_ASSERT_ALWAYS();
568
569 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
570 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
571 }
572 else
573 {
574 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
575
576 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
577
578 UINT_32 numFrags = Max(pIn->numFrags, 1u);
579 UINT_32 numSlices = Max(pIn->numSlices, 1u);
580
581 minMetaBlkSize /= numFrags;
582
583 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
584
585 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
586
587 if ((numPipeTotal > 1) || (numRbTotal > 1))
588 {
589 const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
590
591 numCompressBlkPerMetaBlk =
592 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
593
594 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
595 {
596 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
597 }
598 }
599
600 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
601 Dim3d metaBlkDim = compressBlkDim;
602
603 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
604 {
605 if ((metaBlkDim.h < metaBlkDim.w) ||
606 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
607 {
608 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
609 {
610 metaBlkDim.h <<= 1;
611 }
612 else
613 {
614 metaBlkDim.d <<= 1;
615 }
616 }
617 else
618 {
619 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
620 {
621 metaBlkDim.w <<= 1;
622 }
623 else
624 {
625 metaBlkDim.d <<= 1;
626 }
627 }
628 }
629
630 UINT_32 numMetaBlkX;
631 UINT_32 numMetaBlkY;
632 UINT_32 numMetaBlkZ;
633
634 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
635 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
636 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
637
638 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
639
640 if (numFrags > m_maxCompFrag)
641 {
642 sizeAlign *= (numFrags / m_maxCompFrag);
643 }
644
645 if (m_settings.metaBaseAlignFix)
646 {
647 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
648 }
649
650 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
651 numCompressBlkPerMetaBlk * numFrags;
652 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
653 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
654
655 pOut->pitch = numMetaBlkX * metaBlkDim.w;
656 pOut->height = numMetaBlkY * metaBlkDim.h;
657 pOut->depth = numMetaBlkZ * metaBlkDim.d;
658
659 pOut->compressBlkWidth = compressBlkDim.w;
660 pOut->compressBlkHeight = compressBlkDim.h;
661 pOut->compressBlkDepth = compressBlkDim.d;
662
663 pOut->metaBlkWidth = metaBlkDim.w;
664 pOut->metaBlkHeight = metaBlkDim.h;
665 pOut->metaBlkDepth = metaBlkDim.d;
666
667 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
668 pOut->fastClearSizePerSlice =
669 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
670 }
671
672 return ADDR_OK;
673 }
674
675 /**
676 ************************************************************************************************************************
677 * Gfx9Lib::HwlComputeMaxBaseAlignments
678 *
679 * @brief
680 * Gets maximum alignments
681 * @return
682 * maximum alignments
683 ************************************************************************************************************************
684 */
685 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
686 {
687 return Size64K;
688 }
689
690 /**
691 ************************************************************************************************************************
692 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
693 *
694 * @brief
695 * Gets maximum alignments for metadata
696 * @return
697 * maximum alignments for metadata
698 ************************************************************************************************************************
699 */
700 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
701 {
702 // Max base alignment for Htile
703 const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
704 const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
705
706 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
707 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
708 ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
709 const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
710
711 UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
712
713 if (maxNumPipeTotal > 2)
714 {
715 maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
716 }
717
718 maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
719
720 if (m_settings.metaBaseAlignFix)
721 {
722 maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
723 }
724
725 if (m_settings.htileAlignFix)
726 {
727 maxBaseAlignHtile *= maxNumPipeTotal;
728 }
729
730 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
731
732 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
733 UINT_32 maxBaseAlignDcc3D = 65536;
734
735 if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
736 {
737 maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
738 }
739
740 // Max base alignment for Msaa Dcc
741 UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
742
743 if (m_settings.metaBaseAlignFix)
744 {
745 maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
746 }
747
748 return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
749 }
750
751 /**
752 ************************************************************************************************************************
753 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
754 *
755 * @brief
756 * Interface function stub of AddrComputeCmaskAddrFromCoord
757 *
758 * @return
759 * ADDR_E_RETURNCODE
760 ************************************************************************************************************************
761 */
762 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
763 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
764 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
765 {
766 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
767 input.size = sizeof(input);
768 input.cMaskFlags = pIn->cMaskFlags;
769 input.colorFlags = pIn->colorFlags;
770 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
771 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
772 input.numSlices = Max(pIn->numSlices, 1u);
773 input.swizzleMode = pIn->swizzleMode;
774 input.resourceType = pIn->resourceType;
775
776 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
777 output.size = sizeof(output);
778
779 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
780
781 if (returnCode == ADDR_OK)
782 {
783 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
784 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
785 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
786 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
787
788 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
789 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
790 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
791
792 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
793
794 UINT_32 xb = pIn->x / output.metaBlkWidth;
795 UINT_32 yb = pIn->y / output.metaBlkHeight;
796 UINT_32 zb = pIn->slice;
797
798 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
799 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
800 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
801
802 UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, 0, blockIndex };
803 UINT_64 address = pMetaEq->solve(coords);
804
805 pOut->addr = address >> 1;
806 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
807
808 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
809 pIn->swizzleMode);
810
811 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
812
813 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
814 }
815
816 return returnCode;
817 }
818
819 /**
820 ************************************************************************************************************************
821 * Gfx9Lib::HwlComputeHtileAddrFromCoord
822 *
823 * @brief
824 * Interface function stub of AddrComputeHtileAddrFromCoord
825 *
826 * @return
827 * ADDR_E_RETURNCODE
828 ************************************************************************************************************************
829 */
830 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
831 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
832 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
833 {
834 ADDR_E_RETURNCODE returnCode = ADDR_OK;
835
836 if (pIn->numMipLevels > 1)
837 {
838 returnCode = ADDR_NOTIMPLEMENTED;
839 }
840 else
841 {
842 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
843 input.size = sizeof(input);
844 input.hTileFlags = pIn->hTileFlags;
845 input.depthFlags = pIn->depthflags;
846 input.swizzleMode = pIn->swizzleMode;
847 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
848 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
849 input.numSlices = Max(pIn->numSlices, 1u);
850 input.numMipLevels = Max(pIn->numMipLevels, 1u);
851
852 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
853 output.size = sizeof(output);
854
855 returnCode = ComputeHtileInfo(&input, &output);
856
857 if (returnCode == ADDR_OK)
858 {
859 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
860 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
861 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
862 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
863
864 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
865 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
866 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
867
868 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
869
870 UINT_32 xb = pIn->x / output.metaBlkWidth;
871 UINT_32 yb = pIn->y / output.metaBlkHeight;
872 UINT_32 zb = pIn->slice;
873
874 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
875 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
876 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
877
878 UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, 0, blockIndex };
879 UINT_64 address = pMetaEq->solve(coords);
880
881 pOut->addr = address >> 1;
882
883 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
884 pIn->swizzleMode);
885
886 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
887
888 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
889 }
890 }
891
892 return returnCode;
893 }
894
895 /**
896 ************************************************************************************************************************
897 * Gfx9Lib::HwlComputeHtileCoordFromAddr
898 *
899 * @brief
900 * Interface function stub of AddrComputeHtileCoordFromAddr
901 *
902 * @return
903 * ADDR_E_RETURNCODE
904 ************************************************************************************************************************
905 */
906 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
907 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
908 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
909 {
910 ADDR_E_RETURNCODE returnCode = ADDR_OK;
911
912 if (pIn->numMipLevels > 1)
913 {
914 returnCode = ADDR_NOTIMPLEMENTED;
915 }
916 else
917 {
918 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
919 input.size = sizeof(input);
920 input.hTileFlags = pIn->hTileFlags;
921 input.swizzleMode = pIn->swizzleMode;
922 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
923 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
924 input.numSlices = Max(pIn->numSlices, 1u);
925 input.numMipLevels = Max(pIn->numMipLevels, 1u);
926
927 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
928 output.size = sizeof(output);
929
930 returnCode = ComputeHtileInfo(&input, &output);
931
932 if (returnCode == ADDR_OK)
933 {
934 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
935 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
936 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
937 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
938
939 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
940 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
941 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
942
943 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
944
945 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
946 pIn->swizzleMode);
947
948 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
949
950 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
951
952 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
953 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
954
955 UINT_32 coords[NUM_DIMS];
956 pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, coords);
957
958 pOut->slice = coords[DIM_M] / sliceSizeInBlock;
959 pOut->y = ((coords[DIM_M] % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + coords[DIM_Y];
960 pOut->x = (coords[DIM_M] % pitchInBlock) * output.metaBlkWidth + coords[DIM_X];
961 }
962 }
963
964 return returnCode;
965 }
966
967 /**
968 ************************************************************************************************************************
969 * Gfx9Lib::HwlComputeDccAddrFromCoord
970 *
971 * @brief
972 * Interface function stub of AddrComputeDccAddrFromCoord
973 *
974 * @return
975 * ADDR_E_RETURNCODE
976 ************************************************************************************************************************
977 */
978 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
979 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
980 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
981 {
982 ADDR_E_RETURNCODE returnCode = ADDR_OK;
983
984 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
985 {
986 returnCode = ADDR_NOTIMPLEMENTED;
987 }
988 else
989 {
990 ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
991 input.size = sizeof(input);
992 input.dccKeyFlags = pIn->dccKeyFlags;
993 input.colorFlags = pIn->colorFlags;
994 input.swizzleMode = pIn->swizzleMode;
995 input.resourceType = pIn->resourceType;
996 input.bpp = pIn->bpp;
997 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
998 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
999 input.numSlices = Max(pIn->numSlices, 1u);
1000 input.numFrags = Max(pIn->numFrags, 1u);
1001 input.numMipLevels = Max(pIn->numMipLevels, 1u);
1002
1003 ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
1004 output.size = sizeof(output);
1005
1006 returnCode = ComputeDccInfo(&input, &output);
1007
1008 if (returnCode == ADDR_OK)
1009 {
1010 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
1011 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
1012 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
1013 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1014 UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth);
1015 UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth);
1016 UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
1017 UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
1018
1019 MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1020 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1021 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1022 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1023
1024 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1025
1026 UINT_32 xb = pIn->x / output.metaBlkWidth;
1027 UINT_32 yb = pIn->y / output.metaBlkHeight;
1028 UINT_32 zb = pIn->slice / output.metaBlkDepth;
1029
1030 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
1031 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1032 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1033
1034 UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
1035 UINT_64 address = pMetaEq->solve(coords);
1036
1037 pOut->addr = address >> 1;
1038
1039 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1040 pIn->swizzleMode);
1041
1042 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1043
1044 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1045 }
1046 }
1047
1048 return returnCode;
1049 }
1050
1051 /**
1052 ************************************************************************************************************************
1053 * Gfx9Lib::HwlInitGlobalParams
1054 *
1055 * @brief
1056 * Initializes global parameters
1057 *
1058 * @return
1059 * TRUE if all settings are valid
1060 *
1061 ************************************************************************************************************************
1062 */
1063 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1064 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1065 {
1066 BOOL_32 valid = TRUE;
1067
1068 if (m_settings.isArcticIsland)
1069 {
1070 GB_ADDR_CONFIG gbAddrConfig;
1071
1072 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1073
1074 // These values are copied from CModel code
1075 switch (gbAddrConfig.bits.NUM_PIPES)
1076 {
1077 case ADDR_CONFIG_1_PIPE:
1078 m_pipes = 1;
1079 m_pipesLog2 = 0;
1080 break;
1081 case ADDR_CONFIG_2_PIPE:
1082 m_pipes = 2;
1083 m_pipesLog2 = 1;
1084 break;
1085 case ADDR_CONFIG_4_PIPE:
1086 m_pipes = 4;
1087 m_pipesLog2 = 2;
1088 break;
1089 case ADDR_CONFIG_8_PIPE:
1090 m_pipes = 8;
1091 m_pipesLog2 = 3;
1092 break;
1093 case ADDR_CONFIG_16_PIPE:
1094 m_pipes = 16;
1095 m_pipesLog2 = 4;
1096 break;
1097 case ADDR_CONFIG_32_PIPE:
1098 m_pipes = 32;
1099 m_pipesLog2 = 5;
1100 break;
1101 default:
1102 ADDR_ASSERT_ALWAYS();
1103 break;
1104 }
1105
1106 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1107 {
1108 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1109 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1110 m_pipeInterleaveLog2 = 8;
1111 break;
1112 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1113 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1114 m_pipeInterleaveLog2 = 9;
1115 break;
1116 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1117 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1118 m_pipeInterleaveLog2 = 10;
1119 break;
1120 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1121 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1122 m_pipeInterleaveLog2 = 11;
1123 break;
1124 default:
1125 ADDR_ASSERT_ALWAYS();
1126 break;
1127 }
1128
1129 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1130 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1131 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1132
1133 switch (gbAddrConfig.bits.NUM_BANKS)
1134 {
1135 case ADDR_CONFIG_1_BANK:
1136 m_banks = 1;
1137 m_banksLog2 = 0;
1138 break;
1139 case ADDR_CONFIG_2_BANK:
1140 m_banks = 2;
1141 m_banksLog2 = 1;
1142 break;
1143 case ADDR_CONFIG_4_BANK:
1144 m_banks = 4;
1145 m_banksLog2 = 2;
1146 break;
1147 case ADDR_CONFIG_8_BANK:
1148 m_banks = 8;
1149 m_banksLog2 = 3;
1150 break;
1151 case ADDR_CONFIG_16_BANK:
1152 m_banks = 16;
1153 m_banksLog2 = 4;
1154 break;
1155 default:
1156 ADDR_ASSERT_ALWAYS();
1157 break;
1158 }
1159
1160 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1161 {
1162 case ADDR_CONFIG_1_SHADER_ENGINE:
1163 m_se = 1;
1164 m_seLog2 = 0;
1165 break;
1166 case ADDR_CONFIG_2_SHADER_ENGINE:
1167 m_se = 2;
1168 m_seLog2 = 1;
1169 break;
1170 case ADDR_CONFIG_4_SHADER_ENGINE:
1171 m_se = 4;
1172 m_seLog2 = 2;
1173 break;
1174 case ADDR_CONFIG_8_SHADER_ENGINE:
1175 m_se = 8;
1176 m_seLog2 = 3;
1177 break;
1178 default:
1179 ADDR_ASSERT_ALWAYS();
1180 break;
1181 }
1182
1183 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1184 {
1185 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1186 m_rbPerSe = 1;
1187 m_rbPerSeLog2 = 0;
1188 break;
1189 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1190 m_rbPerSe = 2;
1191 m_rbPerSeLog2 = 1;
1192 break;
1193 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1194 m_rbPerSe = 4;
1195 m_rbPerSeLog2 = 2;
1196 break;
1197 default:
1198 ADDR_ASSERT_ALWAYS();
1199 break;
1200 }
1201
1202 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1203 {
1204 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1205 m_maxCompFrag = 1;
1206 m_maxCompFragLog2 = 0;
1207 break;
1208 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1209 m_maxCompFrag = 2;
1210 m_maxCompFragLog2 = 1;
1211 break;
1212 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1213 m_maxCompFrag = 4;
1214 m_maxCompFragLog2 = 2;
1215 break;
1216 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1217 m_maxCompFrag = 8;
1218 m_maxCompFragLog2 = 3;
1219 break;
1220 default:
1221 ADDR_ASSERT_ALWAYS();
1222 break;
1223 }
1224
1225 if ((m_rbPerSeLog2 == 1) &&
1226 (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1227 ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1228 {
1229 ADDR_ASSERT(m_settings.isVega10 == FALSE);
1230 ADDR_ASSERT(m_settings.isRaven == FALSE);
1231
1232 ADDR_ASSERT(m_settings.isVega20 == FALSE);
1233
1234 if (m_settings.isVega12)
1235 {
1236 m_settings.htileCacheRbConflict = 1;
1237 }
1238 }
1239
1240 // For simplicity we never allow VAR swizzle mode for GFX9, the actural value is 18 on GFX9
1241 m_blockVarSizeLog2 = 0;
1242 }
1243 else
1244 {
1245 valid = FALSE;
1246 ADDR_NOT_IMPLEMENTED();
1247 }
1248
1249 if (valid)
1250 {
1251 InitEquationTable();
1252 }
1253
1254 return valid;
1255 }
1256
1257 /**
1258 ************************************************************************************************************************
1259 * Gfx9Lib::HwlConvertChipFamily
1260 *
1261 * @brief
1262 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1263 * @return
1264 * ChipFamily
1265 ************************************************************************************************************************
1266 */
1267 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1268 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1269 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1270 {
1271 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1272
1273 switch (uChipFamily)
1274 {
1275 case FAMILY_AI:
1276 m_settings.isArcticIsland = 1;
1277 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1278 m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1279 m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1280 m_settings.isDce12 = 1;
1281
1282 if (m_settings.isVega10 == 0)
1283 {
1284 m_settings.htileAlignFix = 1;
1285 m_settings.applyAliasFix = 1;
1286 }
1287
1288 m_settings.metaBaseAlignFix = 1;
1289
1290 m_settings.depthPipeXorDisable = 1;
1291 break;
1292 case FAMILY_RV:
1293 m_settings.isArcticIsland = 1;
1294
1295 if (ASICREV_IS_RAVEN(uChipRevision))
1296 {
1297 m_settings.isRaven = 1;
1298
1299 m_settings.depthPipeXorDisable = 1;
1300 }
1301
1302 if (ASICREV_IS_RAVEN2(uChipRevision))
1303 {
1304 m_settings.isRaven = 1;
1305 }
1306
1307 if (m_settings.isRaven == 0)
1308 {
1309 m_settings.htileAlignFix = 1;
1310 m_settings.applyAliasFix = 1;
1311 }
1312
1313 if (ASICREV_IS_RENOIR(uChipRevision))
1314 {
1315 m_settings.isRaven = 1;
1316 }
1317
1318 m_settings.isDcn1 = m_settings.isRaven;
1319
1320 m_settings.metaBaseAlignFix = 1;
1321 break;
1322
1323 default:
1324 ADDR_ASSERT(!"This should be a Fusion");
1325 break;
1326 }
1327
1328 return family;
1329 }
1330
1331 /**
1332 ************************************************************************************************************************
1333 * Gfx9Lib::InitRbEquation
1334 *
1335 * @brief
1336 * Init RB equation
1337 * @return
1338 * N/A
1339 ************************************************************************************************************************
1340 */
1341 VOID Gfx9Lib::GetRbEquation(
1342 CoordEq* pRbEq, ///< [out] rb equation
1343 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1344 UINT_32 numSeLog2) ///< [in] number of shader engine
1345 const
1346 {
1347 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1348 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1349 Coordinate cx(DIM_X, rbRegion);
1350 Coordinate cy(DIM_Y, rbRegion);
1351
1352 UINT_32 start = 0;
1353 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1354
1355 // Clear the rb equation
1356 pRbEq->resize(0);
1357 pRbEq->resize(numRbTotalLog2);
1358
1359 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1360 {
1361 // Special case when more than 1 SE, and 2 RB per SE
1362 (*pRbEq)[0].add(cx);
1363 (*pRbEq)[0].add(cy);
1364 cx++;
1365 cy++;
1366
1367 if (m_settings.applyAliasFix == false)
1368 {
1369 (*pRbEq)[0].add(cy);
1370 }
1371
1372 (*pRbEq)[0].add(cy);
1373 start++;
1374 }
1375
1376 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1377
1378 for (UINT_32 i = 0; i < numBits; i++)
1379 {
1380 UINT_32 idx =
1381 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1382
1383 if ((i % 2) == 1)
1384 {
1385 (*pRbEq)[idx].add(cx);
1386 cx++;
1387 }
1388 else
1389 {
1390 (*pRbEq)[idx].add(cy);
1391 cy++;
1392 }
1393 }
1394 }
1395
1396 /**
1397 ************************************************************************************************************************
1398 * Gfx9Lib::GetDataEquation
1399 *
1400 * @brief
1401 * Get data equation for fmask and Z
1402 * @return
1403 * N/A
1404 ************************************************************************************************************************
1405 */
1406 VOID Gfx9Lib::GetDataEquation(
1407 CoordEq* pDataEq, ///< [out] data surface equation
1408 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1409 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1410 AddrResourceType resourceType, ///< [in] data surface resource type
1411 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1412 UINT_32 numSamplesLog2) ///< [in] data surface sample count
1413 const
1414 {
1415 Coordinate cx(DIM_X, 0);
1416 Coordinate cy(DIM_Y, 0);
1417 Coordinate cz(DIM_Z, 0);
1418 Coordinate cs(DIM_S, 0);
1419
1420 // Clear the equation
1421 pDataEq->resize(0);
1422 pDataEq->resize(27);
1423
1424 if (dataSurfaceType == Gfx9DataColor)
1425 {
1426 if (IsLinear(swizzleMode))
1427 {
1428 Coordinate cm(DIM_M, 0);
1429
1430 pDataEq->resize(49);
1431
1432 for (UINT_32 i = 0; i < 49; i++)
1433 {
1434 (*pDataEq)[i].add(cm);
1435 cm++;
1436 }
1437 }
1438 else if (IsThick(resourceType, swizzleMode))
1439 {
1440 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1441 UINT_32 i;
1442 if (IsStandardSwizzle(resourceType, swizzleMode))
1443 {
1444 // Standard 3d swizzle
1445 // Fill in bottom x bits
1446 for (i = elementBytesLog2; i < 4; i++)
1447 {
1448 (*pDataEq)[i].add(cx);
1449 cx++;
1450 }
1451 // Fill in 2 bits of y and then z
1452 for (i = 4; i < 6; i++)
1453 {
1454 (*pDataEq)[i].add(cy);
1455 cy++;
1456 }
1457 for (i = 6; i < 8; i++)
1458 {
1459 (*pDataEq)[i].add(cz);
1460 cz++;
1461 }
1462 if (elementBytesLog2 < 2)
1463 {
1464 // fill in z & y bit
1465 (*pDataEq)[8].add(cz);
1466 (*pDataEq)[9].add(cy);
1467 cz++;
1468 cy++;
1469 }
1470 else if (elementBytesLog2 == 2)
1471 {
1472 // fill in y and x bit
1473 (*pDataEq)[8].add(cy);
1474 (*pDataEq)[9].add(cx);
1475 cy++;
1476 cx++;
1477 }
1478 else
1479 {
1480 // fill in 2 x bits
1481 (*pDataEq)[8].add(cx);
1482 cx++;
1483 (*pDataEq)[9].add(cx);
1484 cx++;
1485 }
1486 }
1487 else
1488 {
1489 // Z 3d swizzle
1490 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1491 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1492 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1493 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1494 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1495 {
1496 (*pDataEq)[i].add(cz);
1497 cz++;
1498 }
1499 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1500 {
1501 // add an x and z
1502 (*pDataEq)[6].add(cx);
1503 (*pDataEq)[7].add(cz);
1504 cx++;
1505 cz++;
1506 }
1507 else if (elementBytesLog2 == 2)
1508 {
1509 // add a y and z
1510 (*pDataEq)[6].add(cy);
1511 (*pDataEq)[7].add(cz);
1512 cy++;
1513 cz++;
1514 }
1515 // add y and x
1516 (*pDataEq)[8].add(cy);
1517 (*pDataEq)[9].add(cx);
1518 cy++;
1519 cx++;
1520 }
1521 // Fill in bit 10 and up
1522 pDataEq->mort3d( cz, cy, cx, 10 );
1523 }
1524 else if (IsThin(resourceType, swizzleMode))
1525 {
1526 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1527 // Color 2D
1528 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1529 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1530 UINT_32 i;
1531 // Fill in bottom x bits
1532 for (i = elementBytesLog2; i < 4; i++)
1533 {
1534 (*pDataEq)[i].add(cx);
1535 cx++;
1536 }
1537 // Fill in bottom y bits
1538 for (i = 4; i < 4 + microYBits; i++)
1539 {
1540 (*pDataEq)[i].add(cy);
1541 cy++;
1542 }
1543 // Fill in last of the micro_x bits
1544 for (i = 4 + microYBits; i < 8; i++)
1545 {
1546 (*pDataEq)[i].add(cx);
1547 cx++;
1548 }
1549 // Fill in x/y bits below sample split
1550 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1551 // Fill in sample bits
1552 for (i = 0; i < numSamplesLog2; i++)
1553 {
1554 cs.set(DIM_S, i);
1555 (*pDataEq)[tileSplitStart + i].add(cs);
1556 }
1557 // Fill in x/y bits above sample split
1558 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1559 {
1560 pDataEq->mort2d(cx, cy, blockSizeLog2);
1561 }
1562 else
1563 {
1564 pDataEq->mort2d(cy, cx, blockSizeLog2);
1565 }
1566 }
1567 else
1568 {
1569 ADDR_ASSERT_ALWAYS();
1570 }
1571 }
1572 else
1573 {
1574 // Fmask or depth
1575 UINT_32 sampleStart = elementBytesLog2;
1576 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1577 UINT_32 ymajStart = 6 + numSamplesLog2;
1578
1579 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1580 {
1581 cs.set(DIM_S, s);
1582 (*pDataEq)[sampleStart + s].add(cs);
1583 }
1584
1585 // Put in the x-major order pixel bits
1586 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1587 // Put in the y-major order pixel bits
1588 pDataEq->mort2d(cy, cx, ymajStart);
1589 }
1590 }
1591
1592 /**
1593 ************************************************************************************************************************
1594 * Gfx9Lib::GetPipeEquation
1595 *
1596 * @brief
1597 * Get pipe equation
1598 * @return
1599 * N/A
1600 ************************************************************************************************************************
1601 */
1602 VOID Gfx9Lib::GetPipeEquation(
1603 CoordEq* pPipeEq, ///< [out] pipe equation
1604 CoordEq* pDataEq, ///< [in] data equation
1605 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1606 UINT_32 numPipeLog2, ///< [in] number of pipes
1607 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1608 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1609 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1610 AddrResourceType resourceType ///< [in] data surface resource type
1611 ) const
1612 {
1613 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1614 CoordEq dataEq;
1615
1616 pDataEq->copy(dataEq);
1617
1618 if (dataSurfaceType == Gfx9DataColor)
1619 {
1620 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1621 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1622 }
1623
1624 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1625
1626 // This section should only apply to z/stencil, maybe fmask
1627 // If the pipe bit is below the comp block size,
1628 // then keep moving up the address until we find a bit that is above
1629 UINT_32 pipeStart = 0;
1630
1631 if (dataSurfaceType != Gfx9DataColor)
1632 {
1633 Coordinate tileMin(DIM_X, 3);
1634
1635 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1636 {
1637 pipeStart++;
1638 }
1639
1640 // if pipe is 0, then the first pipe bit is above the comp block size,
1641 // so we don't need to do anything
1642 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1643 // we will get the same pipe equation
1644 if (pipeStart != 0)
1645 {
1646 for (UINT_32 i = 0; i < numPipeLog2; i++)
1647 {
1648 // Copy the jth bit above pipe interleave to the current pipe equation bit
1649 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1650 }
1651 }
1652 }
1653
1654 if (IsPrt(swizzleMode))
1655 {
1656 // Clear out bits above the block size if prt's are enabled
1657 dataEq.resize(blockSizeLog2);
1658 dataEq.resize(48);
1659 }
1660
1661 if (IsXor(swizzleMode))
1662 {
1663 CoordEq xorMask;
1664
1665 if (IsThick(resourceType, swizzleMode))
1666 {
1667 CoordEq xorMask2;
1668
1669 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1670
1671 xorMask.resize(numPipeLog2);
1672
1673 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1674 {
1675 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1676 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1677 }
1678 }
1679 else
1680 {
1681 // Xor in the bits above the pipe+gpu bits
1682 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1683
1684 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1685 {
1686 Coordinate co;
1687 CoordEq xorMask2;
1688 // if 1xaa and not prt, then xor in the z bits
1689 xorMask2.resize(0);
1690 xorMask2.resize(numPipeLog2);
1691 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1692 {
1693 co.set(DIM_Z, numPipeLog2 - 1 - pipeIdx);
1694 xorMask2[pipeIdx].add(co);
1695 }
1696
1697 pPipeEq->xorin(xorMask2);
1698 }
1699 }
1700
1701 xorMask.reverse();
1702 pPipeEq->xorin(xorMask);
1703 }
1704 }
1705 /**
1706 ************************************************************************************************************************
1707 * Gfx9Lib::GetMetaEquation
1708 *
1709 * @brief
1710 * Get meta equation for cmask/htile/DCC
1711 * @return
1712 * Pointer to a calculated meta equation
1713 ************************************************************************************************************************
1714 */
1715 const CoordEq* Gfx9Lib::GetMetaEquation(
1716 const MetaEqParams& metaEqParams)
1717 {
1718 UINT_32 cachedMetaEqIndex;
1719
1720 for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1721 {
1722 if (memcmp(&metaEqParams,
1723 &m_cachedMetaEqKey[cachedMetaEqIndex],
1724 static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1725 {
1726 break;
1727 }
1728 }
1729
1730 CoordEq* pMetaEq = NULL;
1731
1732 if (cachedMetaEqIndex < MaxCachedMetaEq)
1733 {
1734 pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1735 }
1736 else
1737 {
1738 m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1739
1740 pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1741
1742 m_metaEqOverrideIndex %= MaxCachedMetaEq;
1743
1744 GenMetaEquation(pMetaEq,
1745 metaEqParams.maxMip,
1746 metaEqParams.elementBytesLog2,
1747 metaEqParams.numSamplesLog2,
1748 metaEqParams.metaFlag,
1749 metaEqParams.dataSurfaceType,
1750 metaEqParams.swizzleMode,
1751 metaEqParams.resourceType,
1752 metaEqParams.metaBlkWidthLog2,
1753 metaEqParams.metaBlkHeightLog2,
1754 metaEqParams.metaBlkDepthLog2,
1755 metaEqParams.compBlkWidthLog2,
1756 metaEqParams.compBlkHeightLog2,
1757 metaEqParams.compBlkDepthLog2);
1758 }
1759
1760 return pMetaEq;
1761 }
1762
1763 /**
1764 ************************************************************************************************************************
1765 * Gfx9Lib::GenMetaEquation
1766 *
1767 * @brief
1768 * Get meta equation for cmask/htile/DCC
1769 * @return
1770 * N/A
1771 ************************************************************************************************************************
1772 */
1773 VOID Gfx9Lib::GenMetaEquation(
1774 CoordEq* pMetaEq, ///< [out] meta equation
1775 UINT_32 maxMip, ///< [in] max mip Id
1776 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1777 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1778 ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
1779 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1780 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1781 AddrResourceType resourceType, ///< [in] data surface resource type
1782 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1783 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1784 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1785 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1786 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1787 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1788 const
1789 {
1790 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1791 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1792
1793 // Get the correct data address and rb equation
1794 CoordEq dataEq;
1795 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1796 elementBytesLog2, numSamplesLog2);
1797
1798 // Get pipe and rb equations
1799 CoordEq pipeEquation;
1800 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1801 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1802 numPipeTotalLog2 = pipeEquation.getsize();
1803
1804 if (metaFlag.linear)
1805 {
1806 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1807 ADDR_ASSERT_ALWAYS();
1808
1809 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1810
1811 dataEq.copy(*pMetaEq);
1812
1813 if (IsLinear(swizzleMode))
1814 {
1815 if (metaFlag.pipeAligned)
1816 {
1817 // Remove the pipe bits
1818 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1819 pMetaEq->shift(-shift, pipeInterleaveLog2);
1820 }
1821 // Divide by comp block size, which for linear (which is always color) is 256 B
1822 pMetaEq->shift(-8);
1823
1824 if (metaFlag.pipeAligned)
1825 {
1826 // Put pipe bits back in
1827 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1828
1829 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1830 {
1831 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1832 }
1833 }
1834 }
1835
1836 pMetaEq->shift(1);
1837 }
1838 else
1839 {
1840 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1841 UINT_32 compFragLog2 =
1842 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1843 maxCompFragLog2 : numSamplesLog2;
1844
1845 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1846
1847 // Make sure the metaaddr is cleared
1848 pMetaEq->resize(0);
1849 pMetaEq->resize(27);
1850
1851 if (IsThick(resourceType, swizzleMode))
1852 {
1853 Coordinate cx(DIM_X, 0);
1854 Coordinate cy(DIM_Y, 0);
1855 Coordinate cz(DIM_Z, 0);
1856
1857 if (maxMip > 0)
1858 {
1859 pMetaEq->mort3d(cy, cx, cz);
1860 }
1861 else
1862 {
1863 pMetaEq->mort3d(cx, cy, cz);
1864 }
1865 }
1866 else
1867 {
1868 Coordinate cx(DIM_X, 0);
1869 Coordinate cy(DIM_Y, 0);
1870 Coordinate cs;
1871
1872 if (maxMip > 0)
1873 {
1874 pMetaEq->mort2d(cy, cx, compFragLog2);
1875 }
1876 else
1877 {
1878 pMetaEq->mort2d(cx, cy, compFragLog2);
1879 }
1880
1881 //------------------------------------------------------------------------------------------------------------------------
1882 // Put the compressible fragments at the lsb
1883 // the uncompressible frags will be at the msb of the micro address
1884 //------------------------------------------------------------------------------------------------------------------------
1885 for (UINT_32 s = 0; s < compFragLog2; s++)
1886 {
1887 cs.set(DIM_S, s);
1888 (*pMetaEq)[s].add(cs);
1889 }
1890 }
1891
1892 // Keep a copy of the pipe equations
1893 CoordEq origPipeEquation;
1894 pipeEquation.copy(origPipeEquation);
1895
1896 Coordinate co;
1897 // filter out everything under the compressed block size
1898 co.set(DIM_X, compBlkWidthLog2);
1899 pMetaEq->Filter('<', co, 0, DIM_X);
1900 co.set(DIM_Y, compBlkHeightLog2);
1901 pMetaEq->Filter('<', co, 0, DIM_Y);
1902 co.set(DIM_Z, compBlkDepthLog2);
1903 pMetaEq->Filter('<', co, 0, DIM_Z);
1904
1905 // For non-color, filter out sample bits
1906 if (dataSurfaceType != Gfx9DataColor)
1907 {
1908 co.set(DIM_X, 0);
1909 pMetaEq->Filter('<', co, 0, DIM_S);
1910 }
1911
1912 // filter out everything above the metablock size
1913 co.set(DIM_X, metaBlkWidthLog2 - 1);
1914 pMetaEq->Filter('>', co, 0, DIM_X);
1915 co.set(DIM_Y, metaBlkHeightLog2 - 1);
1916 pMetaEq->Filter('>', co, 0, DIM_Y);
1917 co.set(DIM_Z, metaBlkDepthLog2 - 1);
1918 pMetaEq->Filter('>', co, 0, DIM_Z);
1919
1920 // filter out everything above the metablock size for the channel bits
1921 co.set(DIM_X, metaBlkWidthLog2 - 1);
1922 pipeEquation.Filter('>', co, 0, DIM_X);
1923 co.set(DIM_Y, metaBlkHeightLog2 - 1);
1924 pipeEquation.Filter('>', co, 0, DIM_Y);
1925 co.set(DIM_Z, metaBlkDepthLog2 - 1);
1926 pipeEquation.Filter('>', co, 0, DIM_Z);
1927
1928 // Make sure we still have the same number of channel bits
1929 if (pipeEquation.getsize() != numPipeTotalLog2)
1930 {
1931 ADDR_ASSERT_ALWAYS();
1932 }
1933
1934 // Loop through all channel and rb bits,
1935 // and make sure these components exist in the metadata address
1936 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1937 {
1938 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1939 {
1940 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1941 {
1942 ADDR_ASSERT_ALWAYS();
1943 }
1944 }
1945 }
1946
1947 const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
1948 const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1949 const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1950 CoordEq origRbEquation;
1951
1952 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1953
1954 CoordEq rbEquation = origRbEquation;
1955
1956 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1957 {
1958 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1959 {
1960 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1961 {
1962 ADDR_ASSERT_ALWAYS();
1963 }
1964 }
1965 }
1966
1967 if (m_settings.applyAliasFix)
1968 {
1969 co.set(DIM_Z, -1);
1970 }
1971
1972 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1973 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1974 {
1975 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1976 {
1977 BOOL_32 isRbEquationInPipeEquation = FALSE;
1978
1979 if (m_settings.applyAliasFix)
1980 {
1981 CoordTerm filteredPipeEq;
1982 filteredPipeEq = pipeEquation[j];
1983
1984 filteredPipeEq.Filter('>', co, 0, DIM_Z);
1985
1986 isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1987 }
1988 else
1989 {
1990 isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1991 }
1992
1993 if (isRbEquationInPipeEquation)
1994 {
1995 rbEquation[i].Clear();
1996 }
1997 }
1998 }
1999
2000 bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
2001
2002 // Loop through each bit of the channel, get the smallest coordinate,
2003 // and remove it from the metaaddr, and rb_equation
2004 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2005 {
2006 pipeEquation[i].getsmallest(co);
2007
2008 UINT_32 old_size = pMetaEq->getsize();
2009 pMetaEq->Filter('=', co);
2010 UINT_32 new_size = pMetaEq->getsize();
2011 if (new_size != old_size-1)
2012 {
2013 ADDR_ASSERT_ALWAYS();
2014 }
2015 pipeEquation.remove(co);
2016 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2017 {
2018 if (rbEquation[j].remove(co))
2019 {
2020 // if we actually removed something from this bit, then add the remaining
2021 // channel bits, as these can be removed for this bit
2022 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2023 {
2024 if (pipeEquation[i][k] != co)
2025 {
2026 rbEquation[j].add(pipeEquation[i][k]);
2027 rbAppendedWithPipeBits[j] = true;
2028 }
2029 }
2030 }
2031 }
2032 }
2033
2034 // Loop through the rb bits and see what remain;
2035 // filter out the smallest coordinate if it remains
2036 UINT_32 rbBitsLeft = 0;
2037 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2038 {
2039 BOOL_32 isRbEqAppended = FALSE;
2040
2041 if (m_settings.applyAliasFix)
2042 {
2043 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2044 }
2045 else
2046 {
2047 isRbEqAppended = (rbEquation[i].getsize() > 0);
2048 }
2049
2050 if (isRbEqAppended)
2051 {
2052 rbBitsLeft++;
2053 rbEquation[i].getsmallest(co);
2054 UINT_32 old_size = pMetaEq->getsize();
2055 pMetaEq->Filter('=', co);
2056 UINT_32 new_size = pMetaEq->getsize();
2057 if (new_size != old_size - 1)
2058 {
2059 // assert warning
2060 }
2061 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2062 {
2063 if (rbEquation[j].remove(co))
2064 {
2065 // if we actually removed something from this bit, then add the remaining
2066 // rb bits, as these can be removed for this bit
2067 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2068 {
2069 if (rbEquation[i][k] != co)
2070 {
2071 rbEquation[j].add(rbEquation[i][k]);
2072 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2073 }
2074 }
2075 }
2076 }
2077 }
2078 }
2079
2080 // capture the size of the metaaddr
2081 UINT_32 metaSize = pMetaEq->getsize();
2082 // resize to 49 bits...make this a nibble address
2083 pMetaEq->resize(49);
2084 // Concatenate the macro address above the current address
2085 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2086 {
2087 co.set(DIM_M, j);
2088 (*pMetaEq)[i].add(co);
2089 }
2090
2091 // Multiply by meta element size (in nibbles)
2092 if (dataSurfaceType == Gfx9DataColor)
2093 {
2094 pMetaEq->shift(1);
2095 }
2096 else if (dataSurfaceType == Gfx9DataDepthStencil)
2097 {
2098 pMetaEq->shift(3);
2099 }
2100
2101 //------------------------------------------------------------------------------------------
2102 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2103 // Shift up from pipe interleave number of channel
2104 // and rb bits left, and uncompressed fragments
2105 //------------------------------------------------------------------------------------------
2106
2107 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2108
2109 // Put in the channel bits
2110 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2111 {
2112 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2113 }
2114
2115 // Put in remaining rb bits
2116 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2117 {
2118 BOOL_32 isRbEqAppended = FALSE;
2119
2120 if (m_settings.applyAliasFix)
2121 {
2122 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2123 }
2124 else
2125 {
2126 isRbEqAppended = (rbEquation[i].getsize() > 0);
2127 }
2128
2129 if (isRbEqAppended)
2130 {
2131 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2132 // Mark any rb bit we add in to the rb mask
2133 j++;
2134 }
2135 }
2136
2137 //------------------------------------------------------------------------------------------
2138 // Put in the uncompressed fragment bits
2139 //------------------------------------------------------------------------------------------
2140 for (UINT_32 i = 0; i < uncompFragLog2; i++)
2141 {
2142 co.set(DIM_S, compFragLog2 + i);
2143 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2144 }
2145 }
2146 }
2147
2148 /**
2149 ************************************************************************************************************************
2150 * Gfx9Lib::IsEquationSupported
2151 *
2152 * @brief
2153 * Check if equation is supported for given swizzle mode and resource type.
2154 *
2155 * @return
2156 * TRUE if supported
2157 ************************************************************************************************************************
2158 */
2159 BOOL_32 Gfx9Lib::IsEquationSupported(
2160 AddrResourceType rsrcType,
2161 AddrSwizzleMode swMode,
2162 UINT_32 elementBytesLog2) const
2163 {
2164 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2165 (IsValidSwMode(swMode) == TRUE) &&
2166 (IsLinear(swMode) == FALSE) &&
2167 (((IsTex2d(rsrcType) == TRUE) &&
2168 ((elementBytesLog2 < 4) ||
2169 ((IsRotateSwizzle(swMode) == FALSE) &&
2170 (IsZOrderSwizzle(swMode) == FALSE)))) ||
2171 ((IsTex3d(rsrcType) == TRUE) &&
2172 (IsRotateSwizzle(swMode) == FALSE) &&
2173 (IsBlock256b(swMode) == FALSE)));
2174
2175 return supported;
2176 }
2177
2178 /**
2179 ************************************************************************************************************************
2180 * Gfx9Lib::InitEquationTable
2181 *
2182 * @brief
2183 * Initialize Equation table.
2184 *
2185 * @return
2186 * N/A
2187 ************************************************************************************************************************
2188 */
2189 VOID Gfx9Lib::InitEquationTable()
2190 {
2191 memset(m_equationTable, 0, sizeof(m_equationTable));
2192
2193 // Loop all possible resource type (2D/3D)
2194 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2195 {
2196 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2197
2198 // Loop all possible swizzle mode
2199 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
2200 {
2201 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2202
2203 // Loop all possible bpp
2204 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2205 {
2206 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2207
2208 // Check if the input is supported
2209 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2210 {
2211 ADDR_EQUATION equation;
2212 ADDR_E_RETURNCODE retCode;
2213
2214 memset(&equation, 0, sizeof(ADDR_EQUATION));
2215
2216 // Generate the equation
2217 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2218 {
2219 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2220 }
2221 else if (IsThin(rsrcType, swMode))
2222 {
2223 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2224 }
2225 else
2226 {
2227 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2228 }
2229
2230 // Only fill the equation into the table if the return code is ADDR_OK,
2231 // otherwise if the return code is not ADDR_OK, it indicates this is not
2232 // a valid input, we do nothing but just fill invalid equation index
2233 // into the lookup table.
2234 if (retCode == ADDR_OK)
2235 {
2236 equationIndex = m_numEquations;
2237 ADDR_ASSERT(equationIndex < EquationTableSize);
2238
2239 m_equationTable[equationIndex] = equation;
2240
2241 m_numEquations++;
2242 }
2243 else
2244 {
2245 ADDR_ASSERT_ALWAYS();
2246 }
2247 }
2248
2249 // Fill the index into the lookup table, if the combination is not supported
2250 // fill the invalid equation index
2251 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2252 }
2253 }
2254 }
2255 }
2256
2257 /**
2258 ************************************************************************************************************************
2259 * Gfx9Lib::HwlGetEquationIndex
2260 *
2261 * @brief
2262 * Interface function stub of GetEquationIndex
2263 *
2264 * @return
2265 * ADDR_E_RETURNCODE
2266 ************************************************************************************************************************
2267 */
2268 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2269 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2270 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2271 ) const
2272 {
2273 AddrResourceType rsrcType = pIn->resourceType;
2274 AddrSwizzleMode swMode = pIn->swizzleMode;
2275 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2276 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2277
2278 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2279 {
2280 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2281 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2282
2283 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2284 }
2285
2286 if (pOut->pMipInfo != NULL)
2287 {
2288 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2289 {
2290 pOut->pMipInfo[i].equationIndex = index;
2291 }
2292 }
2293
2294 return index;
2295 }
2296
2297 /**
2298 ************************************************************************************************************************
2299 * Gfx9Lib::HwlComputeBlock256Equation
2300 *
2301 * @brief
2302 * Interface function stub of ComputeBlock256Equation: fills the 8 address bits of a 256-byte block equation
2303 *
2304 * @return
2305 * ADDR_OK on success, ADDR_INVALIDPARAMS for an unsupported swizzle type or element size
2306 ************************************************************************************************************************
2307 */
2308 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2309 AddrResourceType rsrcType,
2310 AddrSwizzleMode swMode,
2311 UINT_32 elementBytesLog2,
2312 ADDR_EQUATION* pEquation) const
2313 {
2314 ADDR_E_RETURNCODE ret = ADDR_OK;
2315
// The equation always describes 8 address bits (2^8 = 256 bytes).
2316 pEquation->numBits = 8;
2317
// The lowest elementBytesLog2 address bits are initialized directly.
// NOTE(review): InitChannel arguments look like (valid, channel, index, destination), with channel 0
// used for the x[] settings below and channel 1 for y[] -- confirm against InitChannel.
2318 UINT_32 i = 0;
2319 for (; i < elementBytesLog2; i++)
2320 {
2321 InitChannel(1, 0 , i, &pEquation->addr[i]);
2322 }
2323
// pixelBit addresses the bits above the ones written by the loop above, so each case below fills
// (8 - elementBytesLog2) entries.
2324 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2325
2326 const UINT_32 maxBitsUsed = 4;
2327 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2328 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2329
// Building blocks for the tables: x[i] carries index (elementBytesLog2 + i), y[i] carries index i.
2330 for (i = 0; i < maxBitsUsed; i++)
2331 {
2332 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2333 InitChannel(1, 1, i, &y[i]);
2334 }
2335
// One bit-order table per swizzle type, selected by element size. The order of x/y entries in each
// case is the data of this function and must not be rearranged.
2336 if (IsStandardSwizzle(rsrcType, swMode))
2337 {
2338 switch (elementBytesLog2)
2339 {
2340 case 0:
2341 pixelBit[0] = x[0];
2342 pixelBit[1] = x[1];
2343 pixelBit[2] = x[2];
2344 pixelBit[3] = x[3];
2345 pixelBit[4] = y[0];
2346 pixelBit[5] = y[1];
2347 pixelBit[6] = y[2];
2348 pixelBit[7] = y[3];
2349 break;
2350 case 1:
2351 pixelBit[0] = x[0];
2352 pixelBit[1] = x[1];
2353 pixelBit[2] = x[2];
2354 pixelBit[3] = y[0];
2355 pixelBit[4] = y[1];
2356 pixelBit[5] = y[2];
2357 pixelBit[6] = x[3];
2358 break;
2359 case 2:
2360 pixelBit[0] = x[0];
2361 pixelBit[1] = x[1];
2362 pixelBit[2] = y[0];
2363 pixelBit[3] = y[1];
2364 pixelBit[4] = y[2];
2365 pixelBit[5] = x[2];
2366 break;
2367 case 3:
2368 pixelBit[0] = x[0];
2369 pixelBit[1] = y[0];
2370 pixelBit[2] = y[1];
2371 pixelBit[3] = x[1];
2372 pixelBit[4] = x[2];
2373 break;
2374 case 4:
2375 pixelBit[0] = y[0];
2376 pixelBit[1] = y[1];
2377 pixelBit[2] = x[0];
2378 pixelBit[3] = x[1];
2379 break;
2380 default:
2381 ADDR_ASSERT_ALWAYS();
2382 ret = ADDR_INVALIDPARAMS;
2383 break;
2384 }
2385 }
2386 else if (IsDisplaySwizzle(rsrcType, swMode))
2387 {
2388 switch (elementBytesLog2)
2389 {
2390 case 0:
2391 pixelBit[0] = x[0];
2392 pixelBit[1] = x[1];
2393 pixelBit[2] = x[2];
2394 pixelBit[3] = y[1];
2395 pixelBit[4] = y[0];
2396 pixelBit[5] = y[2];
2397 pixelBit[6] = x[3];
2398 pixelBit[7] = y[3];
2399 break;
2400 case 1:
2401 pixelBit[0] = x[0];
2402 pixelBit[1] = x[1];
2403 pixelBit[2] = x[2];
2404 pixelBit[3] = y[0];
2405 pixelBit[4] = y[1];
2406 pixelBit[5] = y[2];
2407 pixelBit[6] = x[3];
2408 break;
2409 case 2:
2410 pixelBit[0] = x[0];
2411 pixelBit[1] = x[1];
2412 pixelBit[2] = y[0];
2413 pixelBit[3] = x[2];
2414 pixelBit[4] = y[1];
2415 pixelBit[5] = y[2];
2416 break;
2417 case 3:
2418 pixelBit[0] = x[0];
2419 pixelBit[1] = y[0];
2420 pixelBit[2] = x[1];
2421 pixelBit[3] = x[2];
2422 pixelBit[4] = y[1];
2423 break;
2424 case 4:
2425 pixelBit[0] = x[0];
2426 pixelBit[1] = y[0];
2427 pixelBit[2] = x[1];
2428 pixelBit[3] = y[1];
2429 break;
2430 default:
2431 ADDR_ASSERT_ALWAYS();
2432 ret = ADDR_INVALIDPARAMS;
2433 break;
2434 }
2435 }
2436 else if (IsRotateSwizzle(swMode))
2437 {
2438 switch (elementBytesLog2)
2439 {
2440 case 0:
2441 pixelBit[0] = y[0];
2442 pixelBit[1] = y[1];
2443 pixelBit[2] = y[2];
2444 pixelBit[3] = x[1];
2445 pixelBit[4] = x[0];
2446 pixelBit[5] = x[2];
2447 pixelBit[6] = x[3];
2448 pixelBit[7] = y[3];
2449 break;
2450 case 1:
2451 pixelBit[0] = y[0];
2452 pixelBit[1] = y[1];
2453 pixelBit[2] = y[2];
2454 pixelBit[3] = x[0];
2455 pixelBit[4] = x[1];
2456 pixelBit[5] = x[2];
2457 pixelBit[6] = x[3];
2458 break;
2459 case 2:
2460 pixelBit[0] = y[0];
2461 pixelBit[1] = y[1];
2462 pixelBit[2] = x[0];
2463 pixelBit[3] = y[2];
2464 pixelBit[4] = x[1];
2465 pixelBit[5] = x[2];
2466 break;
2467 case 3:
2468 pixelBit[0] = y[0];
2469 pixelBit[1] = x[0];
2470 pixelBit[2] = y[1];
2471 pixelBit[3] = x[1];
2472 pixelBit[4] = x[2];
2473 break;
// Unlike the other swizzle types there is no table for element size 4 here. The default label
// deliberately falls through into case 4: an unexpected size asserts first, while size 4 is
// rejected with ADDR_INVALIDPARAMS without asserting.
2474 default:
2475 ADDR_ASSERT_ALWAYS();
2476 case 4:
2477 ret = ADDR_INVALIDPARAMS;
2478 break;
2479 }
2480 }
2481 else
2482 {
// None of the three swizzle types handled by this function.
2483 ADDR_ASSERT_ALWAYS();
2484 ret = ADDR_INVALIDPARAMS;
2485 }
2486
2487 // Post validation
// Assert-only consistency check: the highest valid index found for channel 0 and channel 1 in the
// 8 address bits must match the Block256_2d dimensions for this element size (width scaled by the
// element size in bytes).
// NOTE(review): ASSERTED presumably marks a variable that is only read inside assertions -- confirm.
2488 if (ret == ADDR_OK)
2489 {
2490 ASSERTED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2491 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2492 (microBlockDim.w * (1 << elementBytesLog2)));
2493 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2494 }
2495
2496 return ret;
2497 }
2498
2499 /**
2500 ************************************************************************************************************************
2501 * Gfx9Lib::HwlComputeThinEquation
2502 *
2503 * @brief
2504 * Interface function stub of ComputeThinEquation: builds the x/y address equation (and XOR terms) of a thin block
2505 *
2506 * @return
2507 * ADDR_OK on success, ADDR_INVALIDPARAMS if the swizzle mode / element size is not supported
2508 ************************************************************************************************************************
2509 */
2510 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2511 AddrResourceType rsrcType,
2512 AddrSwizzleMode swMode,
2513 UINT_32 elementBytesLog2,
2514 ADDR_EQUATION* pEquation) const
2515 {
2516 ADDR_E_RETURNCODE ret = ADDR_OK;
2517
2518 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2519
// maxXorBits is the number of bit positions that need a channel setting: at least the block's own
// bits, possibly more when XOR sources lie above the block size.
2520 UINT_32 maxXorBits = blockSizeLog2;
2521 if (IsNonPrtXor(swMode))
2522 {
2523 // For non-prt-xor, maybe need to initialize some more bits for xor
2524 // The highest xor bit used in equation will be max the following 3 items:
2525 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2526 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2527 // 3. blockSizeLog2
2528
2529 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2530 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2531 GetPipeXorBits(blockSizeLog2) +
2532 2 * GetBankXorBits(blockSizeLog2));
2533 }
2534
// Pools of per-channel settings that the equation bits are drawn from, in increasing index order.
2535 const UINT_32 maxBitsUsed = 14;
2536 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2537 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2538 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2539
// Settings for bit positions at or above blockSizeLog2; they are not part of addr[] and are only
// used as XOR sources further down.
2540 const UINT_32 extraXorBits = 16;
2541 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2542 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2543
// NOTE(review): InitChannel arguments look like (valid, channel, index, destination) with channel 0
// for x[] and channel 1 for y[] -- confirm against InitChannel.
2544 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2545 {
2546 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2547 InitChannel(1, 1, i, &y[i]);
2548 }
2549
2550 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2551
// The lowest elementBytesLog2 address bits are initialized directly.
2552 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2553 {
2554 InitChannel(1, 0 , i, &pixelBit[i]);
2555 }
2556
// xIdx / yIdx: next unused entry of x[] / y[]; lowBits: number of address bits already assigned.
2557 UINT_32 xIdx = 0;
2558 UINT_32 yIdx = 0;
2559 UINT_32 lowBits = 0;
2560
2561 if (IsZOrderSwizzle(swMode))
2562 {
// Z-order: bits elementBytesLog2..5 alternate x, y, x, ... starting with x. Element sizes with
// log2 above 3 are rejected.
2563 if (elementBytesLog2 <= 3)
2564 {
2565 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2566 {
2567 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2568 }
2569
2570 lowBits = 6;
2571 }
2572 else
2573 {
2574 ret = ADDR_INVALIDPARAMS;
2575 }
2576 }
2577 else
2578 {
// All other swizzle types: the low 8 bits come from the 256-byte block equation, and the x/y
// indices continue after the bits that equation consumed (Log2 of the micro block dimensions).
2579 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2580
2581 if (ret == ADDR_OK)
2582 {
2583 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2584 xIdx = Log2(microBlockDim.w);
2585 yIdx = Log2(microBlockDim.h);
2586 lowBits = 8;
2587 }
2588 }
2589
2590 if (ret == ADDR_OK)
2591 {
// Remaining block bits: even bit positions take the next y, odd bit positions the next x.
2592 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2593 {
2594 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2595 }
2596
// Same alternation continued above the block size, stored in xorExtra instead of addr[].
2597 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2598 {
2599 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2600 }
2601
2602 if (IsXor(swMode))
2603 {
2604 // Fill XOR bits
// Pipe bits start at m_pipeInterleaveLog2 and bank bits follow directly after the pipe bits.
2605 UINT_32 pipeStart = m_pipeInterleaveLog2;
2606 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2607
2608 UINT_32 bankStart = pipeStart + pipeXorBits;
2609 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2610
// xor1 of pipe bit i copies the setting at position (pipeStart + 2 * pipeXorBits - 1 - i), i.e. the
// sources are taken in descending order; positions at or above blockSizeLog2 come from xorExtra.
2611 for (UINT_32 i = 0; i < pipeXorBits; i++)
2612 {
2613 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2614 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2615 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2616
2617 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2618 }
2619
// Same scheme for the bank bits, counted down from (bankStart + 2 * bankXorBits - 1).
2620 for (UINT_32 i = 0; i < bankXorBits; i++)
2621 {
2622 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2623 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2624 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2625
2626 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2627 }
2628
// Non-PRT modes additionally get an xor2 term on channel 2, with indices in reversed order: pipe
// bits use indices pipeXorBits-1 .. 0, bank bits use the next bankXorBits indices above those.
// NOTE(review): channel 2 is the one used for z[] in HwlComputeThickEquation; what it represents
// for a thin surface is not visible here -- confirm.
2629 if (IsPrt(swMode) == FALSE)
2630 {
2631 for (UINT_32 i = 0; i < pipeXorBits; i++)
2632 {
2633 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2634 }
2635
2636 for (UINT_32 i = 0; i < bankXorBits; i++)
2637 {
2638 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2639 }
2640 }
2641 }
2642
// numBits is only written on success; on failure pEquation->numBits is left as the caller (or
// HwlComputeBlock256Equation) set it.
2643 pEquation->numBits = blockSizeLog2;
2644 }
2645
2646 return ret;
2647 }
2648
2649 /**
2650 ************************************************************************************************************************
2651 * Gfx9Lib::HwlComputeThickEquation
2652 *
2653 * @brief
2654 * Interface function stub of ComputeThickEquation: builds the x/y/z address equation (and XOR terms) of a 3D block
2655 *
2656 * @return
2657 * ADDR_OK on success, ADDR_INVALIDPARAMS if the swizzle type / element size is not supported
2658 ************************************************************************************************************************
2659 */
2660 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2661 AddrResourceType rsrcType,
2662 AddrSwizzleMode swMode,
2663 UINT_32 elementBytesLog2,
2664 ADDR_EQUATION* pEquation) const
2665 {
2666 ADDR_E_RETURNCODE ret = ADDR_OK;
2667
// Only 3D resources are expected here (checked by assertion only).
2668 ADDR_ASSERT(IsTex3d(rsrcType));
2669
2670 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2671
// maxXorBits is the number of bit positions that need a channel setting: at least the block's own
// bits, possibly more when XOR sources lie above the block size.
2672 UINT_32 maxXorBits = blockSizeLog2;
2673 if (IsNonPrtXor(swMode))
2674 {
2675 // For non-prt-xor, maybe need to initialize some more bits for xor
2676 // The highest xor bit used in equation will be max the following 3:
2677 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2678 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2679 // 3. blockSizeLog2
2680
2681 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2682 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2683 GetPipeXorBits(blockSizeLog2) +
2684 3 * GetBankXorBits(blockSizeLog2));
2685 }
2686
// The lowest elementBytesLog2 address bits are initialized directly.
2687 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2688 {
2689 InitChannel(1, 0 , i, &pEquation->addr[i]);
2690 }
2691
// pixelBit addresses the bits above the ones written by the loop above, so each table case below
// fills (10 - elementBytesLog2) entries.
2692 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2693
// Pools of per-channel settings that the equation bits are drawn from.
2694 const UINT_32 maxBitsUsed = 12;
2695 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2696 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2697 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2698 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2699
// Settings for bit positions at or above blockSizeLog2; they are not part of addr[] and are only
// used as XOR sources further down.
2700 const UINT_32 extraXorBits = 24;
2701 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2702 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2703
// NOTE(review): InitChannel arguments look like (valid, channel, index, destination) with channels
// 0 / 1 / 2 used for x[] / y[] / z[] -- confirm against InitChannel.
2704 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2705 {
2706 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2707 InitChannel(1, 1, i, &y[i]);
2708 InitChannel(1, 2, i, &z[i]);
2709 }
2710
// One bit-order table per swizzle type, selected by element size. The order of x/y/z entries in
// each case is the data of this function and must not be rearranged.
2711 if (IsZOrderSwizzle(swMode))
2712 {
2713 switch (elementBytesLog2)
2714 {
2715 case 0:
2716 pixelBit[0] = x[0];
2717 pixelBit[1] = y[0];
2718 pixelBit[2] = x[1];
2719 pixelBit[3] = y[1];
2720 pixelBit[4] = z[0];
2721 pixelBit[5] = z[1];
2722 pixelBit[6] = x[2];
2723 pixelBit[7] = z[2];
2724 pixelBit[8] = y[2];
2725 pixelBit[9] = x[3];
2726 break;
2727 case 1:
2728 pixelBit[0] = x[0];
2729 pixelBit[1] = y[0];
2730 pixelBit[2] = x[1];
2731 pixelBit[3] = y[1];
2732 pixelBit[4] = z[0];
2733 pixelBit[5] = z[1];
2734 pixelBit[6] = z[2];
2735 pixelBit[7] = y[2];
2736 pixelBit[8] = x[2];
2737 break;
2738 case 2:
2739 pixelBit[0] = x[0];
2740 pixelBit[1] = y[0];
2741 pixelBit[2] = x[1];
2742 pixelBit[3] = z[0];
2743 pixelBit[4] = y[1];
2744 pixelBit[5] = z[1];
2745 pixelBit[6] = y[2];
2746 pixelBit[7] = x[2];
2747 break;
2748 case 3:
2749 pixelBit[0] = x[0];
2750 pixelBit[1] = y[0];
2751 pixelBit[2] = z[0];
2752 pixelBit[3] = x[1];
2753 pixelBit[4] = z[1];
2754 pixelBit[5] = y[1];
2755 pixelBit[6] = x[2];
2756 break;
2757 case 4:
2758 pixelBit[0] = x[0];
2759 pixelBit[1] = y[0];
2760 pixelBit[2] = z[0];
2761 pixelBit[3] = z[1];
2762 pixelBit[4] = y[1];
2763 pixelBit[5] = x[1];
2764 break;
2765 default:
2766 ADDR_ASSERT_ALWAYS();
2767 ret = ADDR_INVALIDPARAMS;
2768 break;
2769 }
2770 }
2771 else if (IsStandardSwizzle(rsrcType, swMode))
2772 {
2773 switch (elementBytesLog2)
2774 {
2775 case 0:
2776 pixelBit[0] = x[0];
2777 pixelBit[1] = x[1];
2778 pixelBit[2] = x[2];
2779 pixelBit[3] = x[3];
2780 pixelBit[4] = y[0];
2781 pixelBit[5] = y[1];
2782 pixelBit[6] = z[0];
2783 pixelBit[7] = z[1];
2784 pixelBit[8] = z[2];
2785 pixelBit[9] = y[2];
2786 break;
2787 case 1:
2788 pixelBit[0] = x[0];
2789 pixelBit[1] = x[1];
2790 pixelBit[2] = x[2];
2791 pixelBit[3] = y[0];
2792 pixelBit[4] = y[1];
2793 pixelBit[5] = z[0];
2794 pixelBit[6] = z[1];
2795 pixelBit[7] = z[2];
2796 pixelBit[8] = y[2];
2797 break;
2798 case 2:
2799 pixelBit[0] = x[0];
2800 pixelBit[1] = x[1];
2801 pixelBit[2] = y[0];
2802 pixelBit[3] = y[1];
2803 pixelBit[4] = z[0];
2804 pixelBit[5] = z[1];
2805 pixelBit[6] = y[2];
2806 pixelBit[7] = x[2];
2807 break;
2808 case 3:
2809 pixelBit[0] = x[0];
2810 pixelBit[1] = y[0];
2811 pixelBit[2] = y[1];
2812 pixelBit[3] = z[0];
2813 pixelBit[4] = z[1];
2814 pixelBit[5] = x[1];
2815 pixelBit[6] = x[2];
2816 break;
2817 case 4:
2818 pixelBit[0] = y[0];
2819 pixelBit[1] = y[1];
2820 pixelBit[2] = z[0];
2821 pixelBit[3] = z[1];
2822 pixelBit[4] = x[0];
2823 pixelBit[5] = x[1];
2824 break;
2825 default:
2826 ADDR_ASSERT_ALWAYS();
2827 ret = ADDR_INVALIDPARAMS;
2828 break;
2829 }
2830 }
2831 else
2832 {
// Neither Z-order nor standard swizzle: not handled by this function.
2833 ADDR_ASSERT_ALWAYS();
2834 ret = ADDR_INVALIDPARAMS;
2835 }
2836
2837 if (ret == ADDR_OK)
2838 {
// Continue the x/y/z indices after the bits consumed by the tables above (Log2 of the Block1K_3d
// micro block dimensions for this element size).
2839 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2840 UINT_32 xIdx = Log2(microBlockDim.w);
2841 UINT_32 yIdx = Log2(microBlockDim.h);
2842 UINT_32 zIdx = Log2(microBlockDim.d);
2843
// From here on pixelBit is indexed by absolute address bit position.
2844 pixelBit = pEquation->addr;
2845
// The tables are expected to have filled exactly bits 0..9: bit 9 must be valid, bit 10 not yet.
2846 const UINT_32 lowBits = 10;
2847 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2848 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2849
// Remaining block bits are assigned by bit position modulo 3: 0 -> next x, 1 -> next z, 2 -> next y.
2850 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2851 {
2852 if ((i % 3) == 0)
2853 {
2854 pixelBit[i] = x[xIdx++];
2855 }
2856 else if ((i % 3) == 1)
2857 {
2858 pixelBit[i] = z[zIdx++];
2859 }
2860 else
2861 {
2862 pixelBit[i] = y[yIdx++];
2863 }
2864 }
2865
// Same pattern continued above the block size, stored in xorExtra instead of addr[].
2866 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2867 {
2868 if ((i % 3) == 0)
2869 {
2870 xorExtra[i - blockSizeLog2] = x[xIdx++];
2871 }
2872 else if ((i % 3) == 1)
2873 {
2874 xorExtra[i - blockSizeLog2] = z[zIdx++];
2875 }
2876 else
2877 {
2878 xorExtra[i - blockSizeLog2] = y[yIdx++];
2879 }
2880 }
2881
2882 if (IsXor(swMode))
2883 {
2884 // Fill XOR bits
// Each pipe bit i gets two XOR sources taken in descending order, two positions per step: xor1 from
// position (pipeStart + 3 * pipeXorBits - 1 - 2 * i) and xor2 from the position directly below it.
// Positions at or above blockSizeLog2 come from xorExtra.
2885 UINT_32 pipeStart = m_pipeInterleaveLog2;
2886 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2887 for (UINT_32 i = 0; i < pipeXorBits; i++)
2888 {
2889 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2890 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2891 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2892
2893 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2894
2895 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2896 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2897 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2898
2899 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2900 }
2901
// Bank bits follow the pipe bits and use the same scheme relative to bankStart.
2902 UINT_32 bankStart = pipeStart + pipeXorBits;
2903 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2904 for (UINT_32 i = 0; i < bankXorBits; i++)
2905 {
2906 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2907 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2908 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2909
2910 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2911
2912 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2913 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2914 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2915
2916 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2917 }
2918 }
2919
// numBits is only written on success.
2920 pEquation->numBits = blockSizeLog2;
2921 }
2922
2923 return ret;
2924 }
2925
2926 /**
2927 ************************************************************************************************************************
2928 * Gfx9Lib::IsValidDisplaySwizzleMode
2929 *
2930 * @brief
2931 * Check if a swizzle mode is supported by display engine
2932 *
2933 * @return
2934 * TRUE is swizzle mode is supported by display engine
2935 ************************************************************************************************************************
2936 */
2937 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2938 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2939 {
2940 BOOL_32 support = FALSE;
2941
2942 if (m_settings.isDce12)
2943 {
2944 switch (pIn->swizzleMode)
2945 {
2946 case ADDR_SW_256B_D:
2947 case ADDR_SW_256B_R:
2948 support = (pIn->bpp == 32);
2949 break;
2950
2951 case ADDR_SW_LINEAR:
2952 case ADDR_SW_4KB_D:
2953 case ADDR_SW_4KB_R:
2954 case ADDR_SW_64KB_D:
2955 case ADDR_SW_64KB_R:
2956 case ADDR_SW_4KB_D_X:
2957 case ADDR_SW_4KB_R_X:
2958 case ADDR_SW_64KB_D_X:
2959 case ADDR_SW_64KB_R_X:
2960 support = (pIn->bpp <= 64);
2961 break;
2962
2963 default:
2964 break;
2965 }
2966 }
2967 else if (m_settings.isDcn1)
2968 {
2969 switch (pIn->swizzleMode)
2970 {
2971 case ADDR_SW_4KB_D:
2972 case ADDR_SW_64KB_D:
2973 case ADDR_SW_64KB_D_T:
2974 case ADDR_SW_4KB_D_X:
2975 case ADDR_SW_64KB_D_X:
2976 support = (pIn->bpp == 64);
2977 break;
2978
2979 case ADDR_SW_LINEAR:
2980 case ADDR_SW_4KB_S:
2981 case ADDR_SW_64KB_S:
2982 case ADDR_SW_64KB_S_T:
2983 case ADDR_SW_4KB_S_X:
2984 case ADDR_SW_64KB_S_X:
2985 support = (pIn->bpp <= 64);
2986 break;
2987
2988 default:
2989 break;
2990 }
2991 }
2992 else
2993 {
2994 ADDR_NOT_IMPLEMENTED();
2995 }
2996
2997 return support;
2998 }
2999
3000 /**
3001 ************************************************************************************************************************
3002 * Gfx9Lib::HwlComputePipeBankXor
3003 *
3004 * @brief
3005 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3006 *
3007 * @return
3008 * PipeBankXor value
3009 ************************************************************************************************************************
3010 */
3011 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3012 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3013 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
3014 {
3015 if (IsXor(pIn->swizzleMode))
3016 {
3017 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3018 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3019 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3020
3021 UINT_32 pipeXor = 0;
3022 UINT_32 bankXor = 0;
3023
3024 const UINT_32 bankMask = (1 << bankBits) - 1;
3025 const UINT_32 index = pIn->surfIndex & bankMask;
3026
3027 const UINT_32 bpp = pIn->flags.fmask ?
3028 GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3029 if (bankBits == 4)
3030 {
3031 static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3032 static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3033
3034 bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3035 }
3036 else if (bankBits > 0)
3037 {
3038 UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3039 bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3040 bankXor = (index * bankIncrease) & bankMask;
3041 }
3042
3043 pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3044 }
3045 else
3046 {
3047 pOut->pipeBankXor = 0;
3048 }
3049
3050 return ADDR_OK;
3051 }
3052
3053 /**
3054 ************************************************************************************************************************
3055 * Gfx9Lib::HwlComputeSlicePipeBankXor
3056 *
3057 * @brief
3058 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3059 *
3060 * @return
3061 * PipeBankXor value
3062 ************************************************************************************************************************
3063 */
3064 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3065 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3066 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const
3067 {
3068 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3069 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3070 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3071
3072 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3073 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3074
3075 pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3076
3077 return ADDR_OK;
3078 }
3079
3080 /**
3081 ************************************************************************************************************************
3082 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3083 *
3084 * @brief
3085 * Compute sub resource offset to support swizzle pattern
3086 *
3087 * @return
3088 * Offset
3089 ************************************************************************************************************************
3090 */
3091 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3092 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3093 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const
3094 {
3095 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3096
3097 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3098 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3099 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3100 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3101 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3102 UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3103
3104 pOut->offset = pIn->slice * pIn->sliceSize +
3105 pIn->macroBlockOffset +
3106 (pIn->mipTailOffset ^ pipeBankXor) -
3107 static_cast<UINT_64>(pipeBankXor);
3108 return ADDR_OK;
3109 }
3110
3111 /**
3112 ************************************************************************************************************************
3113 * Gfx9Lib::ValidateNonSwModeParams
3114 *
3115 * @brief
3116 * Validate compute surface info params except swizzle mode
3117 *
3118 * @return
3119 * TRUE if parameters are valid, FALSE otherwise
3120 ************************************************************************************************************************
3121 */
3122 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3123 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3124 {
3125 BOOL_32 valid = TRUE;
3126
3127 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3128 {
3129 ADDR_ASSERT_ALWAYS();
3130 valid = FALSE;
3131 }
3132
3133 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3134 {
3135 ADDR_ASSERT_ALWAYS();
3136 valid = FALSE;
3137 }
3138
3139 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3140 const BOOL_32 msaa = (pIn->numFrags > 1);
3141 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3142
3143 const AddrResourceType rsrcType = pIn->resourceType;
3144 const BOOL_32 tex3d = IsTex3d(rsrcType);
3145 const BOOL_32 tex2d = IsTex2d(rsrcType);
3146 const BOOL_32 tex1d = IsTex1d(rsrcType);
3147
3148 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3149 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3150 const BOOL_32 display = flags.display || flags.rotated;
3151 const BOOL_32 stereo = flags.qbStereo;
3152 const BOOL_32 fmask = flags.fmask;
3153
3154 // Resource type check
3155 if (tex1d)
3156 {
3157 if (msaa || zbuffer || display || stereo || isBc || fmask)
3158 {
3159 ADDR_ASSERT_ALWAYS();
3160 valid = FALSE;
3161 }
3162 }
3163 else if (tex2d)
3164 {
3165 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
3166 {
3167 ADDR_ASSERT_ALWAYS();
3168 valid = FALSE;
3169 }
3170 }
3171 else if (tex3d)
3172 {
3173 if (msaa || zbuffer || display || stereo || fmask)
3174 {
3175 ADDR_ASSERT_ALWAYS();
3176 valid = FALSE;
3177 }
3178 }
3179 else
3180 {
3181 ADDR_ASSERT_ALWAYS();
3182 valid = FALSE;
3183 }
3184
3185 return valid;
3186 }
3187
3188 /**
3189 ************************************************************************************************************************
3190 * Gfx9Lib::ValidateSwModeParams
3191 *
3192 * @brief
3193 * Validate compute surface info related to swizzle mode
3194 *
3195 * @return
3196 * TRUE if parameters are valid, FALSE otherwise
3197 ************************************************************************************************************************
3198 */
3199 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3200 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3201 {
3202 BOOL_32 valid = TRUE;
3203
3204 if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
3205 {
3206 ADDR_ASSERT_ALWAYS();
3207 valid = FALSE;
3208 }
3209
3210 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3211 const BOOL_32 msaa = (pIn->numFrags > 1);
3212 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3213 const BOOL_32 is422 = ElemLib::IsMacroPixelPacked(pIn->format);
3214
3215 const AddrResourceType rsrcType = pIn->resourceType;
3216 const BOOL_32 tex3d = IsTex3d(rsrcType);
3217 const BOOL_32 tex2d = IsTex2d(rsrcType);
3218 const BOOL_32 tex1d = IsTex1d(rsrcType);
3219
3220 const AddrSwizzleMode swizzle = pIn->swizzleMode;
3221 const BOOL_32 linear = IsLinear(swizzle);
3222 const BOOL_32 blk256B = IsBlock256b(swizzle);
3223 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
3224
3225 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3226 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3227 const BOOL_32 color = flags.color;
3228 const BOOL_32 texture = flags.texture;
3229 const BOOL_32 display = flags.display || flags.rotated;
3230 const BOOL_32 prt = flags.prt;
3231 const BOOL_32 fmask = flags.fmask;
3232
3233 const BOOL_32 thin3d = tex3d && flags.view3dAs2dArray;
3234 const BOOL_32 zMaxMip = tex3d && mipmap &&
3235 (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3236
3237 // Misc check
3238 if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3239 {
3240 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3241 ADDR_ASSERT_ALWAYS();
3242 valid = FALSE;
3243 }
3244
3245 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3246 {
3247 ADDR_ASSERT_ALWAYS();
3248 valid = FALSE;
3249 }
3250
3251 if ((pIn->bpp == 96) && (linear == FALSE))
3252 {
3253 ADDR_ASSERT_ALWAYS();
3254 valid = FALSE;
3255 }
3256
3257 if (prt && isNonPrtXor)
3258 {
3259 ADDR_ASSERT_ALWAYS();
3260 valid = FALSE;
3261 }
3262
3263 // Resource type check
3264 if (tex1d)
3265 {
3266 if (linear == FALSE)
3267 {
3268 ADDR_ASSERT_ALWAYS();
3269 valid = FALSE;
3270 }
3271 }
3272
3273 // Swizzle type check
3274 if (linear)
3275 {
3276 if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3277 ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3278 {
3279 ADDR_ASSERT_ALWAYS();
3280 valid = FALSE;
3281 }
3282 }
3283 else if (IsZOrderSwizzle(swizzle))
3284 {
3285 if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3286 {
3287 ADDR_ASSERT_ALWAYS();
3288 valid = FALSE;
3289 }
3290 }
3291 else if (IsStandardSwizzle(swizzle))
3292 {
3293 if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3294 {
3295 ADDR_ASSERT_ALWAYS();
3296 valid = FALSE;
3297 }
3298 }
3299 else if (IsDisplaySwizzle(swizzle))
3300 {
3301 if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3302 {
3303 ADDR_ASSERT_ALWAYS();
3304 valid = FALSE;
3305 }
3306 }
3307 else if (IsRotateSwizzle(swizzle))
3308 {
3309 if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3310 {
3311 ADDR_ASSERT_ALWAYS();
3312 valid = FALSE;
3313 }
3314 }
3315 else
3316 {
3317 ADDR_ASSERT_ALWAYS();
3318 valid = FALSE;
3319 }
3320
3321 // Block type check
3322 if (blk256B)
3323 {
3324 if (prt || zbuffer || tex3d || mipmap || msaa)
3325 {
3326 ADDR_ASSERT_ALWAYS();
3327 valid = FALSE;
3328 }
3329 }
3330
3331 return valid;
3332 }
3333
3334 /**
3335 ************************************************************************************************************************
3336 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3337 *
3338 * @brief
3339 * Compute surface info sanity check
3340 *
3341 * @return
3342 * ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3343 ************************************************************************************************************************
3344 */
3345 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3346 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3347 {
3348 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3349 }
3350
3351 /**
3352 ************************************************************************************************************************
3353 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3354 *
3355 * @brief
3356 * Internal function to get suggested surface information for client to use
*
* Builds the set of swizzle modes that the client has not forbidden, removes the modes that conflict with the
* resource type, format, usage flags, sample/mip counts and display engine, and finally picks one mode out of
* the remaining set (block size first, then swizzle type, then the highest-numbered remaining mode).
*
* Outputs written on success: resourceType, validSwModeSet, canXor, validBlockSet, validSwTypeSet,
* clientPreferredSwSet and swizzleMode. resourceType is written before validation, so it is set on failure too.
3357 *
3358 * @return
3359 * ADDR_E_RETURNCODE
* (ADDR_OK when at least one swizzle mode remains allowed; ADDR_INVALIDPARAMS when ValidateNonSwModeParams
* rejects the input or when no swizzle mode is left)
3360 ************************************************************************************************************************
3361 */
3362 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3363 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3364 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const
3365 {
3366 ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3367 ElemLib* pElemLib = GetElemLib();
3368
// Zero-valued dimensions and sample counts are treated as 1; numFrags defaults to numSamples when left at 0
3369 UINT_32 bpp = pIn->bpp;
3370 UINT_32 width = Max(pIn->width, 1u);
3371 UINT_32 height = Max(pIn->height, 1u);
3372 UINT_32 numSamples = Max(pIn->numSamples, 1u);
3373 UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3374
3375 if (pIn->flags.fmask)
3376 {
// Fmask: bpp is derived from the sample/fragment counts and the surface is handled as a single-sample
// 2D resource from here on
3377 bpp = GetFmaskBpp(numSamples, numFrags);
3378 numFrags = 1;
3379 numSamples = 1;
3380 pOut->resourceType = ADDR_RSRC_TEX_2D;
3381 }
3382 else
3383 {
3384 // Setting format to ADDR_FMT_INVALID skips this conversion (the client-provided bpp is used as is)
3385 if (pIn->format != ADDR_FMT_INVALID)
3386 {
3387 UINT_32 expandX, expandY;
3388
3389 // Don't care for this case
3390 ElemMode elemMode = ADDR_UNCOMPRESSED;
3391
3392 // Get compression/expansion factors and element mode which indicates compression/expansion
3393 bpp = pElemLib->GetBitsPerPixel(pIn->format,
3394 &elemMode,
3395 &expandX,
3396 &expandY);
3397
// bpp, width and height are passed by pointer and may be rewritten here; basePitch is a scratch value
// that is not read afterwards
3398 UINT_32 basePitch = 0;
3399 GetElemLib()->AdjustSurfaceInfo(elemMode,
3400 expandX,
3401 expandY,
3402 &bpp,
3403 &basePitch,
3404 &width,
3405 &height);
3406 }
3407
3408 // The output may get changed for volume(3D) texture resource in future
3409 pOut->resourceType = pIn->resourceType;
3410 }
3411
3412 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
3413 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3414 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
3415 const BOOL_32 displayRsrc = pIn->flags.display || pIn->flags.rotated;
3416
3417 // Pre sanity check on non swizzle mode parameters
// (done on a local input built from the adjusted values above; swizzleMode is left zero-initialized here)
3418 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3419 localIn.flags = pIn->flags;
3420 localIn.resourceType = pOut->resourceType;
3421 localIn.format = pIn->format;
3422 localIn.bpp = bpp;
3423 localIn.width = width;
3424 localIn.height = height;
3425 localIn.numSlices = numSlices;
3426 localIn.numMipLevels = numMipLevels;
3427 localIn.numSamples = numSamples;
3428 localIn.numFrags = numFrags;
3429
3430 if (ValidateNonSwModeParams(&localIn))
3431 {
3432 // Forbid swizzle mode(s) by client setting
// Each block size contributes its mask unless the client forbade it. 3D resources use the Rsrc3d thin/thick
// masks; thick blocks contribute nothing for non-3D resources.
3433 ADDR2_SWMODE_SET allowedSwModeSet = {};
3434 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3435 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask;
3436 allowedSwModeSet.value |=
3437 pIn->forbiddenBlock.macroThin4KB ? 0 :
3438 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
3439 allowedSwModeSet.value |=
3440 pIn->forbiddenBlock.macroThick4KB ? 0 :
3441 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
3442 allowedSwModeSet.value |=
3443 pIn->forbiddenBlock.macroThin64KB ? 0 :
3444 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
3445 allowedSwModeSet.value |=
3446 pIn->forbiddenBlock.macroThick64KB ? 0 :
3447 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);
3448
// A non-zero preferredSwSet removes every swizzle type (Z/S/D/R) the client did not list
3449 if (pIn->preferredSwSet.value != 0)
3450 {
3451 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3452 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3453 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3454 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3455 }
3456
3457 if (pIn->noXor)
3458 {
3459 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3460 }
3461
// maxAlign == 0 means "no limit"; otherwise drop every block size larger than the limit
3462 if (pIn->maxAlign > 0)
3463 {
3464 if (pIn->maxAlign < Size64K)
3465 {
3466 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3467 }
3468
3469 if (pIn->maxAlign < Size4K)
3470 {
3471 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3472 }
3473
3474 if (pIn->maxAlign < Size256)
3475 {
3476 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3477 }
3478 }
3479
3480 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3481 switch (pOut->resourceType)
3482 {
3483 case ADDR_RSRC_TEX_1D:
3484 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3485 break;
3486
3487 case ADDR_RSRC_TEX_2D:
3488 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3489
// Mirrors ValidateSwModeParams: 2D surfaces above 64 bpp use neither rotate nor Z modes
3490 if (bpp > 64)
3491 {
3492 allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3493 }
3494 break;
3495
3496 case ADDR_RSRC_TEX_3D:
3497 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3498
3499 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3500 {
3501 // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3502 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3503 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3504 allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3505 }
3506
3507 if ((bpp == 128) && pIn->flags.color)
3508 {
3509 allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3510 }
3511
3512 if (pIn->flags.view3dAs2dArray)
3513 {
3514 allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3515 }
3516 break;
3517
3518 default:
3519 ADDR_ASSERT_ALWAYS();
3520 allowedSwModeSet.value = 0;
3521 break;
3522 }
3523
// ADDR_FMT_32_32_32 can only be linear
3524 if (pIn->format == ADDR_FMT_32_32_32)
3525 {
3526 allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3527 }
3528
// Block compressed formats: standard or display modes only, plus linear when the surface is not a texture
3529 if (ElemLib::IsBlockCompressed(pIn->format))
3530 {
3531 if (pIn->flags.texture)
3532 {
3533 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3534 }
3535 else
3536 {
3537 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3538 }
3539 }
3540
3541 if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3542 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3543 {
3544 allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3545 }
3546
// Fmask, depth and stencil surfaces keep only the Z swizzle modes
3547 if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3548 {
3549 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3550
3551 if (pIn->flags.noMetadata == FALSE)
3552 {
3553 if (pIn->flags.depth &&
3554 pIn->flags.texture &&
3555 (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3556 {
3557 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3558 // equation from wrong address within memory range a tile covered and use the
3559 // garbage data for compressed Z reading which finally leads to corruption.
3560 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3561 }
3562
3563 if (m_settings.htileCacheRbConflict &&
3564 (pIn->flags.depth || pIn->flags.stencil) &&
3565 (numSlices > 1) &&
3566 (pIn->flags.metaRbUnaligned == FALSE) &&
3567 (pIn->flags.metaPipeUnaligned == FALSE))
3568 {
3569 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3570 allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3571 }
3572 }
3573 }
3574
3575 if (msaa)
3576 {
3577 allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3578 }
3579
3580 if ((numFrags > 1) &&
3581 (Size4K < (m_pipeInterleaveBytes * numFrags)))
3582 {
3583 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
// A 4KB block is too small for this fragment count, so only 64KB blocks remain
3584 allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3585 }
3586
// Mirrors ValidateSwModeParams: 256B blocks do not support mip chains
3587 if (numMipLevels > 1)
3588 {
3589 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3590 }
3591
// Displayable surfaces are limited by the display engine generation (DCE12 keys on 32 bpp, DCN1 on 64 bpp)
3592 if (displayRsrc)
3593 {
3594 if (m_settings.isDce12)
3595 {
3596 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3597 }
3598 else if (m_settings.isDcn1)
3599 {
3600 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3601 }
3602 else
3603 {
3604 ADDR_NOT_IMPLEMENTED();
3605 }
3606 }
3607
3608 if (allowedSwModeSet.value != 0)
3609 {
3610 #if DEBUG
3611 // Post sanity check, at least AddrLib should accept the output generated by itself
// Walks every set bit of the mask; bit index i is used as the AddrSwizzleMode value to validate
3612 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3613
3614 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3615 {
3616 if (validateSwModeSet & 1)
3617 {
3618 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3619 ADDR_ASSERT(ValidateSwModeParams(&localIn));
3620 }
3621
3622 validateSwModeSet >>= 1;
3623 }
3624 #endif
3625
// Report the full valid set to the client before it is narrowed down to a single choice below
3626 pOut->validSwModeSet = allowedSwModeSet;
3627 pOut->canXor = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3628 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3629 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3630
3631 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3632
3633 if (pOut->clientPreferredSwSet.value == 0)
3634 {
3635 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3636 }
3637
3638 // Apply optional restrictions
// NOTE(review): this passes pIn->resourceType while the rest of the function uses pOut->resourceType; the two
// differ when flags.fmask is set (pOut is forced to ADDR_RSRC_TEX_2D above) - confirm this is intended.
// NOTE(review): presumably FilterInvalidEqSwizzleMode takes allowedSwModeSet by reference and narrows it -
// its declaration is not visible here.
3639 if (pIn->flags.needEquation)
3640 {
3641 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
3642 }
3643
// Linear is the only candidate left
3644 if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3645 {
3646 pOut->swizzleMode = ADDR_SW_LINEAR;
3647 }
3648 else
3649 {
3650 // Always ignore linear swizzle mode if there is other choice.
3651 allowedSwModeSet.swLinear = 0;
3652
3653 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3654
3655 // Determine block size if there are 2 or more block type candidates
3656 if (IsPow2(allowedBlockSet.value) == FALSE)
3657 {
// One representative swizzle mode per block type, used only to query the block dimensions below
3658 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = { ADDR_SW_LINEAR };
3659
3660 swMode[AddrBlockMicro] = ADDR_SW_256B_D;
3661 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_D;
3662 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;
3663
3664 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3665 {
3666 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
3667 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3668 }
3669
3670 Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
3671 Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
3672 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3673
// A later candidate i replaces the current pick when padSize[i] * ratioHi <= minSize * ratioLow, i.e. when it
// is at most ratioLow/ratioHi times the current size: 1x for minimizeAlign, 1.5x for opt4space, 2x otherwise.
// NOTE(review): (bpp >> 3) is zero for bpp < 8, which would divide by zero below; ValidateNonSwModeParams only
// rejects bpp == 0 - confirm sub-byte bpp cannot reach this point.
3674 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
3675 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
3676 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3677 UINT_32 minSizeBlk = AddrBlockMicro;
3678 UINT_64 minSize = 0;
3679
3680 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3681 {
3682 if (allowedBlockSet.value & (1 << i))
3683 {
3684 ComputeBlockDimensionForSurf(&blkDim[i].w,
3685 &blkDim[i].h,
3686 &blkDim[i].d,
3687 bpp,
3688 numFrags,
3689 pOut->resourceType,
3690 swMode[i]);
3691
// Same 32-element pitch alignment that HwlComputeSurfaceInfoTiled applies for display surfaces
3692 if (displayRsrc)
3693 {
3694 blkDim[i].w = PowTwoAlign(blkDim[i].w, 32);
3695 }
3696
3697 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
3698 padSize[i] = PowTwoAlign(padSize[i] * numFrags, sizeAlignInElement);
3699
// minSize == 0 means no candidate has been recorded yet
3700 if ((minSize == 0) ||
3701 ((padSize[i] * ratioHi) <= (minSize * ratioLow)))
3702 {
3703 minSize = padSize[i];
3704 minSizeBlk = i;
3705 }
3706 }
3707 }
3708
// A surface that fits into a single micro block always uses the micro block, provided it is allowed and
// the requested size alignment does not exceed 256 bytes
3709 if ((allowedBlockSet.micro == TRUE) &&
3710 (width <= blkDim[AddrBlockMicro].w) &&
3711 (height <= blkDim[AddrBlockMicro].h) &&
3712 (NextPow2(pIn->minSizeAlign) <= Size256))
3713 {
3714 minSizeBlk = AddrBlockMicro;
3715 }
3716
// Keep only the swizzle modes of the chosen block type
3717 if (minSizeBlk == AddrBlockMicro)
3718 {
3719 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3720 allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3721 }
3722 else if (minSizeBlk == AddrBlockThick4KB)
3723 {
3724 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3725 allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
3726 }
3727 else if (minSizeBlk == AddrBlockThin4KB)
3728 {
3729 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3730 Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
3731 }
3732 else if (minSizeBlk == AddrBlockThick64KB)
3733 {
3734 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3735 allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
3736 }
3737 else
3738 {
3739 ADDR_ASSERT(minSizeBlk == AddrBlockThin64KB);
3740 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3741 Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
3742 }
3743 }
3744
3745 // Block type should be determined.
3746 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3747
3748 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3749
3750 // Determine swizzle type if there are 2 or more swizzle type candidates
// Priority order per surface kind: block compressed D > S; macro pixel packed S > D > R;
// 3D D (color only) > Z > S; everything else R (rotated only) > D > S > Z
3751 if (IsPow2(allowedSwSet.value) == FALSE)
3752 {
3753 if (ElemLib::IsBlockCompressed(pIn->format))
3754 {
3755 if (allowedSwSet.sw_D)
3756 {
3757 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3758 }
3759 else
3760 {
3761 ADDR_ASSERT(allowedSwSet.sw_S);
3762 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3763 }
3764 }
3765 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3766 {
3767 if (allowedSwSet.sw_S)
3768 {
3769 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3770 }
3771 else if (allowedSwSet.sw_D)
3772 {
3773 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3774 }
3775 else
3776 {
3777 ADDR_ASSERT(allowedSwSet.sw_R);
3778 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3779 }
3780 }
3781 else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3782 {
3783 if (pIn->flags.color && allowedSwSet.sw_D)
3784 {
3785 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3786 }
3787 else if (allowedSwSet.sw_Z)
3788 {
3789 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3790 }
3791 else
3792 {
3793 ADDR_ASSERT(allowedSwSet.sw_S);
3794 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3795 }
3796 }
3797 else
3798 {
3799 if (pIn->flags.rotated && allowedSwSet.sw_R)
3800 {
3801 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3802 }
3803 else if (allowedSwSet.sw_D)
3804 {
3805 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3806 }
3807 else if (allowedSwSet.sw_S)
3808 {
3809 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3810 }
3811 else
3812 {
3813 ADDR_ASSERT(allowedSwSet.sw_Z);
3814 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3815 }
3816 }
3817 }
3818
3819 // Swizzle type should be determined.
3820 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3821
3822 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
3823 // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3824 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3825 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3826 }
3827
3828 returnCode = ADDR_OK;
3829 }
3830 else
3831 {
3832 // Invalid combination...
// (every swizzle mode was filtered out; returnCode stays ADDR_INVALIDPARAMS)
3833 ADDR_ASSERT_ALWAYS();
3834 }
3835 }
3836 else
3837 {
3838 // Invalid combination...
// (ValidateNonSwModeParams rejected the input; returnCode stays ADDR_INVALIDPARAMS)
3839 ADDR_ASSERT_ALWAYS();
3840 }
3841
3842 return returnCode;
3843 }
3844
3845 /**
3846 ************************************************************************************************************************
3847 * Gfx9Lib::ComputeStereoInfo
3848 *
3849 * @brief
3850 * Compute height alignment and right eye pipeBankXor for stereo surface
3851 *
3852 * @return
3853 * Error code
3854 *
3855 ************************************************************************************************************************
3856 */
3857 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3858 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3859 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
3860 UINT_32* pHeightAlign
3861 ) const
3862 {
3863 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3864
3865 UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3866
3867 if (eqIndex < m_numEquations)
3868 {
3869 if (IsXor(pIn->swizzleMode))
3870 {
3871 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3872 const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2);
3873 const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
3874 const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
3875 const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3876 const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
3877
3878 ADDR_ASSERT(maxYCoordBlock256 ==
3879 GetMaxValidChannelIndex(&pEqToCheck->addr[0], Log2Size256, 1));
3880
3881 const UINT_32 maxYCoordInBaseEquation =
3882 (blkSizeLog2 - Log2Size256) / 2 + maxYCoordBlock256;
3883
3884 ADDR_ASSERT(maxYCoordInBaseEquation ==
3885 GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3886
3887 const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3888
3889 ADDR_ASSERT(maxYCoordInPipeXor ==
3890 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3891
3892 const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3893 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3894
3895 ADDR_ASSERT(maxYCoordInBankXor ==
3896 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3897
3898 const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3899
3900 if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3901 {
3902 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3903
3904 if (pOut->pStereoInfo != NULL)
3905 {
3906 pOut->pStereoInfo->rightSwizzle = 0;
3907
3908 if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3909 {
3910 if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3911 {
3912 pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3913 }
3914
3915 if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3916 {
3917 pOut->pStereoInfo->rightSwizzle |=
3918 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3919 }
3920
3921 ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3922 GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3923 numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3924 }
3925 }
3926 }
3927 }
3928 }
3929 else
3930 {
3931 ADDR_ASSERT_ALWAYS();
3932 returnCode = ADDR_ERROR;
3933 }
3934
3935 return returnCode;
3936 }
3937
3938 /**
3939 ************************************************************************************************************************
3940 * Gfx9Lib::HwlComputeSurfaceInfoTiled
3941 *
3942 * @brief
3943 * Internal function to calculate alignment for tiled surface
*
* Fills in the block dimensions, the padded pitch/height/slice count, the mip chain layout (including the
* per-mip entries of pOut->pMipInfo when that array is provided), the slice/surface size and the base alignment.
3944 *
3945 * @return
3946 * ADDR_E_RETURNCODE
* (ADDR_INVALIDPARAMS when a client-provided pitch is misaligned or too small; otherwise whatever
* ComputeBlockDimensionForSurf / ComputeStereoInfo return)
3947 ************************************************************************************************************************
3948 */
3949 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3950 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3951 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3952 ) const
3953 {
3954 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3955 &pOut->blockHeight,
3956 &pOut->blockSlices,
3957 pIn->bpp,
3958 pIn->numFrags,
3959 pIn->resourceType,
3960 pIn->swizzleMode);
3961
3962 if (returnCode == ADDR_OK)
3963 {
3964 UINT_32 pitchAlignInElement = pOut->blockWidth;
3965
// Only single-mip, single-sample 2D display/rotated surfaces get the extra display alignment
3966 if ((IsTex2d(pIn->resourceType) == TRUE) &&
3967 (pIn->flags.display || pIn->flags.rotated) &&
3968 (pIn->numMipLevels <= 1) &&
3969 (pIn->numSamples <= 1) &&
3970 (pIn->numFrags <= 1))
3971 {
3972 // Display engine needs pitch align to be at least 32 pixels.
3973 pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3974 }
3975
3976 pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3977
// A client-provided pitch is honored for single-mip surfaces only; it must be a multiple of the pitch
// alignment and not smaller than the computed pitch
3978 if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3979 {
3980 if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3981 {
3982 returnCode = ADDR_INVALIDPARAMS;
3983 }
3984 else if (pIn->pitchInElement < pOut->pitch)
3985 {
3986 returnCode = ADDR_INVALIDPARAMS;
3987 }
3988 else
3989 {
3990 pOut->pitch = pIn->pitchInElement;
3991 }
3992 }
3993
// Stays 0 (no extra height alignment) unless ComputeStereoInfo writes a value
3994 UINT_32 heightAlign = 0;
3995
// NOTE(review): when the pitch check above failed, this call overwrites returnCode, so a stereo surface
// could report ADDR_OK despite the invalid pitch - confirm whether that combination can occur.
3996 if (pIn->flags.qbStereo)
3997 {
3998 returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3999 }
4000
4001 if (returnCode == ADDR_OK)
4002 {
4003 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
4004
4005 if (heightAlign > 1)
4006 {
4007 pOut->height = PowTwoAlign(pOut->height, heightAlign);
4008 }
4009
4010 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
4011
// Defaults for the single-mip case; the mip chain branch below overrides them as needed
4012 pOut->epitchIsHeight = FALSE;
4013 pOut->mipChainInTail = FALSE;
4014 pOut->firstMipIdInTail = pIn->numMipLevels;
4015
4016 pOut->mipChainPitch = pOut->pitch;
4017 pOut->mipChainHeight = pOut->height;
4018 pOut->mipChainSlice = pOut->numSlices;
4019
4020 if (pIn->numMipLevels > 1)
4021 {
4022 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
4023 pIn->swizzleMode,
4024 pIn->bpp,
4025 pIn->width,
4026 pIn->height,
4027 pIn->numSlices,
4028 pOut->blockWidth,
4029 pOut->blockHeight,
4030 pOut->blockSlices,
4031 pIn->numMipLevels,
4032 pOut->pMipInfo);
4033
4034 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4035
// endingMipId == 0: mip 0 itself is in the mip tail, so the whole chain is sized by the tail dimensions
4036 if (endingMipId == 0)
4037 {
4038 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4039 pIn->swizzleMode,
4040 pOut->blockWidth,
4041 pOut->blockHeight,
4042 pOut->blockSlices);
4043
4044 pOut->epitchIsHeight = TRUE;
4045 pOut->pitch = tailMaxDim.w;
4046 pOut->height = tailMaxDim.h;
4047 pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4048 tailMaxDim.d : pIn->numSlices;
4049 pOut->mipChainInTail = TRUE;
4050 }
4051 else
4052 {
4053 UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
4054 UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4055
4056 AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4057 pIn->swizzleMode,
4058 mip0WidthInBlk,
4059 mip0HeightInBlk,
4060 pOut->numSlices / pOut->blockSlices);
// ADDR_MAJOR_Y extends the chain pitch by the mip 1 width; every other major mode extends the chain
// height by the mip 1 height. A 1-block mip 1 is widened to 2 blocks when endingMipId > 2.
4061 if (majorMode == ADDR_MAJOR_Y)
4062 {
4063 UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4064
4065 if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4066 {
4067 mip1WidthInBlk++;
4068 }
4069
4070 pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4071
4072 pOut->epitchIsHeight = FALSE;
4073 }
4074 else
4075 {
4076 UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4077
4078 if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4079 {
4080 mip1HeightInBlk++;
4081 }
4082
4083 pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4084
4085 pOut->epitchIsHeight = TRUE;
4086 }
4087 }
4088
// Per-mip macro block offset and mip tail offset, only when the client asked for mip info
4089 if (pOut->pMipInfo != NULL)
4090 {
4091 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4092
4093 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4094 {
4095 Dim3d mipStartPos = {0};
4096 UINT_32 mipTailOffsetInBytes = 0;
4097
4098 mipStartPos = GetMipStartPos(pIn->resourceType,
4099 pIn->swizzleMode,
4100 pOut->pitch,
4101 pOut->height,
4102 pOut->numSlices,
4103 pOut->blockWidth,
4104 pOut->blockHeight,
4105 pOut->blockSlices,
4106 i,
4107 elementBytesLog2,
4108 &mipTailOffsetInBytes);
4109
// Convert the start position into a linear block index inside the mip chain, then scale by the block size
// NOTE(review): the index arithmetic presumably runs in 32 bits before being widened to UINT_64 (Dim3d
// member types are not visible here) - confirm it cannot overflow for large surfaces.
4110 UINT_32 pitchInBlock =
4111 pOut->mipChainPitch / pOut->blockWidth;
4112 UINT_32 sliceInBlock =
4113 (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4114 UINT_64 blockIndex =
4115 mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4116 UINT_64 macroBlockOffset =
4117 blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4118
4119 pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4120 pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
4121 }
4122 }
4123 }
// Single mip level: describe it directly in entry 0
4124 else if (pOut->pMipInfo != NULL)
4125 {
4126 pOut->pMipInfo[0].pitch = pOut->pitch;
4127 pOut->pMipInfo[0].height = pOut->height;
4128 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4129 pOut->pMipInfo[0].offset = 0;
4130 }
4131
// Slice size in bytes = chain pitch * chain height * bytes per element * fragments (computed in 64 bits)
4132 pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4133 (pIn->bpp >> 3) * pIn->numFrags;
4134 pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
4135 pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4136
4137 if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4138 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4139 (pIn->flags.texture == TRUE) &&
4140 (pIn->flags.noMetadata == FALSE) &&
4141 (pIn->flags.metaPipeUnaligned == FALSE))
4142 {
4143 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4144 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4145 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4146 // them, which may cause invalid metadata to be fetched.
4147 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4148 }
4149
// PRT surfaces need at least PrtAlignment
4150 if (pIn->flags.prt)
4151 {
4152 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4153 }
4154 }
4155 }
4156
4157 return returnCode;
4158 }
4159
4160 /**
4161 ************************************************************************************************************************
4162 * Gfx9Lib::HwlComputeSurfaceInfoLinear
4163 *
4164 * @brief
4165 * Internal function to calculate alignment for linear surface
4166 *
4167 * @return
4168 * ADDR_E_RETURNCODE
4169 ************************************************************************************************************************
4170 */
4171 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4172 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4173 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4174 ) const
4175 {
4176 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4177 UINT_32 pitch = 0;
4178 UINT_32 actualHeight = 0;
4179 UINT_32 elementBytes = pIn->bpp >> 3;
4180 const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256;
4181
4182 if (IsTex1d(pIn->resourceType))
4183 {
4184 if (pIn->height > 1)
4185 {
4186 returnCode = ADDR_INVALIDPARAMS;
4187 }
4188 else
4189 {
4190 const UINT_32 pitchAlignInElement = alignment / elementBytes;
4191
4192 pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4193 actualHeight = pIn->numMipLevels;
4194
4195 if (pIn->flags.prt == FALSE)
4196 {
4197 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4198 &pitch, &actualHeight);
4199 }
4200
4201 if (returnCode == ADDR_OK)
4202 {
4203 if (pOut->pMipInfo != NULL)
4204 {
4205 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4206 {
4207 pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4208 pOut->pMipInfo[i].pitch = pitch;
4209 pOut->pMipInfo[i].height = 1;
4210 pOut->pMipInfo[i].depth = 1;
4211 }
4212 }
4213 }
4214 }
4215 }
4216 else
4217 {
4218 returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4219 }
4220
4221 if ((pitch == 0) || (actualHeight == 0))
4222 {
4223 returnCode = ADDR_INVALIDPARAMS;
4224 }
4225
4226 if (returnCode == ADDR_OK)
4227 {
4228 pOut->pitch = pitch;
4229 pOut->height = pIn->height;
4230 pOut->numSlices = pIn->numSlices;
4231 pOut->mipChainPitch = pitch;
4232 pOut->mipChainHeight = actualHeight;
4233 pOut->mipChainSlice = pOut->numSlices;
4234 pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4235 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4236 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
4237 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4238 pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4239 pOut->blockHeight = 1;
4240 pOut->blockSlices = 1;
4241 }
4242
4243 // Post calculation validate
4244 ADDR_ASSERT(pOut->sliceSize > 0);
4245
4246 return returnCode;
4247 }
4248
4249 /**
4250 ************************************************************************************************************************
4251 * Gfx9Lib::GetMipChainInfo
4252 *
4253 * @brief
4254 * Internal function to get out information about mip chain
4255 *
4256 * @return
4257 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4258 ************************************************************************************************************************
4259 */
4260 UINT_32 Gfx9Lib::GetMipChainInfo(
4261 AddrResourceType resourceType,
4262 AddrSwizzleMode swizzleMode,
4263 UINT_32 bpp,
4264 UINT_32 mip0Width,
4265 UINT_32 mip0Height,
4266 UINT_32 mip0Depth,
4267 UINT_32 blockWidth,
4268 UINT_32 blockHeight,
4269 UINT_32 blockDepth,
4270 UINT_32 numMipLevel,
4271 ADDR2_MIP_INFO* pMipInfo) const
4272 {
4273 const Dim3d tailMaxDim =
4274 GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4275
4276 UINT_32 mipPitch = mip0Width;
4277 UINT_32 mipHeight = mip0Height;
4278 UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
4279 UINT_32 offset = 0;
4280 UINT_32 firstMipIdInTail = numMipLevel;
4281 BOOL_32 inTail = FALSE;
4282 BOOL_32 finalDim = FALSE;
4283 BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
4284 BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
4285
4286 for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4287 {
4288 if (inTail)
4289 {
4290 if (finalDim == FALSE)
4291 {
4292 UINT_32 mipSize;
4293
4294 if (is3dThick)
4295 {
4296 mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4297 }
4298 else
4299 {
4300 mipSize = mipPitch * mipHeight * (bpp >> 3);
4301 }
4302
4303 if (mipSize <= 256)
4304 {
4305 UINT_32 index = Log2(bpp >> 3);
4306
4307 if (is3dThick)
4308 {
4309 mipPitch = Block256_3dZ[index].w;
4310 mipHeight = Block256_3dZ[index].h;
4311 mipDepth = Block256_3dZ[index].d;
4312 }
4313 else
4314 {
4315 mipPitch = Block256_2d[index].w;
4316 mipHeight = Block256_2d[index].h;
4317 }
4318
4319 finalDim = TRUE;
4320 }
4321 }
4322 }
4323 else
4324 {
4325 inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4326 mipPitch, mipHeight, mipDepth);
4327
4328 if (inTail)
4329 {
4330 firstMipIdInTail = mipId;
4331 mipPitch = tailMaxDim.w;
4332 mipHeight = tailMaxDim.h;
4333
4334 if (is3dThick)
4335 {
4336 mipDepth = tailMaxDim.d;
4337 }
4338 }
4339 else
4340 {
4341 mipPitch = PowTwoAlign(mipPitch, blockWidth);
4342 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4343
4344 if (is3dThick)
4345 {
4346 mipDepth = PowTwoAlign(mipDepth, blockDepth);
4347 }
4348 }
4349 }
4350
4351 if (pMipInfo != NULL)
4352 {
4353 pMipInfo[mipId].pitch = mipPitch;
4354 pMipInfo[mipId].height = mipHeight;
4355 pMipInfo[mipId].depth = mipDepth;
4356 pMipInfo[mipId].offset = offset;
4357 }
4358
4359 offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4360
4361 if (finalDim)
4362 {
4363 if (is3dThin)
4364 {
4365 mipDepth = Max(mipDepth >> 1, 1u);
4366 }
4367 }
4368 else
4369 {
4370 mipPitch = Max(mipPitch >> 1, 1u);
4371 mipHeight = Max(mipHeight >> 1, 1u);
4372
4373 if (is3dThick || is3dThin)
4374 {
4375 mipDepth = Max(mipDepth >> 1, 1u);
4376 }
4377 }
4378 }
4379
4380 return firstMipIdInTail;
4381 }
4382
4383 /**
4384 ************************************************************************************************************************
4385 * Gfx9Lib::GetMetaMiptailInfo
4386 *
4387 * @brief
4388 * Get mip tail coordinate information.
4389 *
4390 * @return
4391 * N/A
4392 ************************************************************************************************************************
4393 */
4394 VOID Gfx9Lib::GetMetaMiptailInfo(
4395 ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord
4396 Dim3d mipCoord, ///< [in] mip tail base coord
4397 UINT_32 numMipInTail, ///< [in] number of mips in tail
4398 Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
4399 ) const
4400 {
4401 BOOL_32 isThick = (pMetaBlkDim->d > 1);
4402 UINT_32 mipWidth = pMetaBlkDim->w;
4403 UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4404 UINT_32 mipDepth = pMetaBlkDim->d;
4405 UINT_32 minInc;
4406
4407 if (isThick)
4408 {
4409 minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4410 }
4411 else if (pMetaBlkDim->h >= 1024)
4412 {
4413 minInc = 256;
4414 }
4415 else if (pMetaBlkDim->h == 512)
4416 {
4417 minInc = 128;
4418 }
4419 else
4420 {
4421 minInc = 64;
4422 }
4423
4424 UINT_32 blk32MipId = 0xFFFFFFFF;
4425
4426 for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4427 {
4428 pInfo[mip].inMiptail = TRUE;
4429 pInfo[mip].startX = mipCoord.w;
4430 pInfo[mip].startY = mipCoord.h;
4431 pInfo[mip].startZ = mipCoord.d;
4432 pInfo[mip].width = mipWidth;
4433 pInfo[mip].height = mipHeight;
4434 pInfo[mip].depth = mipDepth;
4435
4436 if (mipWidth <= 32)
4437 {
4438 if (blk32MipId == 0xFFFFFFFF)
4439 {
4440 blk32MipId = mip;
4441 }
4442
4443 mipCoord.w = pInfo[blk32MipId].startX;
4444 mipCoord.h = pInfo[blk32MipId].startY;
4445 mipCoord.d = pInfo[blk32MipId].startZ;
4446
4447 switch (mip - blk32MipId)
4448 {
4449 case 0:
4450 mipCoord.w += 32; // 16x16
4451 break;
4452 case 1:
4453 mipCoord.h += 32; // 8x8
4454 break;
4455 case 2:
4456 mipCoord.h += 32; // 4x4
4457 mipCoord.w += 16;
4458 break;
4459 case 3:
4460 mipCoord.h += 32; // 2x2
4461 mipCoord.w += 32;
4462 break;
4463 case 4:
4464 mipCoord.h += 32; // 1x1
4465 mipCoord.w += 48;
4466 break;
4467 // The following are for BC/ASTC formats
4468 case 5:
4469 mipCoord.h += 48; // 1/2 x 1/2
4470 break;
4471 case 6:
4472 mipCoord.h += 48; // 1/4 x 1/4
4473 mipCoord.w += 16;
4474 break;
4475 case 7:
4476 mipCoord.h += 48; // 1/8 x 1/8
4477 mipCoord.w += 32;
4478 break;
4479 case 8:
4480 mipCoord.h += 48; // 1/16 x 1/16
4481 mipCoord.w += 48;
4482 break;
4483 default:
4484 ADDR_ASSERT_ALWAYS();
4485 break;
4486 }
4487
4488 mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4489 mipHeight = mipWidth;
4490
4491 if (isThick)
4492 {
4493 mipDepth = mipWidth;
4494 }
4495 }
4496 else
4497 {
4498 if (mipWidth <= minInc)
4499 {
4500 // if we're below the minimal increment...
4501 if (isThick)
4502 {
4503 // For 3d, just go in z direction
4504 mipCoord.d += mipDepth;
4505 }
4506 else
4507 {
4508 // For 2d, first go across, then down
4509 if ((mipWidth * 2) == minInc)
4510 {
4511 // if we're 2 mips below, that's when we go back in x, and down in y
4512 mipCoord.w -= minInc;
4513 mipCoord.h += minInc;
4514 }
4515 else
4516 {
4517 // otherwise, just go across in x
4518 mipCoord.w += minInc;
4519 }
4520 }
4521 }
4522 else
4523 {
4524 // On even mip, go down, otherwise, go across
4525 if (mip & 1)
4526 {
4527 mipCoord.w += mipWidth;
4528 }
4529 else
4530 {
4531 mipCoord.h += mipHeight;
4532 }
4533 }
4534 // Divide the width by 2
4535 mipWidth >>= 1;
4536 // After the first mip in tail, the mip is always a square
4537 mipHeight = mipWidth;
4538 // ...or for 3d, a cube
4539 if (isThick)
4540 {
4541 mipDepth = mipWidth;
4542 }
4543 }
4544 }
4545 }
4546
4547 /**
4548 ************************************************************************************************************************
4549 * Gfx9Lib::GetMipStartPos
4550 *
4551 * @brief
4552 * Internal function to get out information about mip logical start position
4553 *
4554 * @return
4555 * logical start position in macro block width/height/depth of one mip level within one slice
4556 ************************************************************************************************************************
4557 */
4558 Dim3d Gfx9Lib::GetMipStartPos(
4559 AddrResourceType resourceType,
4560 AddrSwizzleMode swizzleMode,
4561 UINT_32 width,
4562 UINT_32 height,
4563 UINT_32 depth,
4564 UINT_32 blockWidth,
4565 UINT_32 blockHeight,
4566 UINT_32 blockDepth,
4567 UINT_32 mipId,
4568 UINT_32 log2ElementBytes,
4569 UINT_32* pMipTailBytesOffset) const
4570 {
4571 Dim3d mipStartPos = {0};
4572 const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4573
4574 // Report mip in tail if Mip0 is already in mip tail
4575 BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4576 UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
4577 UINT_32 mipIndexInTail = mipId;
4578
4579 if (inMipTail == FALSE)
4580 {
4581 // Mip 0 dimension, unit in block
4582 UINT_32 mipWidthInBlk = width / blockWidth;
4583 UINT_32 mipHeightInBlk = height / blockHeight;
4584 UINT_32 mipDepthInBlk = depth / blockDepth;
4585 AddrMajorMode majorMode = GetMajorMode(resourceType,
4586 swizzleMode,
4587 mipWidthInBlk,
4588 mipHeightInBlk,
4589 mipDepthInBlk);
4590
4591 UINT_32 endingMip = mipId + 1;
4592
4593 for (UINT_32 i = 1; i <= mipId; i++)
4594 {
4595 if ((i == 1) || (i == 3))
4596 {
4597 if (majorMode == ADDR_MAJOR_Y)
4598 {
4599 mipStartPos.w += mipWidthInBlk;
4600 }
4601 else
4602 {
4603 mipStartPos.h += mipHeightInBlk;
4604 }
4605 }
4606 else
4607 {
4608 if (majorMode == ADDR_MAJOR_X)
4609 {
4610 mipStartPos.w += mipWidthInBlk;
4611 }
4612 else if (majorMode == ADDR_MAJOR_Y)
4613 {
4614 mipStartPos.h += mipHeightInBlk;
4615 }
4616 else
4617 {
4618 mipStartPos.d += mipDepthInBlk;
4619 }
4620 }
4621
4622 BOOL_32 inTail = FALSE;
4623
4624 if (IsThick(resourceType, swizzleMode))
4625 {
4626 UINT_32 dim = log2BlkSize % 3;
4627
4628 if (dim == 0)
4629 {
4630 inTail =
4631 (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4632 }
4633 else if (dim == 1)
4634 {
4635 inTail =
4636 (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4637 }
4638 else
4639 {
4640 inTail =
4641 (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4642 }
4643 }
4644 else
4645 {
4646 if (log2BlkSize & 1)
4647 {
4648 inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4649 }
4650 else
4651 {
4652 inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4653 }
4654 }
4655
4656 if (inTail)
4657 {
4658 endingMip = i;
4659 break;
4660 }
4661
4662 mipWidthInBlk = RoundHalf(mipWidthInBlk);
4663 mipHeightInBlk = RoundHalf(mipHeightInBlk);
4664 mipDepthInBlk = RoundHalf(mipDepthInBlk);
4665 }
4666
4667 if (mipId >= endingMip)
4668 {
4669 inMipTail = TRUE;
4670 mipIndexInTail = mipId - endingMip;
4671 }
4672 }
4673
4674 if (inMipTail)
4675 {
4676 UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
4677 ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4678 *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4679 }
4680
4681 return mipStartPos;
4682 }
4683
4684 /**
4685 ************************************************************************************************************************
4686 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4687 *
4688 * @brief
4689 * Internal function to calculate address from coord for tiled swizzle surface
4690 *
4691 * @return
4692 * ADDR_E_RETURNCODE
4693 ************************************************************************************************************************
4694 */
4695 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4696 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4697 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4698 ) const
4699 {
4700 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4701 localIn.swizzleMode = pIn->swizzleMode;
4702 localIn.flags = pIn->flags;
4703 localIn.resourceType = pIn->resourceType;
4704 localIn.bpp = pIn->bpp;
4705 localIn.width = Max(pIn->unalignedWidth, 1u);
4706 localIn.height = Max(pIn->unalignedHeight, 1u);
4707 localIn.numSlices = Max(pIn->numSlices, 1u);
4708 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4709 localIn.numSamples = Max(pIn->numSamples, 1u);
4710 localIn.numFrags = Max(pIn->numFrags, 1u);
4711 if (localIn.numMipLevels <= 1)
4712 {
4713 localIn.pitchInElement = pIn->pitchInElement;
4714 }
4715
4716 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4717 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4718
4719 BOOL_32 valid = (returnCode == ADDR_OK) &&
4720 (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4721 IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4722 ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4723
4724 if (valid)
4725 {
4726 UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3);
4727 Dim3d mipStartPos = {0};
4728 UINT_32 mipTailBytesOffset = 0;
4729
4730 if (pIn->numMipLevels > 1)
4731 {
4732 // Mip-map chain cannot be MSAA surface
4733 ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4734
4735 mipStartPos = GetMipStartPos(pIn->resourceType,
4736 pIn->swizzleMode,
4737 localOut.pitch,
4738 localOut.height,
4739 localOut.numSlices,
4740 localOut.blockWidth,
4741 localOut.blockHeight,
4742 localOut.blockSlices,
4743 pIn->mipId,
4744 log2ElementBytes,
4745 &mipTailBytesOffset);
4746 }
4747
4748 UINT_32 interleaveOffset = 0;
4749 UINT_32 pipeBits = 0;
4750 UINT_32 pipeXor = 0;
4751 UINT_32 bankBits = 0;
4752 UINT_32 bankXor = 0;
4753
4754 if (IsThin(pIn->resourceType, pIn->swizzleMode))
4755 {
4756 UINT_32 blockOffset = 0;
4757 UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4758
4759 if (IsZOrderSwizzle(pIn->swizzleMode))
4760 {
4761 // Morton generation
4762 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4763 {
4764 UINT_32 totalLowBits = 6 - log2ElementBytes;
4765 UINT_32 mortBits = totalLowBits / 2;
4766 UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4767 // Are 9 bits enough?
4768 UINT_32 highBitsValue =
4769 MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4770 blockOffset = lowBitsValue | highBitsValue;
4771 ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4772 }
4773 else
4774 {
4775 blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4776 }
4777
4778 // Fill LSBs with sample bits
4779 if (pIn->numSamples > 1)
4780 {
4781 blockOffset *= pIn->numSamples;
4782 blockOffset |= pIn->sample;
4783 }
4784
4785 // Shift according to BytesPP
4786 blockOffset <<= log2ElementBytes;
4787 }
4788 else
4789 {
4790 // Micro block offset
4791 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4792 blockOffset = microBlockOffset;
4793
4794 // Micro block dimension
4795 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4796 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4797 // Morton generation, does 12 bit enough?
4798 blockOffset |=
4799 MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4800
4801 // Sample bits start location
4802 UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
4803 // Join sample bits information to the highest Macro block bits
4804 if (IsNonPrtXor(pIn->swizzleMode))
4805 {
4806 // Non-prt-Xor : xor highest Macro block bits with sample bits
4807 blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4808 }
4809 else
4810 {
4811 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4812 // after this op, the blockOffset only contains log2 Macro block size bits
4813 blockOffset %= (1 << sampleStart);
4814 blockOffset |= (pIn->sample << sampleStart);
4815 ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
4816 }
4817 }
4818
4819 if (IsXor(pIn->swizzleMode))
4820 {
4821 // Mask off bits above Macro block bits to keep page synonyms working for prt
4822 if (IsPrt(pIn->swizzleMode))
4823 {
4824 blockOffset &= ((1 << log2BlkSize) - 1);
4825 }
4826
4827 // Preserve offset inside pipe interleave
4828 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4829 blockOffset >>= m_pipeInterleaveLog2;
4830
4831 // Pipe/Se xor bits
4832 pipeBits = GetPipeXorBits(log2BlkSize);
4833 // Pipe xor
4834 pipeXor = FoldXor2d(blockOffset, pipeBits);
4835 blockOffset >>= pipeBits;
4836
4837 // Bank xor bits
4838 bankBits = GetBankXorBits(log2BlkSize);
4839 // Bank Xor
4840 bankXor = FoldXor2d(blockOffset, bankBits);
4841 blockOffset >>= bankBits;
4842
4843 // Put all the part back together
4844 blockOffset <<= bankBits;
4845 blockOffset |= bankXor;
4846 blockOffset <<= pipeBits;
4847 blockOffset |= pipeXor;
4848 blockOffset <<= m_pipeInterleaveLog2;
4849 blockOffset |= interleaveOffset;
4850 }
4851
4852 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4853 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4854
4855 blockOffset |= mipTailBytesOffset;
4856
4857 if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4858 {
4859 // Apply slice xor if not MSAA/PRT
4860 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4861 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4862 (m_pipeInterleaveLog2 + pipeBits));
4863 }
4864
4865 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4866 bankBits, pipeBits, &blockOffset);
4867
4868 blockOffset %= (1 << log2BlkSize);
4869
4870 UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4871 UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4872 UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4873 UINT_64 macroBlockIndex =
4874 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4875 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4876 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4877
4878 pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
4879 }
4880 else
4881 {
4882 UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4883
4884 Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4885
4886 UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4887 (pIn->y / microBlockDim.h),
4888 (pIn->slice / microBlockDim.d),
4889 8);
4890
4891 blockOffset <<= 10;
4892 blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4893
4894 if (IsXor(pIn->swizzleMode))
4895 {
4896 // Mask off bits above Macro block bits to keep page synonyms working for prt
4897 if (IsPrt(pIn->swizzleMode))
4898 {
4899 blockOffset &= ((1 << log2BlkSize) - 1);
4900 }
4901
4902 // Preserve offset inside pipe interleave
4903 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4904 blockOffset >>= m_pipeInterleaveLog2;
4905
4906 // Pipe/Se xor bits
4907 pipeBits = GetPipeXorBits(log2BlkSize);
4908 // Pipe xor
4909 pipeXor = FoldXor3d(blockOffset, pipeBits);
4910 blockOffset >>= pipeBits;
4911
4912 // Bank xor bits
4913 bankBits = GetBankXorBits(log2BlkSize);
4914 // Bank Xor
4915 bankXor = FoldXor3d(blockOffset, bankBits);
4916 blockOffset >>= bankBits;
4917
4918 // Put all the part back together
4919 blockOffset <<= bankBits;
4920 blockOffset |= bankXor;
4921 blockOffset <<= pipeBits;
4922 blockOffset |= pipeXor;
4923 blockOffset <<= m_pipeInterleaveLog2;
4924 blockOffset |= interleaveOffset;
4925 }
4926
4927 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4928 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4929 blockOffset |= mipTailBytesOffset;
4930
4931 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4932 bankBits, pipeBits, &blockOffset);
4933
4934 blockOffset %= (1 << log2BlkSize);
4935
4936 UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
4937 UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4938 UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4939
4940 UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4941 UINT_32 sliceSizeInBlock =
4942 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4943 UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4944
4945 pOut->addr = blockOffset | (blockIndex << log2BlkSize);
4946 }
4947 }
4948 else
4949 {
4950 returnCode = ADDR_INVALIDPARAMS;
4951 }
4952
4953 return returnCode;
4954 }
4955
4956 /**
4957 ************************************************************************************************************************
4958 * Gfx9Lib::ComputeSurfaceLinearPadding
4959 *
4960 * @brief
4961 * Internal function to calculate padding for linear swizzle 2D/3D surface
4962 *
4963 * @return
4964 * ADDR_E_RETURNCODE
4965 ************************************************************************************************************************
4966 */
4967 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
4968 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture
4969 UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element
4970 UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW
4971 ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information
4972 ) const
4973 {
4974 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4975
4976 UINT_32 elementBytes = pIn->bpp >> 3;
4977 UINT_32 pitchAlignInElement = 0;
4978
4979 if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
4980 {
4981 ADDR_ASSERT(pIn->numMipLevels <= 1);
4982 ADDR_ASSERT(pIn->numSlices <= 1);
4983 pitchAlignInElement = 1;
4984 }
4985 else
4986 {
4987 pitchAlignInElement = (256 / elementBytes);
4988 }
4989
4990 UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement);
4991 UINT_32 slice0PaddedHeight = pIn->height;
4992
4993 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4994 &mipChainWidth, &slice0PaddedHeight);
4995
4996 if (returnCode == ADDR_OK)
4997 {
4998 UINT_32 mipChainHeight = 0;
4999 UINT_32 mipHeight = pIn->height;
5000 UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
5001
5002 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
5003 {
5004 if (pMipInfo != NULL)
5005 {
5006 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
5007 pMipInfo[i].pitch = mipChainWidth;
5008 pMipInfo[i].height = mipHeight;
5009 pMipInfo[i].depth = mipDepth;
5010 }
5011
5012 mipChainHeight += mipHeight;
5013 mipHeight = RoundHalf(mipHeight);
5014 mipHeight = Max(mipHeight, 1u);
5015 }
5016
5017 *pMipmap0PaddedWidth = mipChainWidth;
5018 *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
5019 }
5020
5021 return returnCode;
5022 }
5023
5024 /**
5025 ************************************************************************************************************************
5026 * Gfx9Lib::ComputeThinBlockDimension
5027 *
5028 * @brief
5029 * Internal function to get thin block width/height/depth in element from surface input params.
5030 *
5031 * @return
5032 * N/A
5033 ************************************************************************************************************************
5034 */
5035 VOID Gfx9Lib::ComputeThinBlockDimension(
5036 UINT_32* pWidth,
5037 UINT_32* pHeight,
5038 UINT_32* pDepth,
5039 UINT_32 bpp,
5040 UINT_32 numSamples,
5041 AddrResourceType resourceType,
5042 AddrSwizzleMode swizzleMode) const
5043 {
5044 ADDR_ASSERT(IsThin(resourceType, swizzleMode));
5045
5046 const UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
5047 const UINT_32 eleBytes = bpp >> 3;
5048 const UINT_32 microBlockSizeTableIndex = Log2(eleBytes);
5049 const UINT_32 log2blkSizeIn256B = log2BlkSize - 8;
5050 const UINT_32 widthAmp = log2blkSizeIn256B / 2;
5051 const UINT_32 heightAmp = log2blkSizeIn256B - widthAmp;
5052
5053 ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));
5054
5055 *pWidth = (Block256_2d[microBlockSizeTableIndex].w << widthAmp);
5056 *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp);
5057 *pDepth = 1;
5058
5059 if (numSamples > 1)
5060 {
5061 const UINT_32 log2sample = Log2(numSamples);
5062 const UINT_32 q = log2sample >> 1;
5063 const UINT_32 r = log2sample & 1;
5064
5065 if (log2BlkSize & 1)
5066 {
5067 *pWidth >>= q;
5068 *pHeight >>= (q + r);
5069 }
5070 else
5071 {
5072 *pWidth >>= (q + r);
5073 *pHeight >>= q;
5074 }
5075 }
5076 }
5077
5078 } // V2
5079 } // Addr