amd: update addrlib
[mesa.git] / src / amd / addrlib / src / gfx9 / gfx9addrlib.cpp
1 /*
2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx9addrlib.h"
35
36 #include "gfx9_gb_reg.h"
37
38 #include "amdgpu_asic_addr.h"
39
40 #include "util/macros.h"
41
42 ////////////////////////////////////////////////////////////////////////////////////////////////////
43 ////////////////////////////////////////////////////////////////////////////////////////////////////
44
45 namespace Addr
46 {
47
48 /**
49 ************************************************************************************************************************
50 * Gfx9HwlInit
51 *
52 * @brief
53 * Creates an Gfx9Lib object.
54 *
55 * @return
56 * Returns an Gfx9Lib object pointer.
57 ************************************************************************************************************************
58 */
59 Addr::Lib* Gfx9HwlInit(const Client* pClient)
60 {
61 return V2::Gfx9Lib::CreateObj(pClient);
62 }
63
64 namespace V2
65 {
66
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68 // Static Const Member
69 ////////////////////////////////////////////////////////////////////////////////////////////////////
70
// Static attribute table for the swizzle modes: one row per mode (the mode is named in the trailing
// comment of each row) and one flag per column (the columns are named in the header row that follows
// the opening brace). The Gfx9Lib constructor copies this table into m_swizzleModeTable.
// As written here: every *_T row sets both the XOR and T columns, every *_X / *_x row sets XOR only,
// the RtOpt column is 0 in every row, and ADDR_SW_LINEAR_GENERAL carries the same flags as ADDR_SW_LINEAR.
71 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
72 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt
73 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
74 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
75 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D
76 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R
77
78 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
79 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
80 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D
81 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R
82
83 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
84 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
85 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D
86 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_R
87
88 {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z
89 {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S
90 {0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D
91 {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R
92
93 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T
94 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T
95 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T
96 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T
97
98 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_x
99 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_x
100 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_x
101 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_x
102
103 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X
104 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X
105 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X
106 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X
107
108 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X
109 {0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X
110 {0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X
111 {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X
112 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
113 };
114
// Sixteen-entry lookup table. The first nine entries halve from 2048 down to 8; the remaining
// entries step 6, 5, 4, 3, 2, 1, 0.
// NOTE(review): judging by the name these are mip-tail offsets expressed in 256-byte units; the
// code that indexes this table is not in this part of the file - confirm against its user.
115 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
116 8, 6, 5, 4, 3, 2, 1, 0};
117
// Five 3D block dimensions whose element counts are 256, 128, 64, 32 and 16 respectively.
// NOTE(review): presumably the {w, h, d} footprint of a 256-byte block for the "S" swizzle family,
// indexed by log2(bytes per element) - the lookup site is not visible here; confirm.
118 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
119
// Five 3D block dimensions whose element counts are 256, 128, 64, 32 and 16 respectively.
// NOTE(review): presumably the {w, h, d} footprint of a 256-byte block for the "Z" swizzle family,
// indexed by log2(bytes per element) - the lookup site is not visible here; confirm.
120 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
121
122 /**
123 ************************************************************************************************************************
124 * Gfx9Lib::Gfx9Lib
125 *
126 * @brief
127 * Constructor
128 *
129 ************************************************************************************************************************
130 */
131 Gfx9Lib::Gfx9Lib(const Client* pClient)
132 :
133 Lib(pClient),
134 m_numEquations(0)
135 {
136 m_class = AI_ADDRLIB;
137 memset(&m_settings, 0, sizeof(m_settings));
138 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
139 memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
140 m_metaEqOverrideIndex = 0;
141 }
142
143 /**
144 ************************************************************************************************************************
145 * Gfx9Lib::~Gfx9Lib
146 *
147 * @brief
148 * Destructor
149 ************************************************************************************************************************
150 */
151 Gfx9Lib::~Gfx9Lib()
152 {
153 }
154
155 /**
156 ************************************************************************************************************************
157 * Gfx9Lib::HwlComputeHtileInfo
158 *
159 * @brief
160 * Interface function stub of AddrComputeHtilenfo
161 *
162 * @return
163 * ADDR_E_RETURNCODE
164 ************************************************************************************************************************
165 */
166 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
167 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
168 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
169 ) const
170 {
171 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
172 pIn->swizzleMode);
173
174 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
175
176 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
177
178 if ((numPipeTotal == 1) && (numRbTotal == 1))
179 {
180 numCompressBlkPerMetaBlkLog2 = 10;
181 }
182 else
183 {
184 if (m_settings.applyAliasFix)
185 {
186 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
187 }
188 else
189 {
190 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
191 }
192 }
193
194 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
195
196 Dim3d metaBlkDim = {8, 8, 1};
197 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
198 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
199 UINT_32 heightAmp = totalAmpBits - widthAmp;
200 metaBlkDim.w <<= widthAmp;
201 metaBlkDim.h <<= heightAmp;
202
203 #if DEBUG
204 Dim3d metaBlkDimDbg = {8, 8, 1};
205 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
206 {
207 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
208 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
209 {
210 metaBlkDimDbg.h <<= 1;
211 }
212 else
213 {
214 metaBlkDimDbg.w <<= 1;
215 }
216 }
217 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
218 #endif
219
220 UINT_32 numMetaBlkX;
221 UINT_32 numMetaBlkY;
222 UINT_32 numMetaBlkZ;
223
224 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
225 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
226 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
227
228 const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
229 UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
230
231 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
232 {
233 align *= (numPipeTotal >> 1);
234 }
235
236 align = Max(align, metaBlkSize);
237
238 if (m_settings.metaBaseAlignFix)
239 {
240 align = Max(align, GetBlockSize(pIn->swizzleMode));
241 }
242
243 if (m_settings.htileAlignFix)
244 {
245 const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
246 const INT_32 htileCachelineSizeLog2 = 11;
247 const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
248
249 INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
250
251 align <<= rbMaskPadding;
252 }
253
254 pOut->pitch = numMetaBlkX * metaBlkDim.w;
255 pOut->height = numMetaBlkY * metaBlkDim.h;
256 pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
257
258 pOut->metaBlkWidth = metaBlkDim.w;
259 pOut->metaBlkHeight = metaBlkDim.h;
260 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
261
262 pOut->baseAlign = align;
263 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
264
265 return ADDR_OK;
266 }
267
268 /**
269 ************************************************************************************************************************
270 * Gfx9Lib::HwlComputeCmaskInfo
271 *
272 * @brief
273 * Interface function stub of AddrComputeCmaskInfo
274 *
275 * @return
276 * ADDR_E_RETURNCODE
277 ************************************************************************************************************************
278 */
279 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
280 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
281 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
282 ) const
283 {
284 // TODO: Clarify with AddrLib team
285 // ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
286
287 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
288 pIn->swizzleMode);
289
290 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
291
292 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
293
294 if ((numPipeTotal == 1) && (numRbTotal == 1))
295 {
296 numCompressBlkPerMetaBlkLog2 = 13;
297 }
298 else
299 {
300 if (m_settings.applyAliasFix)
301 {
302 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
303 }
304 else
305 {
306 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
307 }
308
309 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
310 }
311
312 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
313
314 Dim2d metaBlkDim = {8, 8};
315 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
316 UINT_32 heightAmp = totalAmpBits >> 1;
317 UINT_32 widthAmp = totalAmpBits - heightAmp;
318 metaBlkDim.w <<= widthAmp;
319 metaBlkDim.h <<= heightAmp;
320
321 #if DEBUG
322 Dim2d metaBlkDimDbg = {8, 8};
323 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
324 {
325 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
326 {
327 metaBlkDimDbg.h <<= 1;
328 }
329 else
330 {
331 metaBlkDimDbg.w <<= 1;
332 }
333 }
334 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
335 #endif
336
337 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
338 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
339 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
340
341 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
342
343 if (m_settings.metaBaseAlignFix)
344 {
345 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
346 }
347
348 pOut->pitch = numMetaBlkX * metaBlkDim.w;
349 pOut->height = numMetaBlkY * metaBlkDim.h;
350 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
351 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
352 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
353
354 pOut->metaBlkWidth = metaBlkDim.w;
355 pOut->metaBlkHeight = metaBlkDim.h;
356
357 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
358
359 return ADDR_OK;
360 }
361
362 /**
363 ************************************************************************************************************************
364 * Gfx9Lib::GetMetaMipInfo
365 *
366 * @brief
367 * Get meta mip info
368 *
369 * @return
370 * N/A
371 ************************************************************************************************************************
372 */
373 VOID Gfx9Lib::GetMetaMipInfo(
374 UINT_32 numMipLevels, ///< [in] number of mip levels
375 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
376 BOOL_32 dataThick, ///< [in] data surface is thick
377 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
378 UINT_32 mip0Width, ///< [in] mip0 width
379 UINT_32 mip0Height, ///< [in] mip0 height
380 UINT_32 mip0Depth, ///< [in] mip0 depth
381 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
382 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
383 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
384 const
385 {
386 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
387 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
388 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
389 UINT_32 tailWidth = pMetaBlkDim->w;
390 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
391 UINT_32 tailDepth = pMetaBlkDim->d;
392 BOOL_32 inTail = FALSE;
393 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
394
395 if (numMipLevels > 1)
396 {
397 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
398 {
399 // Z major
400 major = ADDR_MAJOR_Z;
401 }
402 else if (numMetaBlkX >= numMetaBlkY)
403 {
404 // X major
405 major = ADDR_MAJOR_X;
406 }
407 else
408 {
409 // Y major
410 major = ADDR_MAJOR_Y;
411 }
412
413 inTail = ((mip0Width <= tailWidth) &&
414 (mip0Height <= tailHeight) &&
415 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
416
417 if (inTail == FALSE)
418 {
419 UINT_32 orderLimit;
420 UINT_32 *pMipDim;
421 UINT_32 *pOrderDim;
422
423 if (major == ADDR_MAJOR_Z)
424 {
425 // Z major
426 pMipDim = &numMetaBlkY;
427 pOrderDim = &numMetaBlkZ;
428 orderLimit = 4;
429 }
430 else if (major == ADDR_MAJOR_X)
431 {
432 // X major
433 pMipDim = &numMetaBlkY;
434 pOrderDim = &numMetaBlkX;
435 orderLimit = 4;
436 }
437 else
438 {
439 // Y major
440 pMipDim = &numMetaBlkX;
441 pOrderDim = &numMetaBlkY;
442 orderLimit = 2;
443 }
444
445 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
446 {
447 *pMipDim += 2;
448 }
449 else
450 {
451 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
452 }
453 }
454 }
455
456 if (pInfo != NULL)
457 {
458 UINT_32 mipWidth = mip0Width;
459 UINT_32 mipHeight = mip0Height;
460 UINT_32 mipDepth = mip0Depth;
461 Dim3d mipCoord = {0};
462
463 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
464 {
465 if (inTail)
466 {
467 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
468 pMetaBlkDim);
469 break;
470 }
471 else
472 {
473 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
474 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
475 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
476
477 pInfo[mip].inMiptail = FALSE;
478 pInfo[mip].startX = mipCoord.w;
479 pInfo[mip].startY = mipCoord.h;
480 pInfo[mip].startZ = mipCoord.d;
481 pInfo[mip].width = mipWidth;
482 pInfo[mip].height = mipHeight;
483 pInfo[mip].depth = dataThick ? mipDepth : 1;
484
485 if ((mip >= 3) || (mip & 1))
486 {
487 switch (major)
488 {
489 case ADDR_MAJOR_X:
490 mipCoord.w += mipWidth;
491 break;
492 case ADDR_MAJOR_Y:
493 mipCoord.h += mipHeight;
494 break;
495 case ADDR_MAJOR_Z:
496 mipCoord.d += mipDepth;
497 break;
498 default:
499 break;
500 }
501 }
502 else
503 {
504 switch (major)
505 {
506 case ADDR_MAJOR_X:
507 mipCoord.h += mipHeight;
508 break;
509 case ADDR_MAJOR_Y:
510 mipCoord.w += mipWidth;
511 break;
512 case ADDR_MAJOR_Z:
513 mipCoord.h += mipHeight;
514 break;
515 default:
516 break;
517 }
518 }
519
520 mipWidth = Max(mipWidth >> 1, 1u);
521 mipHeight = Max(mipHeight >> 1, 1u);
522 mipDepth = Max(mipDepth >> 1, 1u);
523
524 inTail = ((mipWidth <= tailWidth) &&
525 (mipHeight <= tailHeight) &&
526 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
527 }
528 }
529 }
530
531 *pNumMetaBlkX = numMetaBlkX;
532 *pNumMetaBlkY = numMetaBlkY;
533 *pNumMetaBlkZ = numMetaBlkZ;
534 }
535
536 /**
537 ************************************************************************************************************************
538 * Gfx9Lib::HwlComputeDccInfo
539 *
540 * @brief
541 * Interface function to compute DCC key info
542 *
543 * @return
544 * ADDR_E_RETURNCODE
545 ************************************************************************************************************************
546 */
547 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
548 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
549 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
550 ) const
551 {
552 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
553 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
554 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
555
556 if (dataLinear)
557 {
558 metaLinear = TRUE;
559 }
560 else if (metaLinear == TRUE)
561 {
562 pipeAligned = FALSE;
563 }
564
565 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
566
567 if (metaLinear)
568 {
569 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
570 ADDR_ASSERT_ALWAYS();
571
572 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
573 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
574 }
575 else
576 {
577 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
578
579 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
580
581 UINT_32 numFrags = Max(pIn->numFrags, 1u);
582 UINT_32 numSlices = Max(pIn->numSlices, 1u);
583
584 minMetaBlkSize /= numFrags;
585
586 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
587
588 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
589
590 if ((numPipeTotal > 1) || (numRbTotal > 1))
591 {
592 const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
593
594 numCompressBlkPerMetaBlk =
595 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
596
597 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
598 {
599 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
600 }
601 }
602
603 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
604 Dim3d metaBlkDim = compressBlkDim;
605
606 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
607 {
608 if ((metaBlkDim.h < metaBlkDim.w) ||
609 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
610 {
611 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
612 {
613 metaBlkDim.h <<= 1;
614 }
615 else
616 {
617 metaBlkDim.d <<= 1;
618 }
619 }
620 else
621 {
622 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
623 {
624 metaBlkDim.w <<= 1;
625 }
626 else
627 {
628 metaBlkDim.d <<= 1;
629 }
630 }
631 }
632
633 UINT_32 numMetaBlkX;
634 UINT_32 numMetaBlkY;
635 UINT_32 numMetaBlkZ;
636
637 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
638 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
639 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
640
641 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
642
643 if (numFrags > m_maxCompFrag)
644 {
645 sizeAlign *= (numFrags / m_maxCompFrag);
646 }
647
648 if (m_settings.metaBaseAlignFix)
649 {
650 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
651 }
652
653 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
654 numCompressBlkPerMetaBlk * numFrags;
655 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
656 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
657
658 pOut->pitch = numMetaBlkX * metaBlkDim.w;
659 pOut->height = numMetaBlkY * metaBlkDim.h;
660 pOut->depth = numMetaBlkZ * metaBlkDim.d;
661
662 pOut->compressBlkWidth = compressBlkDim.w;
663 pOut->compressBlkHeight = compressBlkDim.h;
664 pOut->compressBlkDepth = compressBlkDim.d;
665
666 pOut->metaBlkWidth = metaBlkDim.w;
667 pOut->metaBlkHeight = metaBlkDim.h;
668 pOut->metaBlkDepth = metaBlkDim.d;
669
670 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
671 pOut->fastClearSizePerSlice =
672 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
673 }
674
675 return ADDR_OK;
676 }
677
678 /**
679 ************************************************************************************************************************
680 * Gfx9Lib::HwlComputeMaxBaseAlignments
681 *
682 * @brief
683 * Gets maximum alignments
684 * @return
685 * maximum alignments
686 ************************************************************************************************************************
687 */
688 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
689 {
690 return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB);
691 }
692
693 /**
694 ************************************************************************************************************************
695 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
696 *
697 * @brief
698 * Gets maximum alignments for metadata
699 * @return
700 * maximum alignments for metadata
701 ************************************************************************************************************************
702 */
703 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
704 {
705 // Max base alignment for Htile
706 const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
707 const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
708
709 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
710 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
711 ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
712 const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
713
714 UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
715
716 if (maxNumPipeTotal > 2)
717 {
718 maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
719 }
720
721 maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
722
723 if (m_settings.metaBaseAlignFix)
724 {
725 maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB));
726 }
727
728 if (m_settings.htileAlignFix)
729 {
730 maxBaseAlignHtile *= maxNumPipeTotal;
731 }
732
733 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
734
735 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
736 UINT_32 maxBaseAlignDcc3D = 65536;
737
738 if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
739 {
740 maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
741 }
742
743 // Max base alignment for Msaa Dcc
744 UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
745
746 if (m_settings.metaBaseAlignFix)
747 {
748 maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB));
749 }
750
751 return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
752 }
753
754 /**
755 ************************************************************************************************************************
756 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
757 *
758 * @brief
759 * Interface function stub of AddrComputeCmaskAddrFromCoord
760 *
761 * @return
762 * ADDR_E_RETURNCODE
763 ************************************************************************************************************************
764 */
765 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
766 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
767 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
768 {
769 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
770 input.size = sizeof(input);
771 input.cMaskFlags = pIn->cMaskFlags;
772 input.colorFlags = pIn->colorFlags;
773 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
774 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
775 input.numSlices = Max(pIn->numSlices, 1u);
776 input.swizzleMode = pIn->swizzleMode;
777 input.resourceType = pIn->resourceType;
778
779 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
780 output.size = sizeof(output);
781
782 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
783
784 if (returnCode == ADDR_OK)
785 {
786 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
787 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
788 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
789 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
790
791 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
792 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
793 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
794
795 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
796
797 UINT_32 xb = pIn->x / output.metaBlkWidth;
798 UINT_32 yb = pIn->y / output.metaBlkHeight;
799 UINT_32 zb = pIn->slice;
800
801 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
802 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
803 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
804
805 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
806
807 pOut->addr = address >> 1;
808 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
809
810 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
811 pIn->swizzleMode);
812
813 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
814
815 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
816 }
817
818 return returnCode;
819 }
820
821 /**
822 ************************************************************************************************************************
823 * Gfx9Lib::HwlComputeHtileAddrFromCoord
824 *
825 * @brief
826 * Interface function stub of AddrComputeHtileAddrFromCoord
827 *
828 * @return
829 * ADDR_E_RETURNCODE
830 ************************************************************************************************************************
831 */
832 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
833 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
834 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
835 {
836 ADDR_E_RETURNCODE returnCode = ADDR_OK;
837
838 if (pIn->numMipLevels > 1)
839 {
840 returnCode = ADDR_NOTIMPLEMENTED;
841 }
842 else
843 {
844 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
845 input.size = sizeof(input);
846 input.hTileFlags = pIn->hTileFlags;
847 input.depthFlags = pIn->depthflags;
848 input.swizzleMode = pIn->swizzleMode;
849 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
850 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
851 input.numSlices = Max(pIn->numSlices, 1u);
852 input.numMipLevels = Max(pIn->numMipLevels, 1u);
853
854 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
855 output.size = sizeof(output);
856
857 returnCode = ComputeHtileInfo(&input, &output);
858
859 if (returnCode == ADDR_OK)
860 {
861 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
862 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
863 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
864 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
865
866 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
867 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
868 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
869
870 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
871
872 UINT_32 xb = pIn->x / output.metaBlkWidth;
873 UINT_32 yb = pIn->y / output.metaBlkHeight;
874 UINT_32 zb = pIn->slice;
875
876 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
877 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
878 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
879
880 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
881
882 pOut->addr = address >> 1;
883
884 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
885 pIn->swizzleMode);
886
887 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
888
889 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
890 }
891 }
892
893 return returnCode;
894 }
895
896 /**
897 ************************************************************************************************************************
898 * Gfx9Lib::HwlComputeHtileCoordFromAddr
899 *
900 * @brief
901 * Interface function stub of AddrComputeHtileCoordFromAddr
902 *
903 * @return
904 * ADDR_E_RETURNCODE
905 ************************************************************************************************************************
906 */
907 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
908 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
909 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
910 {
911 ADDR_E_RETURNCODE returnCode = ADDR_OK;
912
913 if (pIn->numMipLevels > 1)
914 {
915 returnCode = ADDR_NOTIMPLEMENTED;
916 }
917 else
918 {
919 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
920 input.size = sizeof(input);
921 input.hTileFlags = pIn->hTileFlags;
922 input.swizzleMode = pIn->swizzleMode;
923 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
924 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
925 input.numSlices = Max(pIn->numSlices, 1u);
926 input.numMipLevels = Max(pIn->numMipLevels, 1u);
927
928 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
929 output.size = sizeof(output);
930
931 returnCode = ComputeHtileInfo(&input, &output);
932
933 if (returnCode == ADDR_OK)
934 {
935 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
936 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
937 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
938 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
939
940 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
941 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
942 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
943
944 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
945
946 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
947 pIn->swizzleMode);
948
949 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
950
951 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
952
953 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
954 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
955
956 UINT_32 x, y, z, s, m;
957 pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
958
959 pOut->slice = m / sliceSizeInBlock;
960 pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
961 pOut->x = (m % pitchInBlock) * output.metaBlkWidth + x;
962 }
963 }
964
965 return returnCode;
966 }
967
968 /**
969 ************************************************************************************************************************
970 * Gfx9Lib::HwlComputeDccAddrFromCoord
971 *
972 * @brief
973 * Interface function stub of AddrComputeDccAddrFromCoord
974 *
975 * @return
976 * ADDR_E_RETURNCODE
977 ************************************************************************************************************************
978 */
979 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
980 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
981 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
982 {
983 ADDR_E_RETURNCODE returnCode = ADDR_OK;
984
985 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
986 {
987 returnCode = ADDR_NOTIMPLEMENTED;
988 }
989 else
990 {
991 ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
992 input.size = sizeof(input);
993 input.dccKeyFlags = pIn->dccKeyFlags;
994 input.colorFlags = pIn->colorFlags;
995 input.swizzleMode = pIn->swizzleMode;
996 input.resourceType = pIn->resourceType;
997 input.bpp = pIn->bpp;
998 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
999 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
1000 input.numSlices = Max(pIn->numSlices, 1u);
1001 input.numFrags = Max(pIn->numFrags, 1u);
1002 input.numMipLevels = Max(pIn->numMipLevels, 1u);
1003
1004 ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
1005 output.size = sizeof(output);
1006
1007 returnCode = ComputeDccInfo(&input, &output);
1008
1009 if (returnCode == ADDR_OK)
1010 {
1011 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
1012 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
1013 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
1014 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1015 UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth);
1016 UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth);
1017 UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
1018 UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
1019
1020 MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1021 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1022 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1023 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1024
1025 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1026
1027 UINT_32 xb = pIn->x / output.metaBlkWidth;
1028 UINT_32 yb = pIn->y / output.metaBlkHeight;
1029 UINT_32 zb = pIn->slice / output.metaBlkDepth;
1030
1031 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
1032 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1033 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1034
1035 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
1036
1037 pOut->addr = address >> 1;
1038
1039 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1040 pIn->swizzleMode);
1041
1042 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1043
1044 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1045 }
1046 }
1047
1048 return returnCode;
1049 }
1050
1051 /**
1052 ************************************************************************************************************************
1053 * Gfx9Lib::HwlInitGlobalParams
1054 *
1055 * @brief
1056 * Initializes global parameters
1057 *
1058 * @return
1059 * TRUE if all settings are valid
1060 *
1061 ************************************************************************************************************************
1062 */
1063 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1064 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1065 {
1066 BOOL_32 valid = TRUE;
1067
1068 if (m_settings.isArcticIsland)
1069 {
1070 GB_ADDR_CONFIG gbAddrConfig;
1071
1072 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1073
1074 // These values are copied from CModel code
1075 switch (gbAddrConfig.bits.NUM_PIPES)
1076 {
1077 case ADDR_CONFIG_1_PIPE:
1078 m_pipes = 1;
1079 m_pipesLog2 = 0;
1080 break;
1081 case ADDR_CONFIG_2_PIPE:
1082 m_pipes = 2;
1083 m_pipesLog2 = 1;
1084 break;
1085 case ADDR_CONFIG_4_PIPE:
1086 m_pipes = 4;
1087 m_pipesLog2 = 2;
1088 break;
1089 case ADDR_CONFIG_8_PIPE:
1090 m_pipes = 8;
1091 m_pipesLog2 = 3;
1092 break;
1093 case ADDR_CONFIG_16_PIPE:
1094 m_pipes = 16;
1095 m_pipesLog2 = 4;
1096 break;
1097 case ADDR_CONFIG_32_PIPE:
1098 m_pipes = 32;
1099 m_pipesLog2 = 5;
1100 break;
1101 default:
1102 ADDR_ASSERT_ALWAYS();
1103 break;
1104 }
1105
1106 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1107 {
1108 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1109 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1110 m_pipeInterleaveLog2 = 8;
1111 break;
1112 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1113 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1114 m_pipeInterleaveLog2 = 9;
1115 break;
1116 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1117 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1118 m_pipeInterleaveLog2 = 10;
1119 break;
1120 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1121 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1122 m_pipeInterleaveLog2 = 11;
1123 break;
1124 default:
1125 ADDR_ASSERT_ALWAYS();
1126 break;
1127 }
1128
1129 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1130 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1131 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1132
1133 switch (gbAddrConfig.bits.NUM_BANKS)
1134 {
1135 case ADDR_CONFIG_1_BANK:
1136 m_banks = 1;
1137 m_banksLog2 = 0;
1138 break;
1139 case ADDR_CONFIG_2_BANK:
1140 m_banks = 2;
1141 m_banksLog2 = 1;
1142 break;
1143 case ADDR_CONFIG_4_BANK:
1144 m_banks = 4;
1145 m_banksLog2 = 2;
1146 break;
1147 case ADDR_CONFIG_8_BANK:
1148 m_banks = 8;
1149 m_banksLog2 = 3;
1150 break;
1151 case ADDR_CONFIG_16_BANK:
1152 m_banks = 16;
1153 m_banksLog2 = 4;
1154 break;
1155 default:
1156 ADDR_ASSERT_ALWAYS();
1157 break;
1158 }
1159
1160 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1161 {
1162 case ADDR_CONFIG_1_SHADER_ENGINE:
1163 m_se = 1;
1164 m_seLog2 = 0;
1165 break;
1166 case ADDR_CONFIG_2_SHADER_ENGINE:
1167 m_se = 2;
1168 m_seLog2 = 1;
1169 break;
1170 case ADDR_CONFIG_4_SHADER_ENGINE:
1171 m_se = 4;
1172 m_seLog2 = 2;
1173 break;
1174 case ADDR_CONFIG_8_SHADER_ENGINE:
1175 m_se = 8;
1176 m_seLog2 = 3;
1177 break;
1178 default:
1179 ADDR_ASSERT_ALWAYS();
1180 break;
1181 }
1182
1183 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1184 {
1185 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1186 m_rbPerSe = 1;
1187 m_rbPerSeLog2 = 0;
1188 break;
1189 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1190 m_rbPerSe = 2;
1191 m_rbPerSeLog2 = 1;
1192 break;
1193 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1194 m_rbPerSe = 4;
1195 m_rbPerSeLog2 = 2;
1196 break;
1197 default:
1198 ADDR_ASSERT_ALWAYS();
1199 break;
1200 }
1201
1202 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1203 {
1204 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1205 m_maxCompFrag = 1;
1206 m_maxCompFragLog2 = 0;
1207 break;
1208 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1209 m_maxCompFrag = 2;
1210 m_maxCompFragLog2 = 1;
1211 break;
1212 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1213 m_maxCompFrag = 4;
1214 m_maxCompFragLog2 = 2;
1215 break;
1216 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1217 m_maxCompFrag = 8;
1218 m_maxCompFragLog2 = 3;
1219 break;
1220 default:
1221 ADDR_ASSERT_ALWAYS();
1222 break;
1223 }
1224
1225 m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
1226 ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
1227 ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
1228 m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
1229
1230 if ((m_rbPerSeLog2 == 1) &&
1231 (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1232 ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1233 {
1234 ADDR_ASSERT(m_settings.isVega10 == FALSE);
1235 ADDR_ASSERT(m_settings.isRaven == FALSE);
1236
1237 ADDR_ASSERT(m_settings.isVega20 == FALSE);
1238
1239 if (m_settings.isVega12)
1240 {
1241 m_settings.htileCacheRbConflict = 1;
1242 }
1243 }
1244 }
1245 else
1246 {
1247 valid = FALSE;
1248 ADDR_NOT_IMPLEMENTED();
1249 }
1250
1251 if (valid)
1252 {
1253 InitEquationTable();
1254 }
1255
1256 return valid;
1257 }
1258
1259 /**
1260 ************************************************************************************************************************
1261 * Gfx9Lib::HwlConvertChipFamily
1262 *
1263 * @brief
1264 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1265 * @return
1266 * ChipFamily
1267 ************************************************************************************************************************
1268 */
1269 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1270 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1271 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1272 {
1273 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1274
1275 switch (uChipFamily)
1276 {
1277 case FAMILY_AI:
1278 m_settings.isArcticIsland = 1;
1279 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1280 m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1281 m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1282 m_settings.isDce12 = 1;
1283
1284 if (m_settings.isVega10 == 0)
1285 {
1286 m_settings.htileAlignFix = 1;
1287 m_settings.applyAliasFix = 1;
1288 }
1289
1290 m_settings.metaBaseAlignFix = 1;
1291
1292 m_settings.depthPipeXorDisable = 1;
1293 break;
1294 case FAMILY_RV:
1295 m_settings.isArcticIsland = 1;
1296
1297 if (ASICREV_IS_RAVEN(uChipRevision))
1298 {
1299 m_settings.isRaven = 1;
1300
1301 m_settings.depthPipeXorDisable = 1;
1302 }
1303
1304 if (ASICREV_IS_RAVEN2(uChipRevision))
1305 {
1306 m_settings.isRaven = 1;
1307 }
1308
1309 if (m_settings.isRaven == 0)
1310 {
1311 m_settings.htileAlignFix = 1;
1312 m_settings.applyAliasFix = 1;
1313 }
1314
1315 m_settings.isDcn1 = m_settings.isRaven;
1316
1317 m_settings.metaBaseAlignFix = 1;
1318 break;
1319
1320 default:
1321 ADDR_ASSERT(!"This should be a Fusion");
1322 break;
1323 }
1324
1325 return family;
1326 }
1327
1328 /**
1329 ************************************************************************************************************************
1330 * Gfx9Lib::InitRbEquation
1331 *
1332 * @brief
1333 * Init RB equation
1334 * @return
1335 * N/A
1336 ************************************************************************************************************************
1337 */
1338 VOID Gfx9Lib::GetRbEquation(
1339 CoordEq* pRbEq, ///< [out] rb equation
1340 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1341 UINT_32 numSeLog2) ///< [in] number of shader engine
1342 const
1343 {
1344 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1345 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1346 Coordinate cx('x', rbRegion);
1347 Coordinate cy('y', rbRegion);
1348
1349 UINT_32 start = 0;
1350 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1351
1352 // Clear the rb equation
1353 pRbEq->resize(0);
1354 pRbEq->resize(numRbTotalLog2);
1355
1356 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1357 {
1358 // Special case when more than 1 SE, and 2 RB per SE
1359 (*pRbEq)[0].add(cx);
1360 (*pRbEq)[0].add(cy);
1361 cx++;
1362 cy++;
1363
1364 if (m_settings.applyAliasFix == false)
1365 {
1366 (*pRbEq)[0].add(cy);
1367 }
1368
1369 (*pRbEq)[0].add(cy);
1370 start++;
1371 }
1372
1373 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1374
1375 for (UINT_32 i = 0; i < numBits; i++)
1376 {
1377 UINT_32 idx =
1378 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1379
1380 if ((i % 2) == 1)
1381 {
1382 (*pRbEq)[idx].add(cx);
1383 cx++;
1384 }
1385 else
1386 {
1387 (*pRbEq)[idx].add(cy);
1388 cy++;
1389 }
1390 }
1391 }
1392
1393 /**
1394 ************************************************************************************************************************
1395 * Gfx9Lib::GetDataEquation
1396 *
1397 * @brief
1398 * Get data equation for fmask and Z
1399 * @return
1400 * N/A
1401 ************************************************************************************************************************
1402 */
1403 VOID Gfx9Lib::GetDataEquation(
1404 CoordEq* pDataEq, ///< [out] data surface equation
1405 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1406 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1407 AddrResourceType resourceType, ///< [in] data surface resource type
1408 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1409 UINT_32 numSamplesLog2) ///< [in] data surface sample count
1410 const
1411 {
1412 Coordinate cx('x', 0);
1413 Coordinate cy('y', 0);
1414 Coordinate cz('z', 0);
1415 Coordinate cs('s', 0);
1416
1417 // Clear the equation
1418 pDataEq->resize(0);
1419 pDataEq->resize(27);
1420
1421 if (dataSurfaceType == Gfx9DataColor)
1422 {
1423 if (IsLinear(swizzleMode))
1424 {
1425 Coordinate cm('m', 0);
1426
1427 pDataEq->resize(49);
1428
1429 for (UINT_32 i = 0; i < 49; i++)
1430 {
1431 (*pDataEq)[i].add(cm);
1432 cm++;
1433 }
1434 }
1435 else if (IsThick(resourceType, swizzleMode))
1436 {
1437 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1438 UINT_32 i;
1439 if (IsStandardSwizzle(resourceType, swizzleMode))
1440 {
1441 // Standard 3d swizzle
1442 // Fill in bottom x bits
1443 for (i = elementBytesLog2; i < 4; i++)
1444 {
1445 (*pDataEq)[i].add(cx);
1446 cx++;
1447 }
1448 // Fill in 2 bits of y and then z
1449 for (i = 4; i < 6; i++)
1450 {
1451 (*pDataEq)[i].add(cy);
1452 cy++;
1453 }
1454 for (i = 6; i < 8; i++)
1455 {
1456 (*pDataEq)[i].add(cz);
1457 cz++;
1458 }
1459 if (elementBytesLog2 < 2)
1460 {
1461 // fill in z & y bit
1462 (*pDataEq)[8].add(cz);
1463 (*pDataEq)[9].add(cy);
1464 cz++;
1465 cy++;
1466 }
1467 else if (elementBytesLog2 == 2)
1468 {
1469 // fill in y and x bit
1470 (*pDataEq)[8].add(cy);
1471 (*pDataEq)[9].add(cx);
1472 cy++;
1473 cx++;
1474 }
1475 else
1476 {
1477 // fill in 2 x bits
1478 (*pDataEq)[8].add(cx);
1479 cx++;
1480 (*pDataEq)[9].add(cx);
1481 cx++;
1482 }
1483 }
1484 else
1485 {
1486 // Z 3d swizzle
1487 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1488 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1489 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1490 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1491 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1492 {
1493 (*pDataEq)[i].add(cz);
1494 cz++;
1495 }
1496 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1497 {
1498 // add an x and z
1499 (*pDataEq)[6].add(cx);
1500 (*pDataEq)[7].add(cz);
1501 cx++;
1502 cz++;
1503 }
1504 else if (elementBytesLog2 == 2)
1505 {
1506 // add a y and z
1507 (*pDataEq)[6].add(cy);
1508 (*pDataEq)[7].add(cz);
1509 cy++;
1510 cz++;
1511 }
1512 // add y and x
1513 (*pDataEq)[8].add(cy);
1514 (*pDataEq)[9].add(cx);
1515 cy++;
1516 cx++;
1517 }
1518 // Fill in bit 10 and up
1519 pDataEq->mort3d( cz, cy, cx, 10 );
1520 }
1521 else if (IsThin(resourceType, swizzleMode))
1522 {
1523 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1524 // Color 2D
1525 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1526 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1527 UINT_32 i;
1528 // Fill in bottom x bits
1529 for (i = elementBytesLog2; i < 4; i++)
1530 {
1531 (*pDataEq)[i].add(cx);
1532 cx++;
1533 }
1534 // Fill in bottom y bits
1535 for (i = 4; i < 4 + microYBits; i++)
1536 {
1537 (*pDataEq)[i].add(cy);
1538 cy++;
1539 }
1540 // Fill in last of the micro_x bits
1541 for (i = 4 + microYBits; i < 8; i++)
1542 {
1543 (*pDataEq)[i].add(cx);
1544 cx++;
1545 }
1546 // Fill in x/y bits below sample split
1547 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1548 // Fill in sample bits
1549 for (i = 0; i < numSamplesLog2; i++)
1550 {
1551 cs.set('s', i);
1552 (*pDataEq)[tileSplitStart + i].add(cs);
1553 }
1554 // Fill in x/y bits above sample split
1555 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1556 {
1557 pDataEq->mort2d(cx, cy, blockSizeLog2);
1558 }
1559 else
1560 {
1561 pDataEq->mort2d(cy, cx, blockSizeLog2);
1562 }
1563 }
1564 else
1565 {
1566 ADDR_ASSERT_ALWAYS();
1567 }
1568 }
1569 else
1570 {
1571 // Fmask or depth
1572 UINT_32 sampleStart = elementBytesLog2;
1573 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1574 UINT_32 ymajStart = 6 + numSamplesLog2;
1575
1576 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1577 {
1578 cs.set('s', s);
1579 (*pDataEq)[sampleStart + s].add(cs);
1580 }
1581
1582 // Put in the x-major order pixel bits
1583 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1584 // Put in the y-major order pixel bits
1585 pDataEq->mort2d(cy, cx, ymajStart);
1586 }
1587 }
1588
1589 /**
1590 ************************************************************************************************************************
1591 * Gfx9Lib::GetPipeEquation
1592 *
1593 * @brief
1594 * Get pipe equation
1595 * @return
1596 * N/A
1597 ************************************************************************************************************************
1598 */
1599 VOID Gfx9Lib::GetPipeEquation(
1600 CoordEq* pPipeEq, ///< [out] pipe equation
1601 CoordEq* pDataEq, ///< [in] data equation
1602 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1603 UINT_32 numPipeLog2, ///< [in] number of pipes
1604 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1605 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1606 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1607 AddrResourceType resourceType ///< [in] data surface resource type
1608 ) const
1609 {
1610 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1611 CoordEq dataEq;
1612
1613 pDataEq->copy(dataEq);
1614
1615 if (dataSurfaceType == Gfx9DataColor)
1616 {
1617 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1618 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1619 }
1620
1621 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1622
1623 // This section should only apply to z/stencil, maybe fmask
1624 // If the pipe bit is below the comp block size,
1625 // then keep moving up the address until we find a bit that is above
1626 UINT_32 pipeStart = 0;
1627
1628 if (dataSurfaceType != Gfx9DataColor)
1629 {
1630 Coordinate tileMin('x', 3);
1631
1632 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1633 {
1634 pipeStart++;
1635 }
1636
1637 // if pipe is 0, then the first pipe bit is above the comp block size,
1638 // so we don't need to do anything
1639 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1640 // we will get the same pipe equation
1641 if (pipeStart != 0)
1642 {
1643 for (UINT_32 i = 0; i < numPipeLog2; i++)
1644 {
1645 // Copy the jth bit above pipe interleave to the current pipe equation bit
1646 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1647 }
1648 }
1649 }
1650
1651 if (IsPrt(swizzleMode))
1652 {
1653 // Clear out bits above the block size if prt's are enabled
1654 dataEq.resize(blockSizeLog2);
1655 dataEq.resize(48);
1656 }
1657
1658 if (IsXor(swizzleMode))
1659 {
1660 CoordEq xorMask;
1661
1662 if (IsThick(resourceType, swizzleMode))
1663 {
1664 CoordEq xorMask2;
1665
1666 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1667
1668 xorMask.resize(numPipeLog2);
1669
1670 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1671 {
1672 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1673 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1674 }
1675 }
1676 else
1677 {
1678 // Xor in the bits above the pipe+gpu bits
1679 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1680
1681 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1682 {
1683 Coordinate co;
1684 CoordEq xorMask2;
1685 // if 1xaa and not prt, then xor in the z bits
1686 xorMask2.resize(0);
1687 xorMask2.resize(numPipeLog2);
1688 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1689 {
1690 co.set('z', numPipeLog2 - 1 - pipeIdx);
1691 xorMask2[pipeIdx].add(co);
1692 }
1693
1694 pPipeEq->xorin(xorMask2);
1695 }
1696 }
1697
1698 xorMask.reverse();
1699 pPipeEq->xorin(xorMask);
1700 }
1701 }
1702 /**
1703 ************************************************************************************************************************
1704 * Gfx9Lib::GetMetaEquation
1705 *
1706 * @brief
1707 * Get meta equation for cmask/htile/DCC
1708 * @return
1709 * Pointer to a calculated meta equation
1710 ************************************************************************************************************************
1711 */
1712 const CoordEq* Gfx9Lib::GetMetaEquation(
1713 const MetaEqParams& metaEqParams)
1714 {
1715 UINT_32 cachedMetaEqIndex;
1716
1717 for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1718 {
1719 if (memcmp(&metaEqParams,
1720 &m_cachedMetaEqKey[cachedMetaEqIndex],
1721 static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1722 {
1723 break;
1724 }
1725 }
1726
1727 CoordEq* pMetaEq = NULL;
1728
1729 if (cachedMetaEqIndex < MaxCachedMetaEq)
1730 {
1731 pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1732 }
1733 else
1734 {
1735 m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1736
1737 pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1738
1739 m_metaEqOverrideIndex %= MaxCachedMetaEq;
1740
1741 GenMetaEquation(pMetaEq,
1742 metaEqParams.maxMip,
1743 metaEqParams.elementBytesLog2,
1744 metaEqParams.numSamplesLog2,
1745 metaEqParams.metaFlag,
1746 metaEqParams.dataSurfaceType,
1747 metaEqParams.swizzleMode,
1748 metaEqParams.resourceType,
1749 metaEqParams.metaBlkWidthLog2,
1750 metaEqParams.metaBlkHeightLog2,
1751 metaEqParams.metaBlkDepthLog2,
1752 metaEqParams.compBlkWidthLog2,
1753 metaEqParams.compBlkHeightLog2,
1754 metaEqParams.compBlkDepthLog2);
1755 }
1756
1757 return pMetaEq;
1758 }
1759
1760 /**
1761 ************************************************************************************************************************
1762 * Gfx9Lib::GenMetaEquation
1763 *
1764 * @brief
1765 * Get meta equation for cmask/htile/DCC
1766 * @return
1767 * N/A
1768 ************************************************************************************************************************
1769 */
1770 VOID Gfx9Lib::GenMetaEquation(
1771 CoordEq* pMetaEq, ///< [out] meta equation
1772 UINT_32 maxMip, ///< [in] max mip Id
1773 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1774 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1775 ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
1776 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1777 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1778 AddrResourceType resourceType, ///< [in] data surface resource type
1779 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1780 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1781 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1782 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1783 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1784 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1785 const
1786 {
1787 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1788 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1789
1790 // Get the correct data address and rb equation
1791 CoordEq dataEq;
1792 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1793 elementBytesLog2, numSamplesLog2);
1794
1795 // Get pipe and rb equations
1796 CoordEq pipeEquation;
1797 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1798 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1799 numPipeTotalLog2 = pipeEquation.getsize();
1800
1801 if (metaFlag.linear)
1802 {
1803 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1804 ADDR_ASSERT_ALWAYS();
1805
1806 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1807
1808 dataEq.copy(*pMetaEq);
1809
1810 if (IsLinear(swizzleMode))
1811 {
1812 if (metaFlag.pipeAligned)
1813 {
1814 // Remove the pipe bits
1815 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1816 pMetaEq->shift(-shift, pipeInterleaveLog2);
1817 }
1818 // Divide by comp block size, which for linear (which is always color) is 256 B
1819 pMetaEq->shift(-8);
1820
1821 if (metaFlag.pipeAligned)
1822 {
1823 // Put pipe bits back in
1824 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1825
1826 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1827 {
1828 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1829 }
1830 }
1831 }
1832
1833 pMetaEq->shift(1);
1834 }
1835 else
1836 {
1837 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1838 UINT_32 compFragLog2 =
1839 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1840 maxCompFragLog2 : numSamplesLog2;
1841
1842 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1843
1844 // Make sure the metaaddr is cleared
1845 pMetaEq->resize(0);
1846 pMetaEq->resize(27);
1847
1848 if (IsThick(resourceType, swizzleMode))
1849 {
1850 Coordinate cx('x', 0);
1851 Coordinate cy('y', 0);
1852 Coordinate cz('z', 0);
1853
1854 if (maxMip > 0)
1855 {
1856 pMetaEq->mort3d(cy, cx, cz);
1857 }
1858 else
1859 {
1860 pMetaEq->mort3d(cx, cy, cz);
1861 }
1862 }
1863 else
1864 {
1865 Coordinate cx('x', 0);
1866 Coordinate cy('y', 0);
1867 Coordinate cs;
1868
1869 if (maxMip > 0)
1870 {
1871 pMetaEq->mort2d(cy, cx, compFragLog2);
1872 }
1873 else
1874 {
1875 pMetaEq->mort2d(cx, cy, compFragLog2);
1876 }
1877
1878 //------------------------------------------------------------------------------------------------------------------------
1879 // Put the compressible fragments at the lsb
1880 // the uncompressible frags will be at the msb of the micro address
1881 //------------------------------------------------------------------------------------------------------------------------
1882 for (UINT_32 s = 0; s < compFragLog2; s++)
1883 {
1884 cs.set('s', s);
1885 (*pMetaEq)[s].add(cs);
1886 }
1887 }
1888
1889 // Keep a copy of the pipe equations
1890 CoordEq origPipeEquation;
1891 pipeEquation.copy(origPipeEquation);
1892
1893 Coordinate co;
1894 // filter out everything under the compressed block size
1895 co.set('x', compBlkWidthLog2);
1896 pMetaEq->Filter('<', co, 0, 'x');
1897 co.set('y', compBlkHeightLog2);
1898 pMetaEq->Filter('<', co, 0, 'y');
1899 co.set('z', compBlkDepthLog2);
1900 pMetaEq->Filter('<', co, 0, 'z');
1901
1902 // For non-color, filter out sample bits
1903 if (dataSurfaceType != Gfx9DataColor)
1904 {
1905 co.set('x', 0);
1906 pMetaEq->Filter('<', co, 0, 's');
1907 }
1908
1909 // filter out everything above the metablock size
1910 co.set('x', metaBlkWidthLog2 - 1);
1911 pMetaEq->Filter('>', co, 0, 'x');
1912 co.set('y', metaBlkHeightLog2 - 1);
1913 pMetaEq->Filter('>', co, 0, 'y');
1914 co.set('z', metaBlkDepthLog2 - 1);
1915 pMetaEq->Filter('>', co, 0, 'z');
1916
1917 // filter out everything above the metablock size for the channel bits
1918 co.set('x', metaBlkWidthLog2 - 1);
1919 pipeEquation.Filter('>', co, 0, 'x');
1920 co.set('y', metaBlkHeightLog2 - 1);
1921 pipeEquation.Filter('>', co, 0, 'y');
1922 co.set('z', metaBlkDepthLog2 - 1);
1923 pipeEquation.Filter('>', co, 0, 'z');
1924
1925 // Make sure we still have the same number of channel bits
1926 if (pipeEquation.getsize() != numPipeTotalLog2)
1927 {
1928 ADDR_ASSERT_ALWAYS();
1929 }
1930
1931 // Loop through all channel and rb bits,
1932 // and make sure these components exist in the metadata address
1933 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1934 {
1935 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1936 {
1937 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1938 {
1939 ADDR_ASSERT_ALWAYS();
1940 }
1941 }
1942 }
1943
1944 const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
1945 const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1946 const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1947 CoordEq origRbEquation;
1948
1949 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1950
1951 CoordEq rbEquation = origRbEquation;
1952
1953 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1954 {
1955 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1956 {
1957 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1958 {
1959 ADDR_ASSERT_ALWAYS();
1960 }
1961 }
1962 }
1963
1964 if (m_settings.applyAliasFix)
1965 {
1966 co.set('z', -1);
1967 }
1968
1969 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1970 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1971 {
1972 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1973 {
1974 BOOL_32 isRbEquationInPipeEquation = FALSE;
1975
1976 if (m_settings.applyAliasFix)
1977 {
1978 CoordTerm filteredPipeEq;
1979 filteredPipeEq = pipeEquation[j];
1980
1981 filteredPipeEq.Filter('>', co, 0, 'z');
1982
1983 isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1984 }
1985 else
1986 {
1987 isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1988 }
1989
1990 if (isRbEquationInPipeEquation)
1991 {
1992 rbEquation[i].Clear();
1993 }
1994 }
1995 }
1996
1997 bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
1998
1999 // Loop through each bit of the channel, get the smallest coordinate,
2000 // and remove it from the metaaddr, and rb_equation
2001 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2002 {
2003 pipeEquation[i].getsmallest(co);
2004
2005 UINT_32 old_size = pMetaEq->getsize();
2006 pMetaEq->Filter('=', co);
2007 UINT_32 new_size = pMetaEq->getsize();
2008 if (new_size != old_size-1)
2009 {
2010 ADDR_ASSERT_ALWAYS();
2011 }
2012 pipeEquation.remove(co);
2013 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2014 {
2015 if (rbEquation[j].remove(co))
2016 {
2017 // if we actually removed something from this bit, then add the remaining
2018 // channel bits, as these can be removed for this bit
2019 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2020 {
2021 if (pipeEquation[i][k] != co)
2022 {
2023 rbEquation[j].add(pipeEquation[i][k]);
2024 rbAppendedWithPipeBits[j] = true;
2025 }
2026 }
2027 }
2028 }
2029 }
2030
2031 // Loop through the rb bits and see what remain;
2032 // filter out the smallest coordinate if it remains
2033 UINT_32 rbBitsLeft = 0;
2034 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2035 {
2036 BOOL_32 isRbEqAppended = FALSE;
2037
2038 if (m_settings.applyAliasFix)
2039 {
2040 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2041 }
2042 else
2043 {
2044 isRbEqAppended = (rbEquation[i].getsize() > 0);
2045 }
2046
2047 if (isRbEqAppended)
2048 {
2049 rbBitsLeft++;
2050 rbEquation[i].getsmallest(co);
2051 UINT_32 old_size = pMetaEq->getsize();
2052 pMetaEq->Filter('=', co);
2053 UINT_32 new_size = pMetaEq->getsize();
2054 if (new_size != old_size - 1)
2055 {
2056 // assert warning
2057 }
2058 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2059 {
2060 if (rbEquation[j].remove(co))
2061 {
2062 // if we actually removed something from this bit, then add the remaining
2063 // rb bits, as these can be removed for this bit
2064 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2065 {
2066 if (rbEquation[i][k] != co)
2067 {
2068 rbEquation[j].add(rbEquation[i][k]);
2069 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2070 }
2071 }
2072 }
2073 }
2074 }
2075 }
2076
2077 // capture the size of the metaaddr
2078 UINT_32 metaSize = pMetaEq->getsize();
2079 // resize to 49 bits...make this a nibble address
2080 pMetaEq->resize(49);
2081 // Concatenate the macro address above the current address
2082 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2083 {
2084 co.set('m', j);
2085 (*pMetaEq)[i].add(co);
2086 }
2087
2088 // Multiply by meta element size (in nibbles)
2089 if (dataSurfaceType == Gfx9DataColor)
2090 {
2091 pMetaEq->shift(1);
2092 }
2093 else if (dataSurfaceType == Gfx9DataDepthStencil)
2094 {
2095 pMetaEq->shift(3);
2096 }
2097
2098 //------------------------------------------------------------------------------------------
2099 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2100 // Shift up from pipe interleave number of channel
2101 // and rb bits left, and uncompressed fragments
2102 //------------------------------------------------------------------------------------------
2103
2104 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2105
2106 // Put in the channel bits
2107 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2108 {
2109 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2110 }
2111
2112 // Put in remaining rb bits
2113 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2114 {
2115 BOOL_32 isRbEqAppended = FALSE;
2116
2117 if (m_settings.applyAliasFix)
2118 {
2119 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2120 }
2121 else
2122 {
2123 isRbEqAppended = (rbEquation[i].getsize() > 0);
2124 }
2125
2126 if (isRbEqAppended)
2127 {
2128 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2129 // Mark any rb bit we add in to the rb mask
2130 j++;
2131 }
2132 }
2133
2134 //------------------------------------------------------------------------------------------
2135 // Put in the uncompressed fragment bits
2136 //------------------------------------------------------------------------------------------
2137 for (UINT_32 i = 0; i < uncompFragLog2; i++)
2138 {
2139 co.set('s', compFragLog2 + i);
2140 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2141 }
2142 }
2143 }
2144
2145 /**
2146 ************************************************************************************************************************
2147 * Gfx9Lib::IsEquationSupported
2148 *
2149 * @brief
2150 * Check if equation is supported for given swizzle mode and resource type.
2151 *
2152 * @return
2153 * TRUE if supported
2154 ************************************************************************************************************************
2155 */
2156 BOOL_32 Gfx9Lib::IsEquationSupported(
2157 AddrResourceType rsrcType,
2158 AddrSwizzleMode swMode,
2159 UINT_32 elementBytesLog2) const
2160 {
2161 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2162 (IsLinear(swMode) == FALSE) &&
2163 (((IsTex2d(rsrcType) == TRUE) &&
2164 ((elementBytesLog2 < 4) ||
2165 ((IsRotateSwizzle(swMode) == FALSE) &&
2166 (IsZOrderSwizzle(swMode) == FALSE)))) ||
2167 ((IsTex3d(rsrcType) == TRUE) &&
2168 (IsRotateSwizzle(swMode) == FALSE) &&
2169 (IsBlock256b(swMode) == FALSE)));
2170
2171 return supported;
2172 }
2173
2174 /**
2175 ************************************************************************************************************************
2176 * Gfx9Lib::InitEquationTable
2177 *
2178 * @brief
2179 * Initialize Equation table.
2180 *
2181 * @return
2182 * N/A
2183 ************************************************************************************************************************
2184 */
2185 VOID Gfx9Lib::InitEquationTable()
2186 {
2187 memset(m_equationTable, 0, sizeof(m_equationTable));
2188
2189 // Loop all possible resource type (2D/3D)
2190 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2191 {
2192 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2193
2194 // Loop all possible swizzle mode
2195 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
2196 {
2197 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2198
2199 // Loop all possible bpp
2200 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2201 {
2202 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2203
2204 // Check if the input is supported
2205 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2206 {
2207 ADDR_EQUATION equation;
2208 ADDR_E_RETURNCODE retCode;
2209
2210 memset(&equation, 0, sizeof(ADDR_EQUATION));
2211
2212 // Generate the equation
2213 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2214 {
2215 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2216 }
2217 else if (IsThin(rsrcType, swMode))
2218 {
2219 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2220 }
2221 else
2222 {
2223 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2224 }
2225
2226 // Only fill the equation into the table if the return code is ADDR_OK,
2227 // otherwise if the return code is not ADDR_OK, it indicates this is not
2228 // a valid input, we do nothing but just fill invalid equation index
2229 // into the lookup table.
2230 if (retCode == ADDR_OK)
2231 {
2232 equationIndex = m_numEquations;
2233 ADDR_ASSERT(equationIndex < EquationTableSize);
2234
2235 m_equationTable[equationIndex] = equation;
2236
2237 m_numEquations++;
2238 }
2239 else
2240 {
2241 ADDR_ASSERT_ALWAYS();
2242 }
2243 }
2244
2245 // Fill the index into the lookup table, if the combination is not supported
2246 // fill the invalid equation index
2247 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2248 }
2249 }
2250 }
2251 }
2252
2253 /**
2254 ************************************************************************************************************************
2255 * Gfx9Lib::HwlGetEquationIndex
2256 *
2257 * @brief
2258 * Interface function stub of GetEquationIndex
2259 *
2260 * @return
2261 * ADDR_E_RETURNCODE
2262 ************************************************************************************************************************
2263 */
2264 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2265 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2266 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2267 ) const
2268 {
2269 AddrResourceType rsrcType = pIn->resourceType;
2270 AddrSwizzleMode swMode = pIn->swizzleMode;
2271 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2272 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2273
2274 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2275 {
2276 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2277 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2278
2279 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2280 }
2281
2282 if (pOut->pMipInfo != NULL)
2283 {
2284 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2285 {
2286 pOut->pMipInfo[i].equationIndex = index;
2287 }
2288 }
2289
2290 return index;
2291 }
2292
2293 /**
2294 ************************************************************************************************************************
2295 * Gfx9Lib::HwlComputeBlock256Equation
2296 *
2297 * @brief
2298 * Interface function stub of ComputeBlock256Equation
*
* Fills the low 8 address bits of pEquation (numBits is set to 8). The lowest elementBytesLog2 bits are
* taken directly from channel 0; the remaining bits are assigned from the x[]/y[] coordinate bits
* according to a fixed table selected by the swizzle type (standard, display or rotate) and by
* elementBytesLog2.
*
* @param rsrcType          Resource type, used to classify the swizzle mode
* @param swMode            Swizzle mode
* @param elementBytesLog2  Log2 of the element size in bytes; the tables cover 0..4 (0..3 for rotate)
* @param pEquation         [out] Equation whose addr[] entries and numBits are written
2299 *
2300 * @return
2301 * ADDR_E_RETURNCODE
* ADDR_OK on success; ADDR_INVALIDPARAMS when elementBytesLog2 has no table entry for the swizzle type
* or when the swizzle mode is neither standard, display nor rotate
2302 ************************************************************************************************************************
2303 */
2304 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2305 AddrResourceType rsrcType,
2306 AddrSwizzleMode swMode,
2307 UINT_32 elementBytesLog2,
2308 ADDR_EQUATION* pEquation) const
2309 {
2310 ADDR_E_RETURNCODE ret = ADDR_OK;
2311
2312 pEquation->numBits = 8;
2313
// Address bits below the element size map one-to-one onto channel 0 bits 0..elementBytesLog2-1.
2314 UINT_32 i = 0;
2315 for (; i < elementBytesLog2; i++)
2316 {
2317 InitChannel(1, 0 , i, &pEquation->addr[i]);
2318 }
2319
// pixelBit[k] is address bit (elementBytesLog2 + k), i.e. the first bit above the element bytes.
2320 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2321
2322 const UINT_32 maxBitsUsed = 4;
2323 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2324 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2325
// x[i] is channel 0 bit (elementBytesLog2 + i); y[i] is channel 1 bit i.
2326 for (i = 0; i < maxBitsUsed; i++)
2327 {
2328 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2329 InitChannel(1, 1, i, &y[i]);
2330 }
2331
// Each case below assigns exactly (8 - elementBytesLog2) bits so that the equation totals 8 bits.
2332 if (IsStandardSwizzle(rsrcType, swMode))
2333 {
2334 switch (elementBytesLog2)
2335 {
2336 case 0:
2337 pixelBit[0] = x[0];
2338 pixelBit[1] = x[1];
2339 pixelBit[2] = x[2];
2340 pixelBit[3] = x[3];
2341 pixelBit[4] = y[0];
2342 pixelBit[5] = y[1];
2343 pixelBit[6] = y[2];
2344 pixelBit[7] = y[3];
2345 break;
2346 case 1:
2347 pixelBit[0] = x[0];
2348 pixelBit[1] = x[1];
2349 pixelBit[2] = x[2];
2350 pixelBit[3] = y[0];
2351 pixelBit[4] = y[1];
2352 pixelBit[5] = y[2];
2353 pixelBit[6] = x[3];
2354 break;
2355 case 2:
2356 pixelBit[0] = x[0];
2357 pixelBit[1] = x[1];
2358 pixelBit[2] = y[0];
2359 pixelBit[3] = y[1];
2360 pixelBit[4] = y[2];
2361 pixelBit[5] = x[2];
2362 break;
2363 case 3:
2364 pixelBit[0] = x[0];
2365 pixelBit[1] = y[0];
2366 pixelBit[2] = y[1];
2367 pixelBit[3] = x[1];
2368 pixelBit[4] = x[2];
2369 break;
2370 case 4:
2371 pixelBit[0] = y[0];
2372 pixelBit[1] = y[1];
2373 pixelBit[2] = x[0];
2374 pixelBit[3] = x[1];
2375 break;
2376 default:
2377 ADDR_ASSERT_ALWAYS();
2378 ret = ADDR_INVALIDPARAMS;
2379 break;
2380 }
2381 }
2382 else if (IsDisplaySwizzle(rsrcType, swMode))
2383 {
2384 switch (elementBytesLog2)
2385 {
2386 case 0:
2387 pixelBit[0] = x[0];
2388 pixelBit[1] = x[1];
2389 pixelBit[2] = x[2];
2390 pixelBit[3] = y[1];
2391 pixelBit[4] = y[0];
2392 pixelBit[5] = y[2];
2393 pixelBit[6] = x[3];
2394 pixelBit[7] = y[3];
2395 break;
2396 case 1:
2397 pixelBit[0] = x[0];
2398 pixelBit[1] = x[1];
2399 pixelBit[2] = x[2];
2400 pixelBit[3] = y[0];
2401 pixelBit[4] = y[1];
2402 pixelBit[5] = y[2];
2403 pixelBit[6] = x[3];
2404 break;
2405 case 2:
2406 pixelBit[0] = x[0];
2407 pixelBit[1] = x[1];
2408 pixelBit[2] = y[0];
2409 pixelBit[3] = x[2];
2410 pixelBit[4] = y[1];
2411 pixelBit[5] = y[2];
2412 break;
2413 case 3:
2414 pixelBit[0] = x[0];
2415 pixelBit[1] = y[0];
2416 pixelBit[2] = x[1];
2417 pixelBit[3] = x[2];
2418 pixelBit[4] = y[1];
2419 break;
2420 case 4:
2421 pixelBit[0] = x[0];
2422 pixelBit[1] = y[0];
2423 pixelBit[2] = x[1];
2424 pixelBit[3] = y[1];
2425 break;
2426 default:
2427 ADDR_ASSERT_ALWAYS();
2428 ret = ADDR_INVALIDPARAMS;
2429 break;
2430 }
2431 }
2432 else if (IsRotateSwizzle(swMode))
2433 {
2434 switch (elementBytesLog2)
2435 {
2436 case 0:
2437 pixelBit[0] = y[0];
2438 pixelBit[1] = y[1];
2439 pixelBit[2] = y[2];
2440 pixelBit[3] = x[1];
2441 pixelBit[4] = x[0];
2442 pixelBit[5] = x[2];
2443 pixelBit[6] = x[3];
2444 pixelBit[7] = y[3];
2445 break;
2446 case 1:
2447 pixelBit[0] = y[0];
2448 pixelBit[1] = y[1];
2449 pixelBit[2] = y[2];
2450 pixelBit[3] = x[0];
2451 pixelBit[4] = x[1];
2452 pixelBit[5] = x[2];
2453 pixelBit[6] = x[3];
2454 break;
2455 case 2:
2456 pixelBit[0] = y[0];
2457 pixelBit[1] = y[1];
2458 pixelBit[2] = x[0];
2459 pixelBit[3] = y[2];
2460 pixelBit[4] = x[1];
2461 pixelBit[5] = x[2];
2462 break;
2463 case 3:
2464 pixelBit[0] = y[0];
2465 pixelBit[1] = x[0];
2466 pixelBit[2] = y[1];
2467 pixelBit[3] = x[1];
2468 pixelBit[4] = x[2];
2469 break;
// The default label deliberately falls through into case 4: an unexpected element size asserts
// first, while elementBytesLog2 == 4 is rejected for rotate swizzle without asserting.
2470 default:
2471 ADDR_ASSERT_ALWAYS();
2472 case 4:
2473 ret = ADDR_INVALIDPARAMS;
2474 break;
2475 }
2476 }
2477 else
2478 {
2479 ADDR_ASSERT_ALWAYS();
2480 ret = ADDR_INVALIDPARAMS;
2481 }
2482
2483 // Post validation
// Debug-only cross-check of the generated equation against the Block256_2d table entry for this
// element size (presumably the highest valid x and y bit must match the micro block's width in
// bytes and its height -- confirm against GetMaxValidChannelIndex).
2484 if (ret == ADDR_OK)
2485 {
2486 MAYBE_UNUSED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2487 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2488 (microBlockDim.w * (1 << elementBytesLog2)));
2489 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2490 }
2491
2492 return ret;
2493 }
2494
2495 /**
2496 ************************************************************************************************************************
2497 * Gfx9Lib::HwlComputeThinEquation
2498 *
2499 * @brief
2500 * Interface function stub of ComputeThinEquation
*
* Builds the address equation for a thin swizzle mode. The low bits come either from a Z-order
* x/y interleave (6 bits) or from HwlComputeBlock256Equation (8 bits); the bits from there up to the
* block size alternate between y (even address bit) and x (odd address bit). For XOR swizzle modes
* the xor1 terms are filled in as well, and for non-PRT modes additionally the xor2 terms.
*
* @param rsrcType          Resource type, forwarded to HwlComputeBlock256Equation
* @param swMode            Swizzle mode
* @param elementBytesLog2  Log2 of the element size in bytes
* @param pEquation         [out] Equation whose addr[], xor1[], xor2[] and numBits are written
2501 *
2502 * @return
2503 * ADDR_E_RETURNCODE
* ADDR_OK on success; ADDR_INVALIDPARAMS for a Z-order swizzle with elementBytesLog2 > 3; otherwise
* whatever HwlComputeBlock256Equation returns
2504 ************************************************************************************************************************
2505 */
2506 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2507 AddrResourceType rsrcType,
2508 AddrSwizzleMode swMode,
2509 UINT_32 elementBytesLog2,
2510 ADDR_EQUATION* pEquation) const
2511 {
2512 ADDR_E_RETURNCODE ret = ADDR_OK;
2513
2514 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2515
2516 UINT_32 maxXorBits = blockSizeLog2;
2517 if (IsNonPrtXor(swMode))
2518 {
2519 // For non-prt-xor, maybe need to initialize some more bits for xor
2520 // The highest xor bit used in equation will be max the following 3 items:
2521 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2522 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2523 // 3. blockSizeLog2
2524
2525 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2526 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2527 GetPipeXorBits(blockSizeLog2) +
2528 2 * GetBankXorBits(blockSizeLog2));
2529 }
2530
// x[] and y[] together must provide enough coordinate bits to cover every address bit up to maxXorBits.
2531 const UINT_32 maxBitsUsed = 14;
2532 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2533 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2534 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2535
// xorExtra[] holds the coordinate bits for address positions at or above blockSizeLog2; they are not
// part of addr[] but can be referenced as xor1 sources below.
2536 const UINT_32 extraXorBits = 16;
2537 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2538 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2539
// x[i] is channel 0 bit (elementBytesLog2 + i); y[i] is channel 1 bit i.
2540 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2541 {
2542 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2543 InitChannel(1, 1, i, &y[i]);
2544 }
2545
2546 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2547
// Address bits below the element size map one-to-one onto channel 0 bits 0..elementBytesLog2-1.
2548 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2549 {
2550 InitChannel(1, 0 , i, &pixelBit[i]);
2551 }
2552
// xIdx / yIdx track the next unused entry of x[] / y[]; lowBits is the number of address bits already assigned.
2553 UINT_32 xIdx = 0;
2554 UINT_32 yIdx = 0;
2555 UINT_32 lowBits = 0;
2556
2557 if (IsZOrderSwizzle(swMode))
2558 {
2559 if (elementBytesLog2 <= 3)
2560 {
// Interleave x and y, starting with x, from the first bit above the element bytes up to bit 5.
2561 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2562 {
2563 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2564 }
2565
2566 lowBits = 6;
2567 }
2568 else
2569 {
2570 ret = ADDR_INVALIDPARAMS;
2571 }
2572 }
2573 else
2574 {
// Non-Z-order modes reuse the 256B micro-block equation for the low 8 bits and continue with the
// first x/y bits beyond the micro block's dimensions.
2575 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2576
2577 if (ret == ADDR_OK)
2578 {
2579 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2580 xIdx = Log2(microBlockDim.w);
2581 yIdx = Log2(microBlockDim.h);
2582 lowBits = 8;
2583 }
2584 }
2585
2586 if (ret == ADDR_OK)
2587 {
// Remaining in-block bits: even address bits take the next y bit, odd address bits the next x bit.
2588 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2589 {
2590 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2591 }
2592
// Continue the same alternation above the block size into xorExtra[] (only non-empty when
// maxXorBits > blockSizeLog2).
2593 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2594 {
2595 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2596 }
2597
2598 if (IsXor(swMode))
2599 {
2600 // Fill XOR bits
2601 UINT_32 pipeStart = m_pipeInterleaveLog2;
2602 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2603
2604 UINT_32 bankStart = pipeStart + pipeXorBits;
2605 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2606
// Pipe bit i is XORed with the address bit at (pipeStart + 2 * pipeXorBits - 1 - i), read from
// addr[] when that position is inside the block and from xorExtra[] otherwise.
2607 for (UINT_32 i = 0; i < pipeXorBits; i++)
2608 {
2609 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2610 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2611 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2612
2613 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2614 }
2615
// Same scheme for the bank bits, which start right above the pipe bits.
2616 for (UINT_32 i = 0; i < bankXorBits; i++)
2617 {
2618 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2619 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2620 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2621
2622 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2623 }
2624
// Non-PRT modes additionally XOR the pipe/bank bits with channel 2 bits, in reversed bit order.
2625 if (IsPrt(swMode) == FALSE)
2626 {
2627 for (UINT_32 i = 0; i < pipeXorBits; i++)
2628 {
2629 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2630 }
2631
2632 for (UINT_32 i = 0; i < bankXorBits; i++)
2633 {
2634 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2635 }
2636 }
2637 }
2638
2639 pEquation->numBits = blockSizeLog2;
2640 }
2641
2642 return ret;
2643 }
2644
2645 /**
2646 ************************************************************************************************************************
2647 * Gfx9Lib::HwlComputeThickEquation
2648 *
2649 * @brief
2650 * Interface function stub of ComputeThickEquation
*
* Builds the address equation for a thick (3D) swizzle mode. The low 10 address bits are assigned
* from a fixed x/y/z table selected by the swizzle type (Z-order or standard) and by elementBytesLog2;
* the bits from 10 up to the block size cycle through x, z, y according to (address bit % 3). For XOR
* swizzle modes the xor1 and xor2 terms of the pipe and bank bits are filled in as well.
*
* @param rsrcType          Resource type; asserted to be a 3D texture
* @param swMode            Swizzle mode
* @param elementBytesLog2  Log2 of the element size in bytes; the tables cover 0..4
* @param pEquation         [out] Equation whose addr[], xor1[], xor2[] and numBits are written
2651 *
2652 * @return
2653 * ADDR_E_RETURNCODE
* ADDR_OK on success; ADDR_INVALIDPARAMS when elementBytesLog2 has no table entry or when the swizzle
* mode is neither Z-order nor standard
2654 ************************************************************************************************************************
2655 */
2656 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2657 AddrResourceType rsrcType,
2658 AddrSwizzleMode swMode,
2659 UINT_32 elementBytesLog2,
2660 ADDR_EQUATION* pEquation) const
2661 {
2662 ADDR_E_RETURNCODE ret = ADDR_OK;
2663
2664 ADDR_ASSERT(IsTex3d(rsrcType));
2665
2666 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2667
2668 UINT_32 maxXorBits = blockSizeLog2;
2669 if (IsNonPrtXor(swMode))
2670 {
2671 // For non-prt-xor, maybe need to initialize some more bits for xor
2672 // The highest xor bit used in equation will be max the following 3:
2673 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2674 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2675 // 3. blockSizeLog2
2676
2677 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2678 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2679 GetPipeXorBits(blockSizeLog2) +
2680 3 * GetBankXorBits(blockSizeLog2));
2681 }
2682
// Address bits below the element size map one-to-one onto channel 0 bits 0..elementBytesLog2-1.
2683 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2684 {
2685 InitChannel(1, 0 , i, &pEquation->addr[i]);
2686 }
2687
// pixelBit[k] is address bit (elementBytesLog2 + k) while the low-bit tables below are applied.
2688 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2689
// x[], y[] and z[] together must provide enough coordinate bits to cover every address bit up to maxXorBits.
2690 const UINT_32 maxBitsUsed = 12;
2691 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2692 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2693 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2694 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2695
// xorExtra[] holds the coordinate bits for address positions at or above blockSizeLog2; they are not
// part of addr[] but can be referenced as xor sources below.
2696 const UINT_32 extraXorBits = 24;
2697 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2698 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2699
// x[i] is channel 0 bit (elementBytesLog2 + i); y[i] is channel 1 bit i; z[i] is channel 2 bit i.
2700 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2701 {
2702 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2703 InitChannel(1, 1, i, &y[i]);
2704 InitChannel(1, 2, i, &z[i]);
2705 }
2706
// Each case below assigns exactly (10 - elementBytesLog2) bits so that the low part totals 10 bits.
2707 if (IsZOrderSwizzle(swMode))
2708 {
2709 switch (elementBytesLog2)
2710 {
2711 case 0:
2712 pixelBit[0] = x[0];
2713 pixelBit[1] = y[0];
2714 pixelBit[2] = x[1];
2715 pixelBit[3] = y[1];
2716 pixelBit[4] = z[0];
2717 pixelBit[5] = z[1];
2718 pixelBit[6] = x[2];
2719 pixelBit[7] = z[2];
2720 pixelBit[8] = y[2];
2721 pixelBit[9] = x[3];
2722 break;
2723 case 1:
2724 pixelBit[0] = x[0];
2725 pixelBit[1] = y[0];
2726 pixelBit[2] = x[1];
2727 pixelBit[3] = y[1];
2728 pixelBit[4] = z[0];
2729 pixelBit[5] = z[1];
2730 pixelBit[6] = z[2];
2731 pixelBit[7] = y[2];
2732 pixelBit[8] = x[2];
2733 break;
2734 case 2:
2735 pixelBit[0] = x[0];
2736 pixelBit[1] = y[0];
2737 pixelBit[2] = x[1];
2738 pixelBit[3] = z[0];
2739 pixelBit[4] = y[1];
2740 pixelBit[5] = z[1];
2741 pixelBit[6] = y[2];
2742 pixelBit[7] = x[2];
2743 break;
2744 case 3:
2745 pixelBit[0] = x[0];
2746 pixelBit[1] = y[0];
2747 pixelBit[2] = z[0];
2748 pixelBit[3] = x[1];
2749 pixelBit[4] = z[1];
2750 pixelBit[5] = y[1];
2751 pixelBit[6] = x[2];
2752 break;
2753 case 4:
2754 pixelBit[0] = x[0];
2755 pixelBit[1] = y[0];
2756 pixelBit[2] = z[0];
2757 pixelBit[3] = z[1];
2758 pixelBit[4] = y[1];
2759 pixelBit[5] = x[1];
2760 break;
2761 default:
2762 ADDR_ASSERT_ALWAYS();
2763 ret = ADDR_INVALIDPARAMS;
2764 break;
2765 }
2766 }
2767 else if (IsStandardSwizzle(rsrcType, swMode))
2768 {
2769 switch (elementBytesLog2)
2770 {
2771 case 0:
2772 pixelBit[0] = x[0];
2773 pixelBit[1] = x[1];
2774 pixelBit[2] = x[2];
2775 pixelBit[3] = x[3];
2776 pixelBit[4] = y[0];
2777 pixelBit[5] = y[1];
2778 pixelBit[6] = z[0];
2779 pixelBit[7] = z[1];
2780 pixelBit[8] = z[2];
2781 pixelBit[9] = y[2];
2782 break;
2783 case 1:
2784 pixelBit[0] = x[0];
2785 pixelBit[1] = x[1];
2786 pixelBit[2] = x[2];
2787 pixelBit[3] = y[0];
2788 pixelBit[4] = y[1];
2789 pixelBit[5] = z[0];
2790 pixelBit[6] = z[1];
2791 pixelBit[7] = z[2];
2792 pixelBit[8] = y[2];
2793 break;
2794 case 2:
2795 pixelBit[0] = x[0];
2796 pixelBit[1] = x[1];
2797 pixelBit[2] = y[0];
2798 pixelBit[3] = y[1];
2799 pixelBit[4] = z[0];
2800 pixelBit[5] = z[1];
2801 pixelBit[6] = y[2];
2802 pixelBit[7] = x[2];
2803 break;
2804 case 3:
2805 pixelBit[0] = x[0];
2806 pixelBit[1] = y[0];
2807 pixelBit[2] = y[1];
2808 pixelBit[3] = z[0];
2809 pixelBit[4] = z[1];
2810 pixelBit[5] = x[1];
2811 pixelBit[6] = x[2];
2812 break;
2813 case 4:
2814 pixelBit[0] = y[0];
2815 pixelBit[1] = y[1];
2816 pixelBit[2] = z[0];
2817 pixelBit[3] = z[1];
2818 pixelBit[4] = x[0];
2819 pixelBit[5] = x[1];
2820 break;
2821 default:
2822 ADDR_ASSERT_ALWAYS();
2823 ret = ADDR_INVALIDPARAMS;
2824 break;
2825 }
2826 }
2827 else
2828 {
2829 ADDR_ASSERT_ALWAYS();
2830 ret = ADDR_INVALIDPARAMS;
2831 }
2832
2833 if (ret == ADDR_OK)
2834 {
// Resume with the first x/y/z bits beyond the dimensions of the 1KB micro block for this element size.
2835 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2836 UINT_32 xIdx = Log2(microBlockDim.w);
2837 UINT_32 yIdx = Log2(microBlockDim.h);
2838 UINT_32 zIdx = Log2(microBlockDim.d);
2839
// From here on pixelBit[] is indexed by absolute address bit.
2840 pixelBit = pEquation->addr;
2841
// Sanity check: the tables above must have filled exactly address bits 0..9.
2842 const UINT_32 lowBits = 10;
2843 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2844 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2845
// Remaining in-block bits cycle by (address bit % 3): 0 -> next x, 1 -> next z, 2 -> next y.
2846 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2847 {
2848 if ((i % 3) == 0)
2849 {
2850 pixelBit[i] = x[xIdx++];
2851 }
2852 else if ((i % 3) == 1)
2853 {
2854 pixelBit[i] = z[zIdx++];
2855 }
2856 else
2857 {
2858 pixelBit[i] = y[yIdx++];
2859 }
2860 }
2861
// Continue the same cycle above the block size into xorExtra[] (only non-empty when
// maxXorBits > blockSizeLog2).
2862 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2863 {
2864 if ((i % 3) == 0)
2865 {
2866 xorExtra[i - blockSizeLog2] = x[xIdx++];
2867 }
2868 else if ((i % 3) == 1)
2869 {
2870 xorExtra[i - blockSizeLog2] = z[zIdx++];
2871 }
2872 else
2873 {
2874 xorExtra[i - blockSizeLog2] = y[yIdx++];
2875 }
2876 }
2877
2878 if (IsXor(swMode))
2879 {
2880 // Fill XOR bits
// Each pipe bit i gets two XOR sources taken from adjacent address positions counted down from
// (pipeStart + 3 * pipeXorBits - 1) in steps of two; a source is read from addr[] when its position
// is inside the block and from xorExtra[] otherwise.
2881 UINT_32 pipeStart = m_pipeInterleaveLog2;
2882 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2883 for (UINT_32 i = 0; i < pipeXorBits; i++)
2884 {
2885 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2886 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2887 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2888
2889 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2890
2891 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2892 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2893 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2894
2895 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2896 }
2897
// Same scheme for the bank bits, which start right above the pipe bits.
2898 UINT_32 bankStart = pipeStart + pipeXorBits;
2899 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2900 for (UINT_32 i = 0; i < bankXorBits; i++)
2901 {
2902 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2903 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2904 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2905
2906 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2907
2908 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2909 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2910 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2911
2912 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2913 }
2914 }
2915
2916 pEquation->numBits = blockSizeLog2;
2917 }
2918
2919 return ret;
2920 }
2921
2922 /**
2923 ************************************************************************************************************************
2924 * Gfx9Lib::IsValidDisplaySwizzleMode
2925 *
2926 * @brief
2927 * Check if a swizzle mode is supported by display engine
2928 *
2929 * @return
2930 * TRUE is swizzle mode is supported by display engine
2931 ************************************************************************************************************************
2932 */
2933 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2934 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2935 {
2936 BOOL_32 support = FALSE;
2937
2938 if (m_settings.isDce12)
2939 {
2940 switch (pIn->swizzleMode)
2941 {
2942 case ADDR_SW_256B_D:
2943 case ADDR_SW_256B_R:
2944 support = (pIn->bpp == 32);
2945 break;
2946
2947 case ADDR_SW_LINEAR:
2948 case ADDR_SW_4KB_D:
2949 case ADDR_SW_4KB_R:
2950 case ADDR_SW_64KB_D:
2951 case ADDR_SW_64KB_R:
2952 case ADDR_SW_VAR_D:
2953 case ADDR_SW_VAR_R:
2954 case ADDR_SW_4KB_D_X:
2955 case ADDR_SW_4KB_R_X:
2956 case ADDR_SW_64KB_D_X:
2957 case ADDR_SW_64KB_R_X:
2958 case ADDR_SW_VAR_D_X:
2959 case ADDR_SW_VAR_R_X:
2960 support = (pIn->bpp <= 64);
2961 break;
2962
2963 default:
2964 break;
2965 }
2966 }
2967 else if (m_settings.isDcn1)
2968 {
2969 switch (pIn->swizzleMode)
2970 {
2971 case ADDR_SW_4KB_D:
2972 case ADDR_SW_64KB_D:
2973 case ADDR_SW_VAR_D:
2974 case ADDR_SW_64KB_D_T:
2975 case ADDR_SW_4KB_D_X:
2976 case ADDR_SW_64KB_D_X:
2977 case ADDR_SW_VAR_D_X:
2978 support = (pIn->bpp == 64);
2979 break;
2980
2981 case ADDR_SW_LINEAR:
2982 case ADDR_SW_4KB_S:
2983 case ADDR_SW_64KB_S:
2984 case ADDR_SW_VAR_S:
2985 case ADDR_SW_64KB_S_T:
2986 case ADDR_SW_4KB_S_X:
2987 case ADDR_SW_64KB_S_X:
2988 case ADDR_SW_VAR_S_X:
2989 support = (pIn->bpp <= 64);
2990 break;
2991
2992 default:
2993 break;
2994 }
2995 }
2996 else
2997 {
2998 ADDR_NOT_IMPLEMENTED();
2999 }
3000
3001 return support;
3002 }
3003
3004 /**
3005 ************************************************************************************************************************
3006 * Gfx9Lib::HwlComputePipeBankXor
3007 *
3008 * @brief
3009 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3010 *
3011 * @return
3012 * PipeBankXor value
3013 ************************************************************************************************************************
3014 */
3015 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3016 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3017 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
3018 {
3019 if (IsXor(pIn->swizzleMode))
3020 {
3021 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3022 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3023 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3024
3025 UINT_32 pipeXor = 0;
3026 UINT_32 bankXor = 0;
3027
3028 const UINT_32 bankMask = (1 << bankBits) - 1;
3029 const UINT_32 index = pIn->surfIndex & bankMask;
3030
3031 const UINT_32 bpp = pIn->flags.fmask ?
3032 GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3033 if (bankBits == 4)
3034 {
3035 static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3036 static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3037
3038 bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3039 }
3040 else if (bankBits > 0)
3041 {
3042 UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3043 bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3044 bankXor = (index * bankIncrease) & bankMask;
3045 }
3046
3047 pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3048 }
3049 else
3050 {
3051 pOut->pipeBankXor = 0;
3052 }
3053
3054 return ADDR_OK;
3055 }
3056
3057 /**
3058 ************************************************************************************************************************
3059 * Gfx9Lib::HwlComputeSlicePipeBankXor
3060 *
3061 * @brief
3062 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3063 *
3064 * @return
3065 * PipeBankXor value
3066 ************************************************************************************************************************
3067 */
3068 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3069 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3070 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const
3071 {
3072 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3073 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3074 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3075
3076 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3077 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3078
3079 pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3080
3081 return ADDR_OK;
3082 }
3083
3084 /**
3085 ************************************************************************************************************************
3086 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3087 *
3088 * @brief
3089 * Compute sub resource offset to support swizzle pattern
3090 *
3091 * @return
3092 * Offset
3093 ************************************************************************************************************************
3094 */
3095 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3096 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3097 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const
3098 {
3099 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3100
3101 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3102 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3103 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3104 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3105 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3106 UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3107
3108 pOut->offset = pIn->slice * pIn->sliceSize +
3109 pIn->macroBlockOffset +
3110 (pIn->mipTailOffset ^ pipeBankXor) -
3111 static_cast<UINT_64>(pipeBankXor);
3112 return ADDR_OK;
3113 }
3114
3115 /**
3116 ************************************************************************************************************************
3117 * Gfx9Lib::ValidateNonSwModeParams
3118 *
3119 * @brief
3120 * Validate compute surface info params except swizzle mode
3121 *
3122 * @return
3123 * TRUE if parameters are valid, FALSE otherwise
3124 ************************************************************************************************************************
3125 */
3126 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3127 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3128 {
3129 BOOL_32 valid = TRUE;
3130
3131 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3132 {
3133 ADDR_ASSERT_ALWAYS();
3134 valid = FALSE;
3135 }
3136
3137 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3138 {
3139 ADDR_ASSERT_ALWAYS();
3140 valid = FALSE;
3141 }
3142
3143 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3144 const BOOL_32 msaa = (pIn->numFrags > 1);
3145 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3146
3147 const AddrResourceType rsrcType = pIn->resourceType;
3148 const BOOL_32 tex3d = IsTex3d(rsrcType);
3149 const BOOL_32 tex2d = IsTex2d(rsrcType);
3150 const BOOL_32 tex1d = IsTex1d(rsrcType);
3151
3152 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3153 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3154 const BOOL_32 display = flags.display || flags.rotated;
3155 const BOOL_32 stereo = flags.qbStereo;
3156 const BOOL_32 fmask = flags.fmask;
3157
3158 // Resource type check
3159 if (tex1d)
3160 {
3161 if (msaa || zbuffer || display || stereo || isBc || fmask)
3162 {
3163 ADDR_ASSERT_ALWAYS();
3164 valid = FALSE;
3165 }
3166 }
3167 else if (tex2d)
3168 {
3169 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
3170 {
3171 ADDR_ASSERT_ALWAYS();
3172 valid = FALSE;
3173 }
3174 }
3175 else if (tex3d)
3176 {
3177 if (msaa || zbuffer || display || stereo || fmask)
3178 {
3179 ADDR_ASSERT_ALWAYS();
3180 valid = FALSE;
3181 }
3182 }
3183 else
3184 {
3185 ADDR_ASSERT_ALWAYS();
3186 valid = FALSE;
3187 }
3188
3189 return valid;
3190 }
3191
3192 /**
3193 ************************************************************************************************************************
3194 * Gfx9Lib::ValidateSwModeParams
3195 *
3196 * @brief
3197 * Validate compute surface info related to swizzle mode
3198 *
3199 * @return
3200 * TRUE if parameters are valid, FALSE otherwise
3201 ************************************************************************************************************************
3202 */
3203 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3204 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3205 {
3206 BOOL_32 valid = TRUE;
3207
3208 if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
3209 {
3210 ADDR_ASSERT_ALWAYS();
3211 valid = FALSE;
3212 }
3213
3214 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3215 const BOOL_32 msaa = (pIn->numFrags > 1);
3216 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3217 const BOOL_32 is422 = ElemLib::IsMacroPixelPacked(pIn->format);
3218
3219 const AddrResourceType rsrcType = pIn->resourceType;
3220 const BOOL_32 tex3d = IsTex3d(rsrcType);
3221 const BOOL_32 tex2d = IsTex2d(rsrcType);
3222 const BOOL_32 tex1d = IsTex1d(rsrcType);
3223
3224 const AddrSwizzleMode swizzle = pIn->swizzleMode;
3225 const BOOL_32 linear = IsLinear(swizzle);
3226 const BOOL_32 blk256B = IsBlock256b(swizzle);
3227 const BOOL_32 blkVar = IsBlockVariable(swizzle);
3228 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
3229
3230 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3231 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3232 const BOOL_32 color = flags.color;
3233 const BOOL_32 texture = flags.texture;
3234 const BOOL_32 display = flags.display || flags.rotated;
3235 const BOOL_32 prt = flags.prt;
3236 const BOOL_32 fmask = flags.fmask;
3237
3238 const BOOL_32 thin3d = tex3d && flags.view3dAs2dArray;
3239 const BOOL_32 zMaxMip = tex3d && mipmap &&
3240 (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3241
3242 // Misc check
3243 if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3244 {
3245 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3246 ADDR_ASSERT_ALWAYS();
3247 valid = FALSE;
3248 }
3249
3250 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3251 {
3252 ADDR_ASSERT_ALWAYS();
3253 valid = FALSE;
3254 }
3255
3256 if ((pIn->bpp == 96) && (linear == FALSE))
3257 {
3258 ADDR_ASSERT_ALWAYS();
3259 valid = FALSE;
3260 }
3261
3262 if (prt && isNonPrtXor)
3263 {
3264 ADDR_ASSERT_ALWAYS();
3265 valid = FALSE;
3266 }
3267
3268 // Resource type check
3269 if (tex1d)
3270 {
3271 if (linear == FALSE)
3272 {
3273 ADDR_ASSERT_ALWAYS();
3274 valid = FALSE;
3275 }
3276 }
3277
3278 // Swizzle type check
3279 if (linear)
3280 {
3281 if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3282 ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3283 {
3284 ADDR_ASSERT_ALWAYS();
3285 valid = FALSE;
3286 }
3287 }
3288 else if (IsZOrderSwizzle(swizzle))
3289 {
3290 if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3291 {
3292 ADDR_ASSERT_ALWAYS();
3293 valid = FALSE;
3294 }
3295 }
3296 else if (IsStandardSwizzle(swizzle))
3297 {
3298 if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3299 {
3300 ADDR_ASSERT_ALWAYS();
3301 valid = FALSE;
3302 }
3303 }
3304 else if (IsDisplaySwizzle(swizzle))
3305 {
3306 if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3307 {
3308 ADDR_ASSERT_ALWAYS();
3309 valid = FALSE;
3310 }
3311 }
3312 else if (IsRotateSwizzle(swizzle))
3313 {
3314 if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3315 {
3316 ADDR_ASSERT_ALWAYS();
3317 valid = FALSE;
3318 }
3319 }
3320 else
3321 {
3322 ADDR_ASSERT_ALWAYS();
3323 valid = FALSE;
3324 }
3325
3326 // Block type check
3327 if (blk256B)
3328 {
3329 if (prt || zbuffer || tex3d || mipmap || msaa)
3330 {
3331 ADDR_ASSERT_ALWAYS();
3332 valid = FALSE;
3333 }
3334 }
3335 else if (blkVar)
3336 {
3337 ADDR_ASSERT_ALWAYS();
3338 valid = FALSE;
3339 }
3340
3341 return valid;
3342 }
3343
3344 /**
3345 ************************************************************************************************************************
3346 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3347 *
3348 * @brief
3349 * Compute surface info sanity check
3350 *
3351 * @return
3352 * ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3353 ************************************************************************************************************************
3354 */
3355 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3356 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3357 {
3358 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3359 }
3360
3361 /**
3362 ************************************************************************************************************************
3363 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3364 *
3365 * @brief
3366 * Internal function to get suggested surface information for client to use
3367 *
3368 * @return
3369 * ADDR_E_RETURNCODE
3370 ************************************************************************************************************************
3371 */
3372 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3373 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] surface description and client restrictions
3374 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const ///< [out] valid mode sets and the selected swizzle mode
3375 {
// Overview: start from the swizzle modes the client did not forbid, filter that mask by resource type,
// format and surface flags, publish what is left in pOut, then pick one mode from it (block size first,
// swizzle type second). returnCode stays ADDR_INVALIDPARAMS unless at least one mode survives the filtering.
3376 ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3377 ElemLib* pElemLib = GetElemLib();
3378
// Zero dimensions and sample counts are treated as 1; a zero numFrags falls back to numSamples.
3379 UINT_32 bpp = pIn->bpp;
3380 UINT_32 width = Max(pIn->width, 1u);
3381 UINT_32 height = Max(pIn->height, 1u);
3382 UINT_32 numSamples = Max(pIn->numSamples, 1u);
3383 UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3384
3385 if (pIn->flags.fmask)
3386 {
// FMASK: bpp is derived from the sample/fragment counts and the surface is then handled as a
// single-sample 2D resource.
3387 bpp = GetFmaskBpp(numSamples, numFrags);
3388 numFrags = 1;
3389 numSamples = 1;
3390 pOut->resourceType = ADDR_RSRC_TEX_2D;
3391 }
3392 else
3393 {
3394 // Set format to INVALID will skip this conversion
3395 if (pIn->format != ADDR_FMT_INVALID)
3396 {
3397 UINT_32 expandX, expandY;
3398
3399 // Don't care for this case
3400 ElemMode elemMode = ADDR_UNCOMPRESSED;
3401
3402 // Get compression/expansion factors and element mode which indicates compression/expansion
3403 bpp = pElemLib->GetBitsPerPixel(pIn->format,
3404 &elemMode,
3405 &expandX,
3406 &expandY);
3407
// Let the element library adjust bpp/width/height for the element mode reported above.
// basePitch is only a scratch in/out argument; it is not read again in this function.
3408 UINT_32 basePitch = 0;
3409 GetElemLib()->AdjustSurfaceInfo(elemMode,
3410 expandX,
3411 expandY,
3412 &bpp,
3413 &basePitch,
3414 &width,
3415 &height);
3416 }
3417
3418 // The output may get changed for volume(3D) texture resource in future
3419 pOut->resourceType = pIn->resourceType;
3420 }
3421
3422 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
3423 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3424 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
3425 const BOOL_32 displayRsrc = pIn->flags.display || pIn->flags.rotated;
3426
3427 // Pre sanity check on non swizzle mode parameters
// localIn carries the normalized values computed above; its swizzleMode is left zero-initialized here
// and is only filled in by the DEBUG post-check further down.
3428 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3429 localIn.flags = pIn->flags;
3430 localIn.resourceType = pOut->resourceType;
3431 localIn.format = pIn->format;
3432 localIn.bpp = bpp;
3433 localIn.width = width;
3434 localIn.height = height;
3435 localIn.numSlices = numSlices;
3436 localIn.numMipLevels = numMipLevels;
3437 localIn.numSamples = numSamples;
3438 localIn.numFrags = numFrags;
3439
3440 if (ValidateNonSwModeParams(&localIn))
3441 {
3442 // Forbid swizzle mode(s) by client setting, for simplicity we never allow VAR swizzle mode for GFX9
3443 ADDR2_SWMODE_SET allowedSwModeSet = {};
3444 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3445 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask;
3446 allowedSwModeSet.value |= pIn->forbiddenBlock.macro4KB ? 0 : Gfx9Blk4KBSwModeMask;
3447 allowedSwModeSet.value |= pIn->forbiddenBlock.macro64KB ? 0 : Gfx9Blk64KBSwModeMask;
3448
// A non-zero preferredSwSet acts as a whitelist: every swizzle type the client did not select is removed.
3449 if (pIn->preferredSwSet.value != 0)
3450 {
3451 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3452 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3453 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3454 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3455 }
3456
3457 if (pIn->noXor)
3458 {
3459 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3460 }
3461
// Drop every block size that is larger than the client's maximum alignment (0 means no limit).
3462 if (pIn->maxAlign > 0)
3463 {
3464 if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
3465 {
3466 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3467 }
3468
3469 if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
3470 {
3471 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3472 }
3473
3474 if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
3475 {
3476 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3477 }
3478 }
3479
3480 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3481 switch (pOut->resourceType)
3482 {
3483 case ADDR_RSRC_TEX_1D:
3484 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3485 break;
3486
3487 case ADDR_RSRC_TEX_2D:
3488 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3489
// 2D surfaces wider than 64 bpp may use neither rotate nor Z swizzle types.
3490 if (bpp > 64)
3491 {
3492 allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3493 }
3494 break;
3495
3496 case ADDR_RSRC_TEX_3D:
3497 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3498
3499 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3500 {
3501 // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3502 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3503 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3504 allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3505 }
3506
// 128 bpp 3D color surfaces may not use the standard swizzle type.
3507 if ((bpp == 128) && pIn->flags.color)
3508 {
3509 allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3510 }
3511
// A 3D surface viewed as a 2D array is limited to the "thin" 3D modes plus linear.
3512 if (pIn->flags.view3dAs2dArray)
3513 {
3514 allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3515 }
3516 break;
3517
3518 default:
3519 ADDR_ASSERT_ALWAYS();
3520 allowedSwModeSet.value = 0;
3521 break;
3522 }
3523
// The 32_32_32 format keeps linear modes only.
3524 if (pIn->format == ADDR_FMT_32_32_32)
3525 {
3526 allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3527 }
3528
// Block-compressed formats keep only the S and D swizzle types; linear additionally stays allowed
// when the surface is not flagged as a texture.
3529 if (ElemLib::IsBlockCompressed(pIn->format))
3530 {
3531 if (pIn->flags.texture)
3532 {
3533 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3534 }
3535 else
3536 {
3537 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3538 }
3539 }
3540
// Z swizzle types are removed for macro-pixel-packed formats and for MSAA surfaces that are wider than
// 32 bpp or flagged color/unordered.
3541 if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3542 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3543 {
3544 allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3545 }
3546
// FMASK and depth/stencil surfaces are restricted to the Z swizzle types.
3547 if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3548 {
3549 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3550
3551 if (pIn->flags.noMetadata == FALSE)
3552 {
3553 if (pIn->flags.depth &&
3554 pIn->flags.texture &&
3555 (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3556 {
3557 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3558 // equation from wrong address within memory range a tile covered and use the
3559 // garbage data for compressed Z reading which finally leads to corruption.
3560 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3561 }
3562
3563 if (m_settings.htileCacheRbConflict &&
3564 (pIn->flags.depth || pIn->flags.stencil) &&
3565 (numSlices > 1) &&
3566 (pIn->flags.metaRbUnaligned == FALSE) &&
3567 (pIn->flags.metaPipeUnaligned == FALSE))
3568 {
3569 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3570 allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3571 }
3572 }
3573 }
3574
// Multisampled surfaces are limited to the modes in the MSAA mask.
3575 if (msaa)
3576 {
3577 allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3578 }
3579
3580 if ((numFrags > 1) &&
3581 (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
3582 {
3583 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3584 allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3585 }
3586
// Mipmapped surfaces cannot use 256B blocks.
3587 if (numMipLevels > 1)
3588 {
3589 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3590 }
3591
// Displayable surfaces are further limited by display engine generation and bpp.
3592 if (displayRsrc)
3593 {
3594 if (m_settings.isDce12)
3595 {
3596 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3597 }
3598 else if (m_settings.isDcn1)
3599 {
3600 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3601 }
3602 else
3603 {
3604 ADDR_NOT_IMPLEMENTED();
3605 }
3606 }
3607
3608 if (allowedSwModeSet.value != 0)
3609 {
3610 #if DEBUG
3611 // Post sanity check, at least AddrLib should accept the output generated by its own
// Bit i of the mask corresponds to swizzle mode enum value i; every set bit is validated in turn.
3612 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3613
3614 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3615 {
3616 if (validateSwModeSet & 1)
3617 {
3618 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3619 ADDR_ASSERT(ValidateSwModeParams(&localIn));
3620 }
3621
3622 validateSwModeSet >>= 1;
3623 }
3624 #endif
3625
// Report the complete set of valid choices before narrowing it down to a single mode below.
3626 pOut->validSwModeSet = allowedSwModeSet;
3627 pOut->canXor = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3628 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet);
3629 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3630
3631 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3632
// No client preference is reported back as "all swizzle types".
3633 if (pOut->clientPreferredSwSet.value == 0)
3634 {
3635 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3636 }
3637
// Linear is selected only when no other mode is left.
3638 if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3639 {
3640 pOut->swizzleMode = ADDR_SW_LINEAR;
3641 }
3642 else
3643 {
3644 // Always ignore linear swizzle mode if there is other choice.
3645 allowedSwModeSet.swLinear = 0;
3646
3647 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet);
3648
3649 // Determine block size if there is 2 or more block type candidates
3650 if (IsPow2(allowedBlockSet.value) == FALSE)
3651 {
3652 const AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {ADDR_SW_256B, ADDR_SW_4KB, ADDR_SW_64KB};
3653 Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
3654 Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
3655 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3656
// A candidate block size replaces the current choice when padSize * ratioHi <= minSize * ratioLow,
// i.e. its padded size may exceed the current one by a factor of up to ratioLow / ratioHi:
// 1/1 with minimizeAlign, 3/2 with opt4space, 2/1 otherwise.
3657 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
3658 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
// Client's minimum size alignment, rounded up to a power of two and expressed in elements (at least 1).
3659 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3660 UINT_32 minSizeBlk = AddrBlockMicro;
3661 UINT_64 minSize = 0;
3662
// Candidates are visited in swMode[] order (256B, 4KB, 64KB); the first allowed one always becomes
// the initial choice because minSize is still 0.
3663 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3664 {
3665 if (allowedBlockSet.value & (1 << i))
3666 {
3667 ComputeBlockDimensionForSurf(&blkDim[i].w,
3668 &blkDim[i].h,
3669 &blkDim[i].d,
3670 bpp,
3671 numFrags,
3672 pOut->resourceType,
3673 swMode[i]);
3674
// For displayable surfaces the padding estimate uses a block width aligned to 32 elements.
3675 if (displayRsrc)
3676 {
3677 blkDim[i].w = PowTwoAlign(blkDim[i].w, 32);
3678 }
3679
3680 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
3681 padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
3682
3683 if ((minSize == 0) ||
3684 ((padSize[i] * ratioHi) <= (minSize * ratioLow)))
3685 {
3686 minSize = padSize[i];
3687 minSizeBlk = i;
3688 }
3689 }
3690 }
3691
// Override: when the surface fits into a single 256B block in width and height and the requested size
// alignment does not exceed 256B, the 256B block is chosen regardless of the ratio test above.
3692 if ((allowedBlockSet.micro == TRUE) &&
3693 (width <= blkDim[AddrBlockMicro].w) &&
3694 (height <= blkDim[AddrBlockMicro].h) &&
3695 (NextPow2(pIn->minSizeAlign) <= GetBlockSize(ADDR_SW_256B)))
3696 {
3697 minSizeBlk = AddrBlockMicro;
3698 }
3699
// Keep only the swizzle modes of the chosen block size.
3700 if (minSizeBlk == AddrBlockMicro)
3701 {
3702 allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3703 }
3704 else if (minSizeBlk == AddrBlock4KB)
3705 {
3706 allowedSwModeSet.value &= Gfx9Blk4KBSwModeMask;
3707 }
3708 else
3709 {
3710 ADDR_ASSERT(minSizeBlk == AddrBlock64KB);
3711 allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3712 }
3713 }
3714
3715 // Block type should be determined.
3716 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet).value));
3717
3718 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3719
3720 // Determine swizzle type if there is 2 or more swizzle type candidates
3721 if (IsPow2(allowedSwSet.value) == FALSE)
3722 {
// Block-compressed formats: D is preferred over S.
3723 if (ElemLib::IsBlockCompressed(pIn->format))
3724 {
3725 if (allowedSwSet.sw_D)
3726 {
3727 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3728 }
3729 else
3730 {
3731 ADDR_ASSERT(allowedSwSet.sw_S);
3732 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3733 }
3734 }
// Macro-pixel-packed formats: preference order is S, then D, then R.
3735 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3736 {
3737 if (allowedSwSet.sw_S)
3738 {
3739 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3740 }
3741 else if (allowedSwSet.sw_D)
3742 {
3743 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3744 }
3745 else
3746 {
3747 ADDR_ASSERT(allowedSwSet.sw_R);
3748 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3749 }
3750 }
// 3D resources: D for color surfaces when available, otherwise Z, then S.
3751 else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3752 {
3753 if (pIn->flags.color && allowedSwSet.sw_D)
3754 {
3755 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3756 }
3757 else if (allowedSwSet.sw_Z)
3758 {
3759 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3760 }
3761 else
3762 {
3763 ADDR_ASSERT(allowedSwSet.sw_S);
3764 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3765 }
3766 }
// Everything else: R for rotated surfaces, D for displayable ones, then S, and Z as the last resort.
3767 else
3768 {
3769 if (pIn->flags.rotated && allowedSwSet.sw_R)
3770 {
3771 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3772 }
3773 else if (displayRsrc && allowedSwSet.sw_D)
3774 {
3775 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3776 }
3777 else if (allowedSwSet.sw_S)
3778 {
3779 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3780 }
3781 else
3782 {
3783 ADDR_ASSERT(allowedSwSet.sw_Z);
3784 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3785 }
3786 }
3787 }
3788
3789 // Swizzle type should be determined.
3790 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3791
3792 // Determine swizzle mode now - always select the "largest" swizzle mode for a given block type +
3793 // swizzle type combination. For example, for AddrBlock64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3794 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3795 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3796 }
3797
3798 returnCode = ADDR_OK;
3799 }
3800 else
3801 {
3802 // Invalid combination...
3803 ADDR_ASSERT_ALWAYS();
3804 }
3805 }
3806 else
3807 {
3808 // Invalid combination...
3809 ADDR_ASSERT_ALWAYS();
3810 }
3811
3812 return returnCode;
3813 }
3814
3815 /**
3816 ************************************************************************************************************************
3817 * Gfx9Lib::ComputeStereoInfo
3818 *
3819 * @brief
3820 * Compute height alignment and right eye pipeBankXor for stereo surface
3821 *
3822 * @return
3823 * Error code
3824 *
3825 ************************************************************************************************************************
3826 */
3827 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3828 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3829 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
3830 UINT_32* pHeightAlign
3831 ) const
3832 {
3833 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3834
3835 UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3836
3837 if (eqIndex < m_numEquations)
3838 {
3839 if (IsXor(pIn->swizzleMode))
3840 {
3841 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3842 const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2);
3843 const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
3844 const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
3845 const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3846 MAYBE_UNUSED const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
3847
3848 ADDR_ASSERT(maxYCoordBlock256 ==
3849 GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1));
3850
3851 const UINT_32 maxYCoordInBaseEquation =
3852 (blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256;
3853
3854 ADDR_ASSERT(maxYCoordInBaseEquation ==
3855 GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3856
3857 const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3858
3859 ADDR_ASSERT(maxYCoordInPipeXor ==
3860 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3861
3862 const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3863 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3864
3865 ADDR_ASSERT(maxYCoordInBankXor ==
3866 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3867
3868 const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3869
3870 if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3871 {
3872 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3873
3874 if (pOut->pStereoInfo != NULL)
3875 {
3876 pOut->pStereoInfo->rightSwizzle = 0;
3877
3878 if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3879 {
3880 if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3881 {
3882 pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3883 }
3884
3885 if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3886 {
3887 pOut->pStereoInfo->rightSwizzle |=
3888 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3889 }
3890
3891 ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3892 GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3893 numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3894 }
3895 }
3896 }
3897 }
3898 }
3899 else
3900 {
3901 ADDR_ASSERT_ALWAYS();
3902 returnCode = ADDR_ERROR;
3903 }
3904
3905 return returnCode;
3906 }
3907
3908 /**
3909 ************************************************************************************************************************
3910 * Gfx9Lib::HwlComputeSurfaceInfoTiled
3911 *
3912 * @brief
3913 * Internal function to calculate alignment for tiled surface
3914 *
3915 * @return
3916 * ADDR_E_RETURNCODE
3917 ************************************************************************************************************************
3918 */
3919 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3920 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3921 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3922 ) const
3923 {
// Overview: derive the block dimensions for the swizzle mode, align pitch/height/slices to them, lay out
// the mip chain (when there is more than one level), then compute slice size, surface size and base alignment.
3924 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3925 &pOut->blockHeight,
3926 &pOut->blockSlices,
3927 pIn->bpp,
3928 pIn->numFrags,
3929 pIn->resourceType,
3930 pIn->swizzleMode);
3931
3932 if (returnCode == ADDR_OK)
3933 {
3934 UINT_32 pitchAlignInElement = pOut->blockWidth;
3935
// The stricter pitch alignment applies only to single-mip, single-sample 2D display/rotated surfaces.
3936 if ((IsTex2d(pIn->resourceType) == TRUE) &&
3937 (pIn->flags.display || pIn->flags.rotated) &&
3938 (pIn->numMipLevels <= 1) &&
3939 (pIn->numSamples <= 1) &&
3940 (pIn->numFrags <= 1))
3941 {
3942 // Display engine needs pitch align to be at least 32 pixels.
3943 pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3944 }
3945
3946 pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3947
// A client-supplied pitch is honored for single-mip surfaces only. It must be a multiple of the pitch
// alignment and not smaller than the computed pitch; otherwise the call fails.
3948 if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3949 {
3950 if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3951 {
3952 returnCode = ADDR_INVALIDPARAMS;
3953 }
3954 else if (pIn->pitchInElement < pOut->pitch)
3955 {
3956 returnCode = ADDR_INVALIDPARAMS;
3957 }
3958 else
3959 {
3960 pOut->pitch = pIn->pitchInElement;
3961 }
3962 }
3963
// heightAlign stays 0 unless ComputeStereoInfo writes an extra height alignment for a stereo surface.
// Note that the stereo call overwrites returnCode, including an ADDR_INVALIDPARAMS set just above.
3964 UINT_32 heightAlign = 0;
3965
3966 if (pIn->flags.qbStereo)
3967 {
3968 returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3969 }
3970
3971 if (returnCode == ADDR_OK)
3972 {
3973 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3974
3975 if (heightAlign > 1)
3976 {
3977 pOut->height = PowTwoAlign(pOut->height, heightAlign);
3978 }
3979
3980 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3981
// Defaults for a surface without a mip chain; the mipmapped path below overrides them as needed.
3982 pOut->epitchIsHeight = FALSE;
3983 pOut->mipChainInTail = FALSE;
3984 pOut->firstMipIdInTail = pIn->numMipLevels;
3985
3986 pOut->mipChainPitch = pOut->pitch;
3987 pOut->mipChainHeight = pOut->height;
3988 pOut->mipChainSlice = pOut->numSlices;
3989
3990 if (pIn->numMipLevels > 1)
3991 {
// Fills pitch/height/depth/offset of each level in pMipInfo (when provided) and returns the id of the
// first level placed in the mip tail.
3992 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
3993 pIn->swizzleMode,
3994 pIn->bpp,
3995 pIn->width,
3996 pIn->height,
3997 pIn->numSlices,
3998 pOut->blockWidth,
3999 pOut->blockHeight,
4000 pOut->blockSlices,
4001 pIn->numMipLevels,
4002 pOut->pMipInfo);
4003
// Id of the last level that is not (entirely) accounted for by the tail, clamped to the last level.
4004 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4005
4006 if (endingMipId == 0)
4007 {
// Even mip 0 is in the mip tail: report the tail dimensions as the surface dimensions.
4008 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4009 pIn->swizzleMode,
4010 pOut->blockWidth,
4011 pOut->blockHeight,
4012 pOut->blockSlices);
4013
4014 pOut->epitchIsHeight = TRUE;
4015 pOut->pitch = tailMaxDim.w;
4016 pOut->height = tailMaxDim.h;
4017 pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4018 tailMaxDim.d : pIn->numSlices;
4019 pOut->mipChainInTail = TRUE;
4020 }
4021 else
4022 {
4023 UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
4024 UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4025
4026 AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4027 pIn->swizzleMode,
4028 mip0WidthInBlk,
4029 mip0HeightInBlk,
4030 pOut->numSlices / pOut->blockSlices);
// Y-major: the chain grows in pitch by the width of mip 1 (in blocks).
// Otherwise: the chain grows in height by the height of mip 1 (in blocks).
// In both cases one extra block is added when mip 1 is a single block and endingMipId > 2.
// NOTE(review): presumably this reserves room for the levels after mip 1 - confirm.
4031 if (majorMode == ADDR_MAJOR_Y)
4032 {
4033 UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4034
4035 if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4036 {
4037 mip1WidthInBlk++;
4038 }
4039
4040 pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4041
4042 pOut->epitchIsHeight = FALSE;
4043 }
4044 else
4045 {
4046 UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4047
4048 if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4049 {
4050 mip1HeightInBlk++;
4051 }
4052
4053 pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4054
4055 pOut->epitchIsHeight = TRUE;
4056 }
4057 }
4058
4059 if (pOut->pMipInfo != NULL)
4060 {
4061 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4062
// For every level: get its start position in blocks, linearize it into a block index within the
// mip chain, and convert that index to a byte offset by shifting with the block size (log2).
4063 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4064 {
4065 Dim3d mipStartPos = {0};
4066 UINT_32 mipTailOffsetInBytes = 0;
4067
4068 mipStartPos = GetMipStartPos(pIn->resourceType,
4069 pIn->swizzleMode,
4070 pOut->pitch,
4071 pOut->height,
4072 pOut->numSlices,
4073 pOut->blockWidth,
4074 pOut->blockHeight,
4075 pOut->blockSlices,
4076 i,
4077 elementBytesLog2,
4078 &mipTailOffsetInBytes);
4079
4080 UINT_32 pitchInBlock =
4081 pOut->mipChainPitch / pOut->blockWidth;
4082 UINT_32 sliceInBlock =
4083 (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4084 UINT_64 blockIndex =
4085 mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4086 UINT_64 macroBlockOffset =
4087 blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4088
4089 pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4090 pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
4091 }
4092 }
4093 }
4094 else if (pOut->pMipInfo != NULL)
4095 {
// Single level: the only mip entry mirrors the surface itself; depth is the slice count for 3D only.
4096 pOut->pMipInfo[0].pitch = pOut->pitch;
4097 pOut->pMipInfo[0].height = pOut->height;
4098 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4099 pOut->pMipInfo[0].offset = 0;
4100 }
4101
// Slice size in bytes covers the whole mip chain footprint and all fragments; the first operand is
// widened to 64 bits before multiplying.
4102 pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4103 (pIn->bpp >> 3) * pIn->numFrags;
4104 pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
4105 pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4106
4107 if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4108 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4109 (pIn->flags.texture == TRUE) &&
4110 (pIn->flags.noMetadata == FALSE) &&
4111 (pIn->flags.metaPipeUnaligned == FALSE))
4112 {
4113 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4114 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4115 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4116 // them, which may cause invalid metadata to be fetched.
4117 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4118 }
4119
// PRT surfaces need at least PrtAlignment as base alignment.
4120 if (pIn->flags.prt)
4121 {
4122 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4123 }
4124 }
4125 }
4126
4127 return returnCode;
4128 }
4129
4130 /**
4131 ************************************************************************************************************************
4132 * Gfx9Lib::HwlComputeSurfaceInfoLinear
4133 *
4134 * @brief
4135 * Internal function to calculate alignment for linear surface
4136 *
4137 * @return
4138 * ADDR_E_RETURNCODE
4139 ************************************************************************************************************************
4140 */
4141 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4142 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4143 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4144 ) const
4145 {
4146 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4147 UINT_32 pitch = 0;
4148 UINT_32 actualHeight = 0;
4149 UINT_32 elementBytes = pIn->bpp >> 3;
4150 const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256;
4151
4152 if (IsTex1d(pIn->resourceType))
4153 {
4154 if (pIn->height > 1)
4155 {
4156 returnCode = ADDR_INVALIDPARAMS;
4157 }
4158 else
4159 {
4160 const UINT_32 pitchAlignInElement = alignment / elementBytes;
4161
4162 pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4163 actualHeight = pIn->numMipLevels;
4164
4165 if (pIn->flags.prt == FALSE)
4166 {
4167 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4168 &pitch, &actualHeight);
4169 }
4170
4171 if (returnCode == ADDR_OK)
4172 {
4173 if (pOut->pMipInfo != NULL)
4174 {
4175 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4176 {
4177 pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4178 pOut->pMipInfo[i].pitch = pitch;
4179 pOut->pMipInfo[i].height = 1;
4180 pOut->pMipInfo[i].depth = 1;
4181 }
4182 }
4183 }
4184 }
4185 }
4186 else
4187 {
4188 returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4189 }
4190
4191 if ((pitch == 0) || (actualHeight == 0))
4192 {
4193 returnCode = ADDR_INVALIDPARAMS;
4194 }
4195
4196 if (returnCode == ADDR_OK)
4197 {
4198 pOut->pitch = pitch;
4199 pOut->height = pIn->height;
4200 pOut->numSlices = pIn->numSlices;
4201 pOut->mipChainPitch = pitch;
4202 pOut->mipChainHeight = actualHeight;
4203 pOut->mipChainSlice = pOut->numSlices;
4204 pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4205 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4206 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
4207 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4208 pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4209 pOut->blockHeight = 1;
4210 pOut->blockSlices = 1;
4211 }
4212
4213 // Post calculation validate
4214 ADDR_ASSERT(pOut->sliceSize > 0);
4215
4216 return returnCode;
4217 }
4218
4219 /**
4220 ************************************************************************************************************************
4221 * Gfx9Lib::GetMipChainInfo
4222 *
4223 * @brief
4224 * Internal function to get out information about mip chain
4225 *
4226 * @return
4227 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4228 ************************************************************************************************************************
4229 */
4230 UINT_32 Gfx9Lib::GetMipChainInfo(
4231 AddrResourceType resourceType,
4232 AddrSwizzleMode swizzleMode,
4233 UINT_32 bpp,
4234 UINT_32 mip0Width,
4235 UINT_32 mip0Height,
4236 UINT_32 mip0Depth,
4237 UINT_32 blockWidth,
4238 UINT_32 blockHeight,
4239 UINT_32 blockDepth,
4240 UINT_32 numMipLevel,
4241 ADDR2_MIP_INFO* pMipInfo) const
4242 {
4243 const Dim3d tailMaxDim =
4244 GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4245
4246 UINT_32 mipPitch = mip0Width;
4247 UINT_32 mipHeight = mip0Height;
4248 UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
4249 UINT_32 offset = 0;
4250 UINT_32 firstMipIdInTail = numMipLevel;
4251 BOOL_32 inTail = FALSE;
4252 BOOL_32 finalDim = FALSE;
4253 BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
4254 BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
4255
4256 for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4257 {
4258 if (inTail)
4259 {
4260 if (finalDim == FALSE)
4261 {
4262 UINT_32 mipSize;
4263
4264 if (is3dThick)
4265 {
4266 mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4267 }
4268 else
4269 {
4270 mipSize = mipPitch * mipHeight * (bpp >> 3);
4271 }
4272
4273 if (mipSize <= 256)
4274 {
4275 UINT_32 index = Log2(bpp >> 3);
4276
4277 if (is3dThick)
4278 {
4279 mipPitch = Block256_3dZ[index].w;
4280 mipHeight = Block256_3dZ[index].h;
4281 mipDepth = Block256_3dZ[index].d;
4282 }
4283 else
4284 {
4285 mipPitch = Block256_2d[index].w;
4286 mipHeight = Block256_2d[index].h;
4287 }
4288
4289 finalDim = TRUE;
4290 }
4291 }
4292 }
4293 else
4294 {
4295 inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4296 mipPitch, mipHeight, mipDepth);
4297
4298 if (inTail)
4299 {
4300 firstMipIdInTail = mipId;
4301 mipPitch = tailMaxDim.w;
4302 mipHeight = tailMaxDim.h;
4303
4304 if (is3dThick)
4305 {
4306 mipDepth = tailMaxDim.d;
4307 }
4308 }
4309 else
4310 {
4311 mipPitch = PowTwoAlign(mipPitch, blockWidth);
4312 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4313
4314 if (is3dThick)
4315 {
4316 mipDepth = PowTwoAlign(mipDepth, blockDepth);
4317 }
4318 }
4319 }
4320
4321 if (pMipInfo != NULL)
4322 {
4323 pMipInfo[mipId].pitch = mipPitch;
4324 pMipInfo[mipId].height = mipHeight;
4325 pMipInfo[mipId].depth = mipDepth;
4326 pMipInfo[mipId].offset = offset;
4327 }
4328
4329 offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4330
4331 if (finalDim)
4332 {
4333 if (is3dThin)
4334 {
4335 mipDepth = Max(mipDepth >> 1, 1u);
4336 }
4337 }
4338 else
4339 {
4340 mipPitch = Max(mipPitch >> 1, 1u);
4341 mipHeight = Max(mipHeight >> 1, 1u);
4342
4343 if (is3dThick || is3dThin)
4344 {
4345 mipDepth = Max(mipDepth >> 1, 1u);
4346 }
4347 }
4348 }
4349
4350 return firstMipIdInTail;
4351 }
4352
4353 /**
4354 ************************************************************************************************************************
4355 * Gfx9Lib::GetMetaMiptailInfo
4356 *
4357 * @brief
4358 * Get mip tail coordinate information.
4359 *
4360 * @return
4361 * N/A
4362 ************************************************************************************************************************
4363 */
4364 VOID Gfx9Lib::GetMetaMiptailInfo(
4365 ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord
4366 Dim3d mipCoord, ///< [in] mip tail base coord
4367 UINT_32 numMipInTail, ///< [in] number of mips in tail
4368 Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
4369 ) const
4370 {
4371 BOOL_32 isThick = (pMetaBlkDim->d > 1);
4372 UINT_32 mipWidth = pMetaBlkDim->w;
4373 UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4374 UINT_32 mipDepth = pMetaBlkDim->d;
4375 UINT_32 minInc;
4376
4377 if (isThick)
4378 {
4379 minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4380 }
4381 else if (pMetaBlkDim->h >= 1024)
4382 {
4383 minInc = 256;
4384 }
4385 else if (pMetaBlkDim->h == 512)
4386 {
4387 minInc = 128;
4388 }
4389 else
4390 {
4391 minInc = 64;
4392 }
4393
4394 UINT_32 blk32MipId = 0xFFFFFFFF;
4395
4396 for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4397 {
4398 pInfo[mip].inMiptail = TRUE;
4399 pInfo[mip].startX = mipCoord.w;
4400 pInfo[mip].startY = mipCoord.h;
4401 pInfo[mip].startZ = mipCoord.d;
4402 pInfo[mip].width = mipWidth;
4403 pInfo[mip].height = mipHeight;
4404 pInfo[mip].depth = mipDepth;
4405
4406 if (mipWidth <= 32)
4407 {
4408 if (blk32MipId == 0xFFFFFFFF)
4409 {
4410 blk32MipId = mip;
4411 }
4412
4413 mipCoord.w = pInfo[blk32MipId].startX;
4414 mipCoord.h = pInfo[blk32MipId].startY;
4415 mipCoord.d = pInfo[blk32MipId].startZ;
4416
4417 switch (mip - blk32MipId)
4418 {
4419 case 0:
4420 mipCoord.w += 32; // 16x16
4421 break;
4422 case 1:
4423 mipCoord.h += 32; // 8x8
4424 break;
4425 case 2:
4426 mipCoord.h += 32; // 4x4
4427 mipCoord.w += 16;
4428 break;
4429 case 3:
4430 mipCoord.h += 32; // 2x2
4431 mipCoord.w += 32;
4432 break;
4433 case 4:
4434 mipCoord.h += 32; // 1x1
4435 mipCoord.w += 48;
4436 break;
4437 // The following are for BC/ASTC formats
4438 case 5:
4439 mipCoord.h += 48; // 1/2 x 1/2
4440 break;
4441 case 6:
4442 mipCoord.h += 48; // 1/4 x 1/4
4443 mipCoord.w += 16;
4444 break;
4445 case 7:
4446 mipCoord.h += 48; // 1/8 x 1/8
4447 mipCoord.w += 32;
4448 break;
4449 case 8:
4450 mipCoord.h += 48; // 1/16 x 1/16
4451 mipCoord.w += 48;
4452 break;
4453 default:
4454 ADDR_ASSERT_ALWAYS();
4455 break;
4456 }
4457
4458 mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4459 mipHeight = mipWidth;
4460
4461 if (isThick)
4462 {
4463 mipDepth = mipWidth;
4464 }
4465 }
4466 else
4467 {
4468 if (mipWidth <= minInc)
4469 {
4470 // if we're below the minimal increment...
4471 if (isThick)
4472 {
4473 // For 3d, just go in z direction
4474 mipCoord.d += mipDepth;
4475 }
4476 else
4477 {
4478 // For 2d, first go across, then down
4479 if ((mipWidth * 2) == minInc)
4480 {
4481 // if we're 2 mips below, that's when we go back in x, and down in y
4482 mipCoord.w -= minInc;
4483 mipCoord.h += minInc;
4484 }
4485 else
4486 {
4487 // otherwise, just go across in x
4488 mipCoord.w += minInc;
4489 }
4490 }
4491 }
4492 else
4493 {
4494 // On even mip, go down, otherwise, go across
4495 if (mip & 1)
4496 {
4497 mipCoord.w += mipWidth;
4498 }
4499 else
4500 {
4501 mipCoord.h += mipHeight;
4502 }
4503 }
4504 // Divide the width by 2
4505 mipWidth >>= 1;
4506 // After the first mip in tail, the mip is always a square
4507 mipHeight = mipWidth;
4508 // ...or for 3d, a cube
4509 if (isThick)
4510 {
4511 mipDepth = mipWidth;
4512 }
4513 }
4514 }
4515 }
4516
4517 /**
4518 ************************************************************************************************************************
4519 * Gfx9Lib::GetMipStartPos
4520 *
4521 * @brief
4522 * Internal function to get out information about mip logical start position
4523 *
4524 * @return
4525 * logical start position in macro block width/heith/depth of one mip level within one slice
4526 ************************************************************************************************************************
4527 */
4528 Dim3d Gfx9Lib::GetMipStartPos(
4529 AddrResourceType resourceType,
4530 AddrSwizzleMode swizzleMode,
4531 UINT_32 width,
4532 UINT_32 height,
4533 UINT_32 depth,
4534 UINT_32 blockWidth,
4535 UINT_32 blockHeight,
4536 UINT_32 blockDepth,
4537 UINT_32 mipId,
4538 UINT_32 log2ElementBytes,
4539 UINT_32* pMipTailBytesOffset) const
4540 {
4541 Dim3d mipStartPos = {0};
4542 const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4543
4544 // Report mip in tail if Mip0 is already in mip tail
4545 BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4546 UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode);
4547 UINT_32 mipIndexInTail = mipId;
4548
4549 if (inMipTail == FALSE)
4550 {
4551 // Mip 0 dimension, unit in block
4552 UINT_32 mipWidthInBlk = width / blockWidth;
4553 UINT_32 mipHeightInBlk = height / blockHeight;
4554 UINT_32 mipDepthInBlk = depth / blockDepth;
4555 AddrMajorMode majorMode = GetMajorMode(resourceType,
4556 swizzleMode,
4557 mipWidthInBlk,
4558 mipHeightInBlk,
4559 mipDepthInBlk);
4560
4561 UINT_32 endingMip = mipId + 1;
4562
4563 for (UINT_32 i = 1; i <= mipId; i++)
4564 {
4565 if ((i == 1) || (i == 3))
4566 {
4567 if (majorMode == ADDR_MAJOR_Y)
4568 {
4569 mipStartPos.w += mipWidthInBlk;
4570 }
4571 else
4572 {
4573 mipStartPos.h += mipHeightInBlk;
4574 }
4575 }
4576 else
4577 {
4578 if (majorMode == ADDR_MAJOR_X)
4579 {
4580 mipStartPos.w += mipWidthInBlk;
4581 }
4582 else if (majorMode == ADDR_MAJOR_Y)
4583 {
4584 mipStartPos.h += mipHeightInBlk;
4585 }
4586 else
4587 {
4588 mipStartPos.d += mipDepthInBlk;
4589 }
4590 }
4591
4592 BOOL_32 inTail = FALSE;
4593
4594 if (IsThick(resourceType, swizzleMode))
4595 {
4596 UINT_32 dim = log2blkSize % 3;
4597
4598 if (dim == 0)
4599 {
4600 inTail =
4601 (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4602 }
4603 else if (dim == 1)
4604 {
4605 inTail =
4606 (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4607 }
4608 else
4609 {
4610 inTail =
4611 (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4612 }
4613 }
4614 else
4615 {
4616 if (log2blkSize & 1)
4617 {
4618 inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4619 }
4620 else
4621 {
4622 inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4623 }
4624 }
4625
4626 if (inTail)
4627 {
4628 endingMip = i;
4629 break;
4630 }
4631
4632 mipWidthInBlk = RoundHalf(mipWidthInBlk);
4633 mipHeightInBlk = RoundHalf(mipHeightInBlk);
4634 mipDepthInBlk = RoundHalf(mipDepthInBlk);
4635 }
4636
4637 if (mipId >= endingMip)
4638 {
4639 inMipTail = TRUE;
4640 mipIndexInTail = mipId - endingMip;
4641 }
4642 }
4643
4644 if (inMipTail)
4645 {
4646 UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize;
4647 ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4648 *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4649 }
4650
4651 return mipStartPos;
4652 }
4653
4654 /**
4655 ************************************************************************************************************************
4656 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4657 *
4658 * @brief
4659 * Internal function to calculate address from coord for tiled swizzle surface
4660 *
4661 * @return
4662 * ADDR_E_RETURNCODE
4663 ************************************************************************************************************************
4664 */
4665 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4666 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4667 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4668 ) const
4669 {
4670 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4671 localIn.swizzleMode = pIn->swizzleMode;
4672 localIn.flags = pIn->flags;
4673 localIn.resourceType = pIn->resourceType;
4674 localIn.bpp = pIn->bpp;
4675 localIn.width = Max(pIn->unalignedWidth, 1u);
4676 localIn.height = Max(pIn->unalignedHeight, 1u);
4677 localIn.numSlices = Max(pIn->numSlices, 1u);
4678 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4679 localIn.numSamples = Max(pIn->numSamples, 1u);
4680 localIn.numFrags = Max(pIn->numFrags, 1u);
4681 if (localIn.numMipLevels <= 1)
4682 {
4683 localIn.pitchInElement = pIn->pitchInElement;
4684 }
4685
4686 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4687 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4688
4689 BOOL_32 valid = (returnCode == ADDR_OK) &&
4690 (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4691 IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4692 ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4693
4694 if (valid)
4695 {
4696 UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3);
4697 Dim3d mipStartPos = {0};
4698 UINT_32 mipTailBytesOffset = 0;
4699
4700 if (pIn->numMipLevels > 1)
4701 {
4702 // Mip-map chain cannot be MSAA surface
4703 ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4704
4705 mipStartPos = GetMipStartPos(pIn->resourceType,
4706 pIn->swizzleMode,
4707 localOut.pitch,
4708 localOut.height,
4709 localOut.numSlices,
4710 localOut.blockWidth,
4711 localOut.blockHeight,
4712 localOut.blockSlices,
4713 pIn->mipId,
4714 log2ElementBytes,
4715 &mipTailBytesOffset);
4716 }
4717
4718 UINT_32 interleaveOffset = 0;
4719 UINT_32 pipeBits = 0;
4720 UINT_32 pipeXor = 0;
4721 UINT_32 bankBits = 0;
4722 UINT_32 bankXor = 0;
4723
4724 if (IsThin(pIn->resourceType, pIn->swizzleMode))
4725 {
4726 UINT_32 blockOffset = 0;
4727 UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4728
4729 if (IsZOrderSwizzle(pIn->swizzleMode))
4730 {
4731 // Morton generation
4732 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4733 {
4734 UINT_32 totalLowBits = 6 - log2ElementBytes;
4735 UINT_32 mortBits = totalLowBits / 2;
4736 UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4737 // Are 9 bits enough?
4738 UINT_32 highBitsValue =
4739 MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4740 blockOffset = lowBitsValue | highBitsValue;
4741 ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4742 }
4743 else
4744 {
4745 blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4746 }
4747
4748 // Fill LSBs with sample bits
4749 if (pIn->numSamples > 1)
4750 {
4751 blockOffset *= pIn->numSamples;
4752 blockOffset |= pIn->sample;
4753 }
4754
4755 // Shift according to BytesPP
4756 blockOffset <<= log2ElementBytes;
4757 }
4758 else
4759 {
4760 // Micro block offset
4761 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4762 blockOffset = microBlockOffset;
4763
4764 // Micro block dimension
4765 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4766 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4767 // Morton generation, does 12 bit enough?
4768 blockOffset |=
4769 MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4770
4771 // Sample bits start location
4772 UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples);
4773 // Join sample bits information to the highest Macro block bits
4774 if (IsNonPrtXor(pIn->swizzleMode))
4775 {
4776 // Non-prt-Xor : xor highest Macro block bits with sample bits
4777 blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4778 }
4779 else
4780 {
4781 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4782 // after this op, the blockOffset only contains log2 Macro block size bits
4783 blockOffset %= (1 << sampleStart);
4784 blockOffset |= (pIn->sample << sampleStart);
4785 ADDR_ASSERT((blockOffset >> log2blkSize) == 0);
4786 }
4787 }
4788
4789 if (IsXor(pIn->swizzleMode))
4790 {
4791 // Mask off bits above Macro block bits to keep page synonyms working for prt
4792 if (IsPrt(pIn->swizzleMode))
4793 {
4794 blockOffset &= ((1 << log2blkSize) - 1);
4795 }
4796
4797 // Preserve offset inside pipe interleave
4798 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4799 blockOffset >>= m_pipeInterleaveLog2;
4800
4801 // Pipe/Se xor bits
4802 pipeBits = GetPipeXorBits(log2blkSize);
4803 // Pipe xor
4804 pipeXor = FoldXor2d(blockOffset, pipeBits);
4805 blockOffset >>= pipeBits;
4806
4807 // Bank xor bits
4808 bankBits = GetBankXorBits(log2blkSize);
4809 // Bank Xor
4810 bankXor = FoldXor2d(blockOffset, bankBits);
4811 blockOffset >>= bankBits;
4812
4813 // Put all the part back together
4814 blockOffset <<= bankBits;
4815 blockOffset |= bankXor;
4816 blockOffset <<= pipeBits;
4817 blockOffset |= pipeXor;
4818 blockOffset <<= m_pipeInterleaveLog2;
4819 blockOffset |= interleaveOffset;
4820 }
4821
4822 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4823 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4824
4825 blockOffset |= mipTailBytesOffset;
4826
4827 if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4828 {
4829 // Apply slice xor if not MSAA/PRT
4830 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4831 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4832 (m_pipeInterleaveLog2 + pipeBits));
4833 }
4834
4835 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4836 bankBits, pipeBits, &blockOffset);
4837
4838 blockOffset %= (1 << log2blkSize);
4839
4840 UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4841 UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4842 UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4843 UINT_64 macroBlockIndex =
4844 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4845 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4846 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4847
4848 pOut->addr = blockOffset | (macroBlockIndex << log2blkSize);
4849 }
4850 else
4851 {
4852 UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4853
4854 Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4855
4856 UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4857 (pIn->y / microBlockDim.h),
4858 (pIn->slice / microBlockDim.d),
4859 8);
4860
4861 blockOffset <<= 10;
4862 blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4863
4864 if (IsXor(pIn->swizzleMode))
4865 {
4866 // Mask off bits above Macro block bits to keep page synonyms working for prt
4867 if (IsPrt(pIn->swizzleMode))
4868 {
4869 blockOffset &= ((1 << log2blkSize) - 1);
4870 }
4871
4872 // Preserve offset inside pipe interleave
4873 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4874 blockOffset >>= m_pipeInterleaveLog2;
4875
4876 // Pipe/Se xor bits
4877 pipeBits = GetPipeXorBits(log2blkSize);
4878 // Pipe xor
4879 pipeXor = FoldXor3d(blockOffset, pipeBits);
4880 blockOffset >>= pipeBits;
4881
4882 // Bank xor bits
4883 bankBits = GetBankXorBits(log2blkSize);
4884 // Bank Xor
4885 bankXor = FoldXor3d(blockOffset, bankBits);
4886 blockOffset >>= bankBits;
4887
4888 // Put all the part back together
4889 blockOffset <<= bankBits;
4890 blockOffset |= bankXor;
4891 blockOffset <<= pipeBits;
4892 blockOffset |= pipeXor;
4893 blockOffset <<= m_pipeInterleaveLog2;
4894 blockOffset |= interleaveOffset;
4895 }
4896
4897 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4898 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4899 blockOffset |= mipTailBytesOffset;
4900
4901 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4902 bankBits, pipeBits, &blockOffset);
4903
4904 blockOffset %= (1 << log2blkSize);
4905
4906 UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
4907 UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4908 UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4909
4910 UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4911 UINT_32 sliceSizeInBlock =
4912 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4913 UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4914
4915 pOut->addr = blockOffset | (blockIndex << log2blkSize);
4916 }
4917 }
4918 else
4919 {
4920 returnCode = ADDR_INVALIDPARAMS;
4921 }
4922
4923 return returnCode;
4924 }
4925
4926 /**
4927 ************************************************************************************************************************
4928 * Gfx9Lib::ComputeSurfaceInfoLinear
4929 *
4930 * @brief
4931 * Internal function to calculate padding for linear swizzle 2D/3D surface
4932 *
4933 * @return
4934 * N/A
4935 ************************************************************************************************************************
4936 */
4937 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
4938 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture
4939 UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element
4940 UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW
4941 ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information
4942 ) const
4943 {
4944 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4945
4946 UINT_32 elementBytes = pIn->bpp >> 3;
4947 UINT_32 pitchAlignInElement = 0;
4948
4949 if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
4950 {
4951 ADDR_ASSERT(pIn->numMipLevels <= 1);
4952 ADDR_ASSERT(pIn->numSlices <= 1);
4953 pitchAlignInElement = 1;
4954 }
4955 else
4956 {
4957 pitchAlignInElement = (256 / elementBytes);
4958 }
4959
4960 UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement);
4961 UINT_32 slice0PaddedHeight = pIn->height;
4962
4963 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4964 &mipChainWidth, &slice0PaddedHeight);
4965
4966 if (returnCode == ADDR_OK)
4967 {
4968 UINT_32 mipChainHeight = 0;
4969 UINT_32 mipHeight = pIn->height;
4970 UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4971
4972 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4973 {
4974 if (pMipInfo != NULL)
4975 {
4976 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
4977 pMipInfo[i].pitch = mipChainWidth;
4978 pMipInfo[i].height = mipHeight;
4979 pMipInfo[i].depth = mipDepth;
4980 }
4981
4982 mipChainHeight += mipHeight;
4983 mipHeight = RoundHalf(mipHeight);
4984 mipHeight = Max(mipHeight, 1u);
4985 }
4986
4987 *pMipmap0PaddedWidth = mipChainWidth;
4988 *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
4989 }
4990
4991 return returnCode;
4992 }
4993
4994 } // V2
4995 } // Addr