amd/addrlib: update Mesa's copy of addrlib
[mesa.git] / src / amd / addrlib / src / gfx9 / gfx9addrlib.cpp
1 /*
2 * Copyright © 2007-2018 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx9addrlib.h"
35
36 #include "gfx9_gb_reg.h"
37
38 #include "amdgpu_asic_addr.h"
39
40 #include "util/macros.h"
41
42 ////////////////////////////////////////////////////////////////////////////////////////////////////
43 ////////////////////////////////////////////////////////////////////////////////////////////////////
44
45 namespace Addr
46 {
47
48 /**
49 ************************************************************************************************************************
50 * Gfx9HwlInit
51 *
52 * @brief
53 * Creates a Gfx9Lib object.
54 *
55 * @return
56 * Returns a Gfx9Lib object pointer.
57 ************************************************************************************************************************
58 */
59 Addr::Lib* Gfx9HwlInit(const Client* pClient)
60 {
61 return V2::Gfx9Lib::CreateObj(pClient);
62 }
63
64 namespace V2
65 {
66
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68 // Static Const Member
69 ////////////////////////////////////////////////////////////////////////////////////////////////////
70
// Flag table with one row per swizzle mode (ADDR_SW_MAX_TYPE rows). The row order follows the
// swizzle mode named in each trailing comment, and the column order follows the header comment
// on the opening brace line. The constructor copies this table into m_swizzleModeTable.
71 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
72 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt
73 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
74 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
75 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D
76 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R
77
78 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
79 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
80 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D
81 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R
82
83 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
84 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
85 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D
86 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_R
87
88 {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z
89 {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S
90 {0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D
91 {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R
92
// The _T rows set both the XOR and T columns; the _x/_X rows below set only the XOR column.
93 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T
94 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T
95 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T
96 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T
97
98 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_x
99 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_x
100 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_x
101 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_x
102
103 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X
104 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X
105 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X
106 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X
107
108 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X
109 {0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X
110 {0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X
111 {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X
// ADDR_SW_LINEAR_GENERAL carries the same flags as ADDR_SW_LINEAR (only the Linear column is set).
112 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
113 };
114
// 16-entry offset table. NOTE(review): presumably mip-tail offsets in units of 256-byte blocks,
// judging by the name only; its consumers are not visible in this part of the file - confirm.
115 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
116 8, 6, 5, 4, 3, 2, 1, 0};
117
// Five {w, h, d} entries each. NOTE(review): presumably 256-byte 3D block dimensions for the
// S and Z swizzle families, indexed by log2(bytes per element) - the users are not visible
// here, so verify before relying on this.
118 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
119
120 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
121
122 /**
123 ************************************************************************************************************************
124 * Gfx9Lib::Gfx9Lib
125 *
126 * @brief
127 * Constructor
128 *
129 ************************************************************************************************************************
130 */
131 Gfx9Lib::Gfx9Lib(const Client* pClient)
132 :
133 Lib(pClient),
134 m_numEquations(0)
135 {
136 m_class = AI_ADDRLIB;
137 memset(&m_settings, 0, sizeof(m_settings));
138 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
139 }
140
141 /**
142 ************************************************************************************************************************
143 * Gfx9Lib::~Gfx9Lib
144 *
145 * @brief
146 * Destructor
147 ************************************************************************************************************************
148 */
149 Gfx9Lib::~Gfx9Lib()
150 {
151 }
152
153 /**
154 ************************************************************************************************************************
155 * Gfx9Lib::HwlComputeHtileInfo
156 *
157 * @brief
158 * Interface function stub of AddrComputeHtileInfo
159 *
160 * @return
161 * ADDR_E_RETURNCODE
162 ************************************************************************************************************************
163 */
164 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
165 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
166 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
167 ) const
168 {
169 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
170 pIn->swizzleMode);
171
172 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
173
174 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
175
176 if ((numPipeTotal == 1) && (numRbTotal == 1))
177 {
178 numCompressBlkPerMetaBlkLog2 = 10;
179 }
180 else
181 {
182 if (m_settings.applyAliasFix)
183 {
184 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
185 }
186 else
187 {
188 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
189 }
190 }
191
192 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
193
194 Dim3d metaBlkDim = {8, 8, 1};
195 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
196 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
197 UINT_32 heightAmp = totalAmpBits - widthAmp;
198 metaBlkDim.w <<= widthAmp;
199 metaBlkDim.h <<= heightAmp;
200
201 #if DEBUG
202 Dim3d metaBlkDimDbg = {8, 8, 1};
203 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
204 {
205 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
206 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
207 {
208 metaBlkDimDbg.h <<= 1;
209 }
210 else
211 {
212 metaBlkDimDbg.w <<= 1;
213 }
214 }
215 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
216 #endif
217
218 UINT_32 numMetaBlkX;
219 UINT_32 numMetaBlkY;
220 UINT_32 numMetaBlkZ;
221
222 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
223 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
224 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
225
226 const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
227 UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
228
229 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
230 {
231 align *= (numPipeTotal >> 1);
232 }
233
234 align = Max(align, metaBlkSize);
235
236 if (m_settings.metaBaseAlignFix)
237 {
238 align = Max(align, GetBlockSize(pIn->swizzleMode));
239 }
240
241 if (m_settings.htileAlignFix)
242 {
243 const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
244 const INT_32 htileCachelineSizeLog2 = 11;
245 const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
246
247 INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
248
249 align <<= rbMaskPadding;
250 }
251
252 pOut->pitch = numMetaBlkX * metaBlkDim.w;
253 pOut->height = numMetaBlkY * metaBlkDim.h;
254 pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
255
256 pOut->metaBlkWidth = metaBlkDim.w;
257 pOut->metaBlkHeight = metaBlkDim.h;
258 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
259
260 pOut->baseAlign = align;
261 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
262
263 return ADDR_OK;
264 }
265
266 /**
267 ************************************************************************************************************************
268 * Gfx9Lib::HwlComputeCmaskInfo
269 *
270 * @brief
271 * Interface function stub of AddrComputeCmaskInfo
272 *
273 * @return
274 * ADDR_E_RETURNCODE
275 ************************************************************************************************************************
276 */
277 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
278 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
279 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
280 ) const
281 {
282 // TODO: Clarify with AddrLib team
283 // ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
284
285 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
286 pIn->swizzleMode);
287
288 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
289
290 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
291
292 if ((numPipeTotal == 1) && (numRbTotal == 1))
293 {
294 numCompressBlkPerMetaBlkLog2 = 13;
295 }
296 else
297 {
298 if (m_settings.applyAliasFix)
299 {
300 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
301 }
302 else
303 {
304 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
305 }
306
307 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
308 }
309
310 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
311
312 Dim2d metaBlkDim = {8, 8};
313 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
314 UINT_32 heightAmp = totalAmpBits >> 1;
315 UINT_32 widthAmp = totalAmpBits - heightAmp;
316 metaBlkDim.w <<= widthAmp;
317 metaBlkDim.h <<= heightAmp;
318
319 #if DEBUG
320 Dim2d metaBlkDimDbg = {8, 8};
321 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
322 {
323 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
324 {
325 metaBlkDimDbg.h <<= 1;
326 }
327 else
328 {
329 metaBlkDimDbg.w <<= 1;
330 }
331 }
332 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
333 #endif
334
335 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
336 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
337 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
338
339 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
340
341 if (m_settings.metaBaseAlignFix)
342 {
343 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
344 }
345
346 pOut->pitch = numMetaBlkX * metaBlkDim.w;
347 pOut->height = numMetaBlkY * metaBlkDim.h;
348 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
349 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
350 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
351
352 pOut->metaBlkWidth = metaBlkDim.w;
353 pOut->metaBlkHeight = metaBlkDim.h;
354
355 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
356
357 return ADDR_OK;
358 }
359
360 /**
361 ************************************************************************************************************************
362 * Gfx9Lib::GetMetaMipInfo
363 *
364 * @brief
365 * Get meta mip info
366 *
367 * @return
368 * N/A
369 ************************************************************************************************************************
370 */
// Computes how many meta blocks the mip chain occupies along X/Y/Z and, when pInfo is non-NULL,
// fills one ADDR2_META_MIP_INFO entry per mip level with that level's offset and dimensions.
// The three block counts are always written to pNumMetaBlkX/Y/Z on exit.
371 VOID Gfx9Lib::GetMetaMipInfo(
372 UINT_32 numMipLevels, ///< [in] number of mip levels
373 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
374 BOOL_32 dataThick, ///< [in] data surface is thick
375 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
376 UINT_32 mip0Width, ///< [in] mip0 width
377 UINT_32 mip0Height, ///< [in] mip0 height
378 UINT_32 mip0Depth, ///< [in] mip0 depth
379 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
380 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
381 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
382 const
383 {
// Meta block counts needed to cover mip0 (ceiling division per axis).
384 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
385 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
386 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
// Tail thresholds: a level counts as "in tail" once it fits within a full meta block width,
// half a meta block height and (thick data only) a full meta block depth.
387 UINT_32 tailWidth = pMetaBlkDim->w;
388 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
389 UINT_32 tailDepth = pMetaBlkDim->d;
390 BOOL_32 inTail = FALSE;
// Stays ADDR_MAJOR_MAX_TYPE for a single-level surface; the switch statements below then take
// their default branch and leave the mip offset untouched.
391 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
392
393 if (numMipLevels > 1)
394 {
// Pick the major axis: Z only for thick data whose Z block count strictly exceeds both X and Y;
// otherwise X when X >= Y, else Y.
395 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
396 {
397 // Z major
398 major = ADDR_MAJOR_Z;
399 }
400 else if (numMetaBlkX >= numMetaBlkY)
401 {
402 // X major
403 major = ADDR_MAJOR_X;
404 }
405 else
406 {
407 // Y major
408 major = ADDR_MAJOR_Y;
409 }
410
411 inTail = ((mip0Width <= tailWidth) &&
412 (mip0Height <= tailHeight) &&
413 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
414
// When mip0 is not already in the tail, one block count is enlarged to make room for the lower
// levels. pMipDim points at the count being enlarged; pOrderDim is the count along the major
// axis, which is compared against orderLimit.
415 if (inTail == FALSE)
416 {
417 UINT_32 orderLimit;
418 UINT_32 *pMipDim;
419 UINT_32 *pOrderDim;
420
421 if (major == ADDR_MAJOR_Z)
422 {
423 // Z major
424 pMipDim = &numMetaBlkY;
425 pOrderDim = &numMetaBlkZ;
426 orderLimit = 4;
427 }
428 else if (major == ADDR_MAJOR_X)
429 {
430 // X major
431 pMipDim = &numMetaBlkY;
432 pOrderDim = &numMetaBlkX;
433 orderLimit = 4;
434 }
435 else
436 {
437 // Y major
438 pMipDim = &numMetaBlkX;
439 pOrderDim = &numMetaBlkY;
440 orderLimit = 2;
441 }
442
// Either add a fixed two blocks (small minor count, long major count, more than three levels),
// or grow the minor count by half of itself rounded up.
443 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
444 {
445 *pMipDim += 2;
446 }
447 else
448 {
449 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
450 }
451 }
452 }
453
// Per-level placement is optional: it is only produced when the caller supplied pInfo.
454 if (pInfo != NULL)
455 {
// Running dimensions of the current level and its offset within the mip chain.
456 UINT_32 mipWidth = mip0Width;
457 UINT_32 mipHeight = mip0Height;
458 UINT_32 mipDepth = mip0Depth;
459 Dim3d mipCoord = {0};
460
461 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
462 {
463 if (inTail)
464 {
// The remaining (numMipLevels - mip) levels are handed to GetMetaMiptailInfo in one call, so
// the loop stops here.
465 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
466 pMetaBlkDim);
467 break;
468 }
469 else
470 {
// Pad the level to meta block granularity via PowTwoAlign; the padded values are what get
// recorded and later halved.
471 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
472 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
473 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
474
475 pInfo[mip].inMiptail = FALSE;
476 pInfo[mip].startX = mipCoord.w;
477 pInfo[mip].startY = mipCoord.h;
478 pInfo[mip].startZ = mipCoord.d;
479 pInfo[mip].width = mipWidth;
480 pInfo[mip].height = mipHeight;
// Depth is only reported for thick data; otherwise it is fixed at 1.
481 pInfo[mip].depth = dataThick ? mipDepth : 1;
482
// Advance the offset for the next level. Odd levels and every level from 3 onward step along
// the major axis; levels 0 and 2 step along the other axis (Y for X- and Z-major, X for Y-major).
483 if ((mip >= 3) || (mip & 1))
484 {
485 switch (major)
486 {
487 case ADDR_MAJOR_X:
488 mipCoord.w += mipWidth;
489 break;
490 case ADDR_MAJOR_Y:
491 mipCoord.h += mipHeight;
492 break;
493 case ADDR_MAJOR_Z:
494 mipCoord.d += mipDepth;
495 break;
496 default:
497 break;
498 }
499 }
500 else
501 {
502 switch (major)
503 {
504 case ADDR_MAJOR_X:
505 mipCoord.h += mipHeight;
506 break;
507 case ADDR_MAJOR_Y:
508 mipCoord.w += mipWidth;
509 break;
510 case ADDR_MAJOR_Z:
511 mipCoord.h += mipHeight;
512 break;
513 default:
514 break;
515 }
516 }
517
// Next level: halve each (padded) dimension, clamped to 1, then re-test the tail condition.
518 mipWidth = Max(mipWidth >> 1, 1u);
519 mipHeight = Max(mipHeight >> 1, 1u);
520 mipDepth = Max(mipDepth >> 1, 1u);
521
522 inTail = ((mipWidth <= tailWidth) &&
523 (mipHeight <= tailHeight) &&
524 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
525 }
526 }
527 }
528
// Report the (possibly enlarged) block counts to the caller.
529 *pNumMetaBlkX = numMetaBlkX;
530 *pNumMetaBlkY = numMetaBlkY;
531 *pNumMetaBlkZ = numMetaBlkZ;
532 }
533
534 /**
535 ************************************************************************************************************************
536 * Gfx9Lib::HwlComputeDccInfo
537 *
538 * @brief
539 * Interface function to compute DCC key info
540 *
541 * @return
542 * ADDR_E_RETURNCODE
543 ************************************************************************************************************************
544 */
// Computes DCC key sizing for a surface: compress block and meta block dimensions, the padded
// pitch/height/depth, the DCC RAM size and its base alignment. Always returns ADDR_OK.
545 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
546 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
547 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
548 ) const
549 {
550 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
551 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
552 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
553
// Linear data forces linear metadata; linear metadata on non-linear data drops pipe alignment.
554 if (dataLinear)
555 {
556 metaLinear = TRUE;
557 }
558 else if (metaLinear == TRUE)
559 {
560 pipeAligned = FALSE;
561 }
562
563 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
564
565 if (metaLinear)
566 {
567 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
568 ADDR_ASSERT_ALWAYS();
569
// The outputs are still filled in after the assertion: alignment from pipe count and interleave
// size, RAM size as dataSurfaceSize / 256 rounded up to that alignment.
570 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
571 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
572 }
573 else
574 {
575 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
576
// Starting meta block size: 65536 for thick data, 4096 otherwise, divided by the fragment count
// (fragment and slice counts are clamped to at least 1).
577 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
578
579 UINT_32 numFrags = Max(pIn->numFrags, 1u);
580 UINT_32 numSlices = Max(pIn->numSlices, 1u);
581
582 minMetaBlkSize /= numFrags;
583
584 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
585
586 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
587
// With more than one pipe or RB, the count is raised to at least (SE * RB-per-SE * per-RB block
// size) and then capped at 65536 * bpp.
588 if ((numPipeTotal > 1) || (numRbTotal > 1))
589 {
590 const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
591
592 numCompressBlkPerMetaBlk =
593 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
594
595 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
596 {
597 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
598 }
599 }
600
601 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
602 Dim3d metaBlkDim = compressBlkDim;
603
// Grow the meta block from one compress block: each pass doubles exactly one dimension, and
// index doubles per pass until it reaches numCompressBlkPerMetaBlk. Height is picked when it is
// smaller than width (or equal, for mipmapped surfaces), otherwise width; for thick data the
// depth is doubled instead whenever the picked dimension already exceeds the depth.
604 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
605 {
606 if ((metaBlkDim.h < metaBlkDim.w) ||
607 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
608 {
609 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
610 {
611 metaBlkDim.h <<= 1;
612 }
613 else
614 {
615 metaBlkDim.d <<= 1;
616 }
617 }
618 else
619 {
620 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
621 {
622 metaBlkDim.w <<= 1;
623 }
624 else
625 {
626 metaBlkDim.d <<= 1;
627 }
628 }
629 }
630
// Meta block counts for the whole mip chain; per-mip placement goes to pOut->pMipInfo.
631 UINT_32 numMetaBlkX;
632 UINT_32 numMetaBlkY;
633 UINT_32 numMetaBlkZ;
634
635 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
636 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
637 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
638
// Size alignment: pipe / RB / interleave product, scaled up when the fragment count exceeds
// m_maxCompFrag, and raised to the swizzle block size when metaBaseAlignFix is set.
639 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
640
641 if (numFrags > m_maxCompFrag)
642 {
643 sizeAlign *= (numFrags / m_maxCompFrag);
644 }
645
646 if (m_settings.metaBaseAlignFix)
647 {
648 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
649 }
650
// RAM size = total meta blocks * compress blocks per meta block * fragments, then aligned.
651 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
652 numCompressBlkPerMetaBlk * numFrags;
653 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
654 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
655
// Padded surface dimensions in whole meta blocks.
656 pOut->pitch = numMetaBlkX * metaBlkDim.w;
657 pOut->height = numMetaBlkY * metaBlkDim.h;
658 pOut->depth = numMetaBlkZ * metaBlkDim.d;
659
660 pOut->compressBlkWidth = compressBlkDim.w;
661 pOut->compressBlkHeight = compressBlkDim.h;
662 pOut->compressBlkDepth = compressBlkDim.d;
663
664 pOut->metaBlkWidth = metaBlkDim.w;
665 pOut->metaBlkHeight = metaBlkDim.h;
666 pOut->metaBlkDepth = metaBlkDim.d;
667
// The fast clear size uses the fragment count capped at m_maxCompFrag, unlike dccRamSize above.
668 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
669 pOut->fastClearSizePerSlice =
670 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
671 }
672
673 return ADDR_OK;
674 }
675
676 /**
677 ************************************************************************************************************************
678 * Gfx9Lib::HwlComputeMaxBaseAlignments
679 *
680 * @brief
681 * Gets maximum alignments
682 * @return
683 * maximum alignments
684 ************************************************************************************************************************
685 */
686 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
687 {
688 return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB);
689 }
690
691 /**
692 ************************************************************************************************************************
693 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
694 *
695 * @brief
696 * Gets maximum alignments for metadata
697 * @return
698 * maximum alignments for metadata
699 ************************************************************************************************************************
700 */
701 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
702 {
703 // Max base alignment for Htile
704 const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
705 const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
706
707 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
708 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
709 ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
710 const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
711
712 UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
713
714 if (maxNumPipeTotal > 2)
715 {
716 maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
717 }
718
719 maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
720
721 if (m_settings.metaBaseAlignFix)
722 {
723 maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB));
724 }
725
726 if (m_settings.htileAlignFix)
727 {
728 maxBaseAlignHtile *= maxNumPipeTotal;
729 }
730
731 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
732
733 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
734 UINT_32 maxBaseAlignDcc3D = 65536;
735
736 if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
737 {
738 maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
739 }
740
741 // Max base alignment for Msaa Dcc
742 UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
743
744 if (m_settings.metaBaseAlignFix)
745 {
746 maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB));
747 }
748
749 return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
750 }
751
752 /**
753 ************************************************************************************************************************
754 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
755 *
756 * @brief
757 * Interface function stub of AddrComputeCmaskAddrFromCoord
758 *
759 * @return
760 * ADDR_E_RETURNCODE
761 ************************************************************************************************************************
762 */
763 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
764 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
765 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
766 {
767 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
768 input.size = sizeof(input);
769 input.cMaskFlags = pIn->cMaskFlags;
770 input.colorFlags = pIn->colorFlags;
771 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
772 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
773 input.numSlices = Max(pIn->numSlices, 1u);
774 input.swizzleMode = pIn->swizzleMode;
775 input.resourceType = pIn->resourceType;
776
777 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
778 output.size = sizeof(output);
779
780 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
781
782 if (returnCode == ADDR_OK)
783 {
784 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
785 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
786 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
787 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
788
789 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
790 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
791 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
792
793 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
794
795 UINT_32 xb = pIn->x / output.metaBlkWidth;
796 UINT_32 yb = pIn->y / output.metaBlkHeight;
797 UINT_32 zb = pIn->slice;
798
799 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
800 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
801 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
802
803 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
804
805 pOut->addr = address >> 1;
806 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
807
808 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
809 pIn->swizzleMode);
810
811 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
812
813 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
814 }
815
816 return returnCode;
817 }
818
819 /**
820 ************************************************************************************************************************
821 * Gfx9Lib::HwlComputeHtileAddrFromCoord
822 *
823 * @brief
824 * Interface function stub of AddrComputeHtileAddrFromCoord
825 *
826 * @return
827 * ADDR_E_RETURNCODE
828 ************************************************************************************************************************
829 */
830 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
831 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
832 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
833 {
834 ADDR_E_RETURNCODE returnCode = ADDR_OK;
835
836 if (pIn->numMipLevels > 1)
837 {
838 returnCode = ADDR_NOTIMPLEMENTED;
839 }
840 else
841 {
842 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
843 input.size = sizeof(input);
844 input.hTileFlags = pIn->hTileFlags;
845 input.depthFlags = pIn->depthflags;
846 input.swizzleMode = pIn->swizzleMode;
847 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
848 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
849 input.numSlices = Max(pIn->numSlices, 1u);
850 input.numMipLevels = Max(pIn->numMipLevels, 1u);
851
852 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
853 output.size = sizeof(output);
854
855 returnCode = ComputeHtileInfo(&input, &output);
856
857 if (returnCode == ADDR_OK)
858 {
859 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
860 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
861 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
862 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
863
864 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
865 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
866 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
867
868 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
869
870 UINT_32 xb = pIn->x / output.metaBlkWidth;
871 UINT_32 yb = pIn->y / output.metaBlkHeight;
872 UINT_32 zb = pIn->slice;
873
874 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
875 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
876 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
877
878 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
879
880 pOut->addr = address >> 1;
881
882 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
883 pIn->swizzleMode);
884
885 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
886
887 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
888 }
889 }
890
891 return returnCode;
892 }
893
894 /**
895 ************************************************************************************************************************
896 * Gfx9Lib::HwlComputeHtileCoordFromAddr
897 *
898 * @brief
899 * Interface function stub of AddrComputeHtileCoordFromAddr
900 *
901 * @return
902 * ADDR_E_RETURNCODE
903 ************************************************************************************************************************
904 */
905 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
906 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
907 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
908 {
909 ADDR_E_RETURNCODE returnCode = ADDR_OK;
910
911 if (pIn->numMipLevels > 1)
912 {
913 returnCode = ADDR_NOTIMPLEMENTED;
914 }
915 else
916 {
917 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
918 input.size = sizeof(input);
919 input.hTileFlags = pIn->hTileFlags;
920 input.swizzleMode = pIn->swizzleMode;
921 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
922 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
923 input.numSlices = Max(pIn->numSlices, 1u);
924 input.numMipLevels = Max(pIn->numMipLevels, 1u);
925
926 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
927 output.size = sizeof(output);
928
929 returnCode = ComputeHtileInfo(&input, &output);
930
931 if (returnCode == ADDR_OK)
932 {
933 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
934 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
935 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
936 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
937
938 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
939 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
940 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
941
942 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
943
944 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
945 pIn->swizzleMode);
946
947 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
948
949 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
950
951 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
952 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
953
954 UINT_32 x, y, z, s, m;
955 pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
956
957 pOut->slice = m / sliceSizeInBlock;
958 pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
959 pOut->x = (m % pitchInBlock) * output.metaBlkWidth + x;
960 }
961 }
962
963 return returnCode;
964 }
965
966 /**
967 ************************************************************************************************************************
968 * Gfx9Lib::HwlComputeDccAddrFromCoord
969 *
970 * @brief
971 * Interface function stub of AddrComputeDccAddrFromCoord
972 *
973 * @return
974 * ADDR_E_RETURNCODE
975 ************************************************************************************************************************
976 */
977 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
978 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
979 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
980 {
981 ADDR_E_RETURNCODE returnCode = ADDR_OK;
982
983 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
984 {
985 returnCode = ADDR_NOTIMPLEMENTED;
986 }
987 else
988 {
989 ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
990 input.size = sizeof(input);
991 input.dccKeyFlags = pIn->dccKeyFlags;
992 input.colorFlags = pIn->colorFlags;
993 input.swizzleMode = pIn->swizzleMode;
994 input.resourceType = pIn->resourceType;
995 input.bpp = pIn->bpp;
996 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
997 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
998 input.numSlices = Max(pIn->numSlices, 1u);
999 input.numFrags = Max(pIn->numFrags, 1u);
1000 input.numMipLevels = Max(pIn->numMipLevels, 1u);
1001
1002 ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
1003 output.size = sizeof(output);
1004
1005 returnCode = ComputeDccInfo(&input, &output);
1006
1007 if (returnCode == ADDR_OK)
1008 {
1009 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
1010 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
1011 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
1012 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1013 UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth);
1014 UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth);
1015 UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
1016 UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
1017
1018 MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1019 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1020 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1021 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1022
1023 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1024
1025 UINT_32 xb = pIn->x / output.metaBlkWidth;
1026 UINT_32 yb = pIn->y / output.metaBlkHeight;
1027 UINT_32 zb = pIn->slice / output.metaBlkDepth;
1028
1029 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
1030 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1031 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1032
1033 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
1034
1035 pOut->addr = address >> 1;
1036
1037 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1038 pIn->swizzleMode);
1039
1040 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1041
1042 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1043 }
1044 }
1045
1046 return returnCode;
1047 }
1048
1049 /**
1050 ************************************************************************************************************************
1051 * Gfx9Lib::HwlInitGlobalParams
1052 *
1053 * @brief
1054 * Initializes global parameters
1055 *
1056 * @return
1057 * TRUE if all settings are valid
1058 *
1059 ************************************************************************************************************************
1060 */
1061 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1062 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1063 {
1064 BOOL_32 valid = TRUE;
1065
1066 if (m_settings.isArcticIsland)
1067 {
1068 GB_ADDR_CONFIG gbAddrConfig;
1069
1070 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1071
1072 // These values are copied from CModel code
1073 switch (gbAddrConfig.bits.NUM_PIPES)
1074 {
1075 case ADDR_CONFIG_1_PIPE:
1076 m_pipes = 1;
1077 m_pipesLog2 = 0;
1078 break;
1079 case ADDR_CONFIG_2_PIPE:
1080 m_pipes = 2;
1081 m_pipesLog2 = 1;
1082 break;
1083 case ADDR_CONFIG_4_PIPE:
1084 m_pipes = 4;
1085 m_pipesLog2 = 2;
1086 break;
1087 case ADDR_CONFIG_8_PIPE:
1088 m_pipes = 8;
1089 m_pipesLog2 = 3;
1090 break;
1091 case ADDR_CONFIG_16_PIPE:
1092 m_pipes = 16;
1093 m_pipesLog2 = 4;
1094 break;
1095 case ADDR_CONFIG_32_PIPE:
1096 m_pipes = 32;
1097 m_pipesLog2 = 5;
1098 break;
1099 default:
1100 ADDR_ASSERT_ALWAYS();
1101 break;
1102 }
1103
1104 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1105 {
1106 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1107 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1108 m_pipeInterleaveLog2 = 8;
1109 break;
1110 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1111 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1112 m_pipeInterleaveLog2 = 9;
1113 break;
1114 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1115 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1116 m_pipeInterleaveLog2 = 10;
1117 break;
1118 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1119 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1120 m_pipeInterleaveLog2 = 11;
1121 break;
1122 default:
1123 ADDR_ASSERT_ALWAYS();
1124 break;
1125 }
1126
1127 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1128 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1129 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1130
1131 switch (gbAddrConfig.bits.NUM_BANKS)
1132 {
1133 case ADDR_CONFIG_1_BANK:
1134 m_banks = 1;
1135 m_banksLog2 = 0;
1136 break;
1137 case ADDR_CONFIG_2_BANK:
1138 m_banks = 2;
1139 m_banksLog2 = 1;
1140 break;
1141 case ADDR_CONFIG_4_BANK:
1142 m_banks = 4;
1143 m_banksLog2 = 2;
1144 break;
1145 case ADDR_CONFIG_8_BANK:
1146 m_banks = 8;
1147 m_banksLog2 = 3;
1148 break;
1149 case ADDR_CONFIG_16_BANK:
1150 m_banks = 16;
1151 m_banksLog2 = 4;
1152 break;
1153 default:
1154 ADDR_ASSERT_ALWAYS();
1155 break;
1156 }
1157
1158 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1159 {
1160 case ADDR_CONFIG_1_SHADER_ENGINE:
1161 m_se = 1;
1162 m_seLog2 = 0;
1163 break;
1164 case ADDR_CONFIG_2_SHADER_ENGINE:
1165 m_se = 2;
1166 m_seLog2 = 1;
1167 break;
1168 case ADDR_CONFIG_4_SHADER_ENGINE:
1169 m_se = 4;
1170 m_seLog2 = 2;
1171 break;
1172 case ADDR_CONFIG_8_SHADER_ENGINE:
1173 m_se = 8;
1174 m_seLog2 = 3;
1175 break;
1176 default:
1177 ADDR_ASSERT_ALWAYS();
1178 break;
1179 }
1180
1181 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1182 {
1183 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1184 m_rbPerSe = 1;
1185 m_rbPerSeLog2 = 0;
1186 break;
1187 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1188 m_rbPerSe = 2;
1189 m_rbPerSeLog2 = 1;
1190 break;
1191 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1192 m_rbPerSe = 4;
1193 m_rbPerSeLog2 = 2;
1194 break;
1195 default:
1196 ADDR_ASSERT_ALWAYS();
1197 break;
1198 }
1199
1200 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1201 {
1202 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1203 m_maxCompFrag = 1;
1204 m_maxCompFragLog2 = 0;
1205 break;
1206 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1207 m_maxCompFrag = 2;
1208 m_maxCompFragLog2 = 1;
1209 break;
1210 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1211 m_maxCompFrag = 4;
1212 m_maxCompFragLog2 = 2;
1213 break;
1214 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1215 m_maxCompFrag = 8;
1216 m_maxCompFragLog2 = 3;
1217 break;
1218 default:
1219 ADDR_ASSERT_ALWAYS();
1220 break;
1221 }
1222
1223 m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
1224 ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
1225 ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
1226 m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
1227
1228 if ((m_rbPerSeLog2 == 1) &&
1229 (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1230 ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1231 {
1232 ADDR_ASSERT(m_settings.isVega10 == FALSE);
1233 ADDR_ASSERT(m_settings.isRaven == FALSE);
1234 ADDR_ASSERT(m_settings.isVega20 == FALSE);
1235
1236 if (m_settings.isVega12)
1237 {
1238 m_settings.htileCacheRbConflict = 1;
1239 }
1240 }
1241 }
1242 else
1243 {
1244 valid = FALSE;
1245 ADDR_NOT_IMPLEMENTED();
1246 }
1247
1248 if (valid)
1249 {
1250 InitEquationTable();
1251 }
1252
1253 return valid;
1254 }
1255
1256 /**
1257 ************************************************************************************************************************
1258 * Gfx9Lib::HwlConvertChipFamily
1259 *
1260 * @brief
1261 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1262 * @return
1263 * ChipFamily
1264 ************************************************************************************************************************
1265 */
1266 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1267 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1268 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1269 {
1270 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1271
1272 switch (uChipFamily)
1273 {
1274 case FAMILY_AI:
1275 m_settings.isArcticIsland = 1;
1276 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1277 m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1278 m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1279 m_settings.isDce12 = 1;
1280
1281 if (m_settings.isVega10 == 0)
1282 {
1283 m_settings.htileAlignFix = 1;
1284 m_settings.applyAliasFix = 1;
1285 }
1286
1287 m_settings.metaBaseAlignFix = 1;
1288
1289 m_settings.depthPipeXorDisable = 1;
1290 break;
1291 case FAMILY_RV:
1292 m_settings.isArcticIsland = 1;
1293
1294 if (ASICREV_IS_RAVEN(uChipRevision))
1295 {
1296 m_settings.isRaven = 1;
1297
1298 m_settings.depthPipeXorDisable = 1;
1299 }
1300
1301 if (ASICREV_IS_RAVEN2(uChipRevision))
1302 {
1303 m_settings.isRaven = 1;
1304 }
1305
1306 if (m_settings.isRaven == 0)
1307 {
1308 m_settings.htileAlignFix = 1;
1309 m_settings.applyAliasFix = 1;
1310 }
1311
1312 m_settings.isDcn1 = m_settings.isRaven;
1313
1314 m_settings.metaBaseAlignFix = 1;
1315 break;
1316
1317 default:
1318 ADDR_ASSERT(!"This should be a Fusion");
1319 break;
1320 }
1321
1322 return family;
1323 }
1324
1325 /**
1326 ************************************************************************************************************************
1327 * Gfx9Lib::InitRbEquation
1328 *
1329 * @brief
1330 * Init RB equation
1331 * @return
1332 * N/A
1333 ************************************************************************************************************************
1334 */
1335 VOID Gfx9Lib::GetRbEquation(
1336 CoordEq* pRbEq, ///< [out] rb equation
1337 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1338 UINT_32 numSeLog2) ///< [in] number of shader engine
1339 const
1340 {
1341 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1342 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1343 Coordinate cx('x', rbRegion);
1344 Coordinate cy('y', rbRegion);
1345
1346 UINT_32 start = 0;
1347 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1348
1349 // Clear the rb equation
1350 pRbEq->resize(0);
1351 pRbEq->resize(numRbTotalLog2);
1352
1353 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1354 {
1355 // Special case when more than 1 SE, and 2 RB per SE
1356 (*pRbEq)[0].add(cx);
1357 (*pRbEq)[0].add(cy);
1358 cx++;
1359 cy++;
1360
1361 if (m_settings.applyAliasFix == false)
1362 {
1363 (*pRbEq)[0].add(cy);
1364 }
1365
1366 (*pRbEq)[0].add(cy);
1367 start++;
1368 }
1369
1370 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1371
1372 for (UINT_32 i = 0; i < numBits; i++)
1373 {
1374 UINT_32 idx =
1375 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1376
1377 if ((i % 2) == 1)
1378 {
1379 (*pRbEq)[idx].add(cx);
1380 cx++;
1381 }
1382 else
1383 {
1384 (*pRbEq)[idx].add(cy);
1385 cy++;
1386 }
1387 }
1388 }
1389
1390 /**
1391 ************************************************************************************************************************
1392 * Gfx9Lib::GetDataEquation
1393 *
1394 * @brief
1395 * Get data equation for fmask and Z
1396 * @return
1397 * N/A
1398 ************************************************************************************************************************
1399 */
1400 VOID Gfx9Lib::GetDataEquation(
1401 CoordEq* pDataEq, ///< [out] data surface equation
1402 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1403 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1404 AddrResourceType resourceType, ///< [in] data surface resource type
1405 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1406 UINT_32 numSamplesLog2) ///< [in] data surface sample count
1407 const
1408 {
1409 Coordinate cx('x', 0);
1410 Coordinate cy('y', 0);
1411 Coordinate cz('z', 0);
1412 Coordinate cs('s', 0);
1413
1414 // Clear the equation
1415 pDataEq->resize(0);
1416 pDataEq->resize(27);
1417
1418 if (dataSurfaceType == Gfx9DataColor)
1419 {
1420 if (IsLinear(swizzleMode))
1421 {
1422 Coordinate cm('m', 0);
1423
1424 pDataEq->resize(49);
1425
1426 for (UINT_32 i = 0; i < 49; i++)
1427 {
1428 (*pDataEq)[i].add(cm);
1429 cm++;
1430 }
1431 }
1432 else if (IsThick(resourceType, swizzleMode))
1433 {
1434 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1435 UINT_32 i;
1436 if (IsStandardSwizzle(resourceType, swizzleMode))
1437 {
1438 // Standard 3d swizzle
1439 // Fill in bottom x bits
1440 for (i = elementBytesLog2; i < 4; i++)
1441 {
1442 (*pDataEq)[i].add(cx);
1443 cx++;
1444 }
1445 // Fill in 2 bits of y and then z
1446 for (i = 4; i < 6; i++)
1447 {
1448 (*pDataEq)[i].add(cy);
1449 cy++;
1450 }
1451 for (i = 6; i < 8; i++)
1452 {
1453 (*pDataEq)[i].add(cz);
1454 cz++;
1455 }
1456 if (elementBytesLog2 < 2)
1457 {
1458 // fill in z & y bit
1459 (*pDataEq)[8].add(cz);
1460 (*pDataEq)[9].add(cy);
1461 cz++;
1462 cy++;
1463 }
1464 else if (elementBytesLog2 == 2)
1465 {
1466 // fill in y and x bit
1467 (*pDataEq)[8].add(cy);
1468 (*pDataEq)[9].add(cx);
1469 cy++;
1470 cx++;
1471 }
1472 else
1473 {
1474 // fill in 2 x bits
1475 (*pDataEq)[8].add(cx);
1476 cx++;
1477 (*pDataEq)[9].add(cx);
1478 cx++;
1479 }
1480 }
1481 else
1482 {
1483 // Z 3d swizzle
1484 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1485 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1486 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1487 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1488 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1489 {
1490 (*pDataEq)[i].add(cz);
1491 cz++;
1492 }
1493 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1494 {
1495 // add an x and z
1496 (*pDataEq)[6].add(cx);
1497 (*pDataEq)[7].add(cz);
1498 cx++;
1499 cz++;
1500 }
1501 else if (elementBytesLog2 == 2)
1502 {
1503 // add a y and z
1504 (*pDataEq)[6].add(cy);
1505 (*pDataEq)[7].add(cz);
1506 cy++;
1507 cz++;
1508 }
1509 // add y and x
1510 (*pDataEq)[8].add(cy);
1511 (*pDataEq)[9].add(cx);
1512 cy++;
1513 cx++;
1514 }
1515 // Fill in bit 10 and up
1516 pDataEq->mort3d( cz, cy, cx, 10 );
1517 }
1518 else if (IsThin(resourceType, swizzleMode))
1519 {
1520 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1521 // Color 2D
1522 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1523 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1524 UINT_32 i;
1525 // Fill in bottom x bits
1526 for (i = elementBytesLog2; i < 4; i++)
1527 {
1528 (*pDataEq)[i].add(cx);
1529 cx++;
1530 }
1531 // Fill in bottom y bits
1532 for (i = 4; i < 4 + microYBits; i++)
1533 {
1534 (*pDataEq)[i].add(cy);
1535 cy++;
1536 }
1537 // Fill in last of the micro_x bits
1538 for (i = 4 + microYBits; i < 8; i++)
1539 {
1540 (*pDataEq)[i].add(cx);
1541 cx++;
1542 }
1543 // Fill in x/y bits below sample split
1544 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1545 // Fill in sample bits
1546 for (i = 0; i < numSamplesLog2; i++)
1547 {
1548 cs.set('s', i);
1549 (*pDataEq)[tileSplitStart + i].add(cs);
1550 }
1551 // Fill in x/y bits above sample split
1552 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1553 {
1554 pDataEq->mort2d(cx, cy, blockSizeLog2);
1555 }
1556 else
1557 {
1558 pDataEq->mort2d(cy, cx, blockSizeLog2);
1559 }
1560 }
1561 else
1562 {
1563 ADDR_ASSERT_ALWAYS();
1564 }
1565 }
1566 else
1567 {
1568 // Fmask or depth
1569 UINT_32 sampleStart = elementBytesLog2;
1570 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1571 UINT_32 ymajStart = 6 + numSamplesLog2;
1572
1573 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1574 {
1575 cs.set('s', s);
1576 (*pDataEq)[sampleStart + s].add(cs);
1577 }
1578
1579 // Put in the x-major order pixel bits
1580 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1581 // Put in the y-major order pixel bits
1582 pDataEq->mort2d(cy, cx, ymajStart);
1583 }
1584 }
1585
1586 /**
1587 ************************************************************************************************************************
1588 * Gfx9Lib::GetPipeEquation
1589 *
1590 * @brief
1591 * Get pipe equation
1592 * @return
1593 * N/A
1594 ************************************************************************************************************************
1595 */
1596 VOID Gfx9Lib::GetPipeEquation(
1597 CoordEq* pPipeEq, ///< [out] pipe equation
1598 CoordEq* pDataEq, ///< [in] data equation
1599 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1600 UINT_32 numPipeLog2, ///< [in] number of pipes
1601 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1602 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1603 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1604 AddrResourceType resourceType ///< [in] data surface resource type
1605 ) const
1606 {
1607 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1608 CoordEq dataEq;
1609
1610 pDataEq->copy(dataEq);
1611
1612 if (dataSurfaceType == Gfx9DataColor)
1613 {
1614 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1615 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1616 }
1617
1618 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1619
1620 // This section should only apply to z/stencil, maybe fmask
1621 // If the pipe bit is below the comp block size,
1622 // then keep moving up the address until we find a bit that is above
1623 UINT_32 pipeStart = 0;
1624
1625 if (dataSurfaceType != Gfx9DataColor)
1626 {
1627 Coordinate tileMin('x', 3);
1628
1629 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1630 {
1631 pipeStart++;
1632 }
1633
1634 // if pipe is 0, then the first pipe bit is above the comp block size,
1635 // so we don't need to do anything
1636 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1637 // we will get the same pipe equation
1638 if (pipeStart != 0)
1639 {
1640 for (UINT_32 i = 0; i < numPipeLog2; i++)
1641 {
1642 // Copy the jth bit above pipe interleave to the current pipe equation bit
1643 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1644 }
1645 }
1646 }
1647
1648 if (IsPrt(swizzleMode))
1649 {
1650 // Clear out bits above the block size if prt's are enabled
1651 dataEq.resize(blockSizeLog2);
1652 dataEq.resize(48);
1653 }
1654
1655 if (IsXor(swizzleMode))
1656 {
1657 CoordEq xorMask;
1658
1659 if (IsThick(resourceType, swizzleMode))
1660 {
1661 CoordEq xorMask2;
1662
1663 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1664
1665 xorMask.resize(numPipeLog2);
1666
1667 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1668 {
1669 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1670 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1671 }
1672 }
1673 else
1674 {
1675 // Xor in the bits above the pipe+gpu bits
1676 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1677
1678 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1679 {
1680 Coordinate co;
1681 CoordEq xorMask2;
1682 // if 1xaa and not prt, then xor in the z bits
1683 xorMask2.resize(0);
1684 xorMask2.resize(numPipeLog2);
1685 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1686 {
1687 co.set('z', numPipeLog2 - 1 - pipeIdx);
1688 xorMask2[pipeIdx].add(co);
1689 }
1690
1691 pPipeEq->xorin(xorMask2);
1692 }
1693 }
1694
1695 xorMask.reverse();
1696 pPipeEq->xorin(xorMask);
1697 }
1698 }
1699 /**
1700 ************************************************************************************************************************
1701 * Gfx9Lib::GetMetaEquation
1702 *
1703 * @brief
1704 * Get meta equation for cmask/htile/DCC
1705 * @return
1706 * Pointer to a calculated meta equation
1707 ************************************************************************************************************************
1708 */
1709 const CoordEq* Gfx9Lib::GetMetaEquation(
1710 const MetaEqParams& metaEqParams)
1711 {
1712 UINT_32 cachedMetaEqIndex;
1713
1714 for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1715 {
1716 if (memcmp(&metaEqParams,
1717 &m_cachedMetaEqKey[cachedMetaEqIndex],
1718 static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1719 {
1720 break;
1721 }
1722 }
1723
1724 CoordEq* pMetaEq = NULL;
1725
1726 if (cachedMetaEqIndex < MaxCachedMetaEq)
1727 {
1728 pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1729 }
1730 else
1731 {
1732 m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1733
1734 pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1735
1736 m_metaEqOverrideIndex %= MaxCachedMetaEq;
1737
1738 GenMetaEquation(pMetaEq,
1739 metaEqParams.maxMip,
1740 metaEqParams.elementBytesLog2,
1741 metaEqParams.numSamplesLog2,
1742 metaEqParams.metaFlag,
1743 metaEqParams.dataSurfaceType,
1744 metaEqParams.swizzleMode,
1745 metaEqParams.resourceType,
1746 metaEqParams.metaBlkWidthLog2,
1747 metaEqParams.metaBlkHeightLog2,
1748 metaEqParams.metaBlkDepthLog2,
1749 metaEqParams.compBlkWidthLog2,
1750 metaEqParams.compBlkHeightLog2,
1751 metaEqParams.compBlkDepthLog2);
1752 }
1753
1754 return pMetaEq;
1755 }
1756
1757 /**
1758 ************************************************************************************************************************
1759 * Gfx9Lib::GenMetaEquation
1760 *
1761 * @brief
1762 * Get meta equation for cmask/htile/DCC
1763 * @return
1764 * N/A
1765 ************************************************************************************************************************
1766 */
1767 VOID Gfx9Lib::GenMetaEquation(
1768 CoordEq* pMetaEq, ///< [out] meta equation
1769 UINT_32 maxMip, ///< [in] max mip Id
1770 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1771 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1772 ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
1773 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1774 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1775 AddrResourceType resourceType, ///< [in] data surface resource type
1776 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1777 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1778 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1779 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1780 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1781 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1782 const
1783 {
1784 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1785 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1786
1787 // Get the correct data address and rb equation
1788 CoordEq dataEq;
1789 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1790 elementBytesLog2, numSamplesLog2);
1791
1792 // Get pipe and rb equations
1793 CoordEq pipeEquation;
1794 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1795 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1796 numPipeTotalLog2 = pipeEquation.getsize();
1797
1798 if (metaFlag.linear)
1799 {
1800 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1801 ADDR_ASSERT_ALWAYS();
1802
1803 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1804
1805 dataEq.copy(*pMetaEq);
1806
1807 if (IsLinear(swizzleMode))
1808 {
1809 if (metaFlag.pipeAligned)
1810 {
1811 // Remove the pipe bits
1812 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1813 pMetaEq->shift(-shift, pipeInterleaveLog2);
1814 }
1815 // Divide by comp block size, which for linear (which is always color) is 256 B
1816 pMetaEq->shift(-8);
1817
1818 if (metaFlag.pipeAligned)
1819 {
1820 // Put pipe bits back in
1821 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1822
1823 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1824 {
1825 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1826 }
1827 }
1828 }
1829
1830 pMetaEq->shift(1);
1831 }
1832 else
1833 {
1834 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1835 UINT_32 compFragLog2 =
1836 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1837 maxCompFragLog2 : numSamplesLog2;
1838
1839 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1840
1841 // Make sure the metaaddr is cleared
1842 pMetaEq->resize(0);
1843 pMetaEq->resize(27);
1844
1845 if (IsThick(resourceType, swizzleMode))
1846 {
1847 Coordinate cx('x', 0);
1848 Coordinate cy('y', 0);
1849 Coordinate cz('z', 0);
1850
1851 if (maxMip > 0)
1852 {
1853 pMetaEq->mort3d(cy, cx, cz);
1854 }
1855 else
1856 {
1857 pMetaEq->mort3d(cx, cy, cz);
1858 }
1859 }
1860 else
1861 {
1862 Coordinate cx('x', 0);
1863 Coordinate cy('y', 0);
1864 Coordinate cs;
1865
1866 if (maxMip > 0)
1867 {
1868 pMetaEq->mort2d(cy, cx, compFragLog2);
1869 }
1870 else
1871 {
1872 pMetaEq->mort2d(cx, cy, compFragLog2);
1873 }
1874
1875 //------------------------------------------------------------------------------------------------------------------------
1876 // Put the compressible fragments at the lsb
1877 // the uncompressible frags will be at the msb of the micro address
1878 //------------------------------------------------------------------------------------------------------------------------
1879 for (UINT_32 s = 0; s < compFragLog2; s++)
1880 {
1881 cs.set('s', s);
1882 (*pMetaEq)[s].add(cs);
1883 }
1884 }
1885
1886 // Keep a copy of the pipe equations
1887 CoordEq origPipeEquation;
1888 pipeEquation.copy(origPipeEquation);
1889
1890 Coordinate co;
1891 // filter out everything under the compressed block size
1892 co.set('x', compBlkWidthLog2);
1893 pMetaEq->Filter('<', co, 0, 'x');
1894 co.set('y', compBlkHeightLog2);
1895 pMetaEq->Filter('<', co, 0, 'y');
1896 co.set('z', compBlkDepthLog2);
1897 pMetaEq->Filter('<', co, 0, 'z');
1898
1899 // For non-color, filter out sample bits
1900 if (dataSurfaceType != Gfx9DataColor)
1901 {
1902 co.set('x', 0);
1903 pMetaEq->Filter('<', co, 0, 's');
1904 }
1905
1906 // filter out everything above the metablock size
1907 co.set('x', metaBlkWidthLog2 - 1);
1908 pMetaEq->Filter('>', co, 0, 'x');
1909 co.set('y', metaBlkHeightLog2 - 1);
1910 pMetaEq->Filter('>', co, 0, 'y');
1911 co.set('z', metaBlkDepthLog2 - 1);
1912 pMetaEq->Filter('>', co, 0, 'z');
1913
1914 // filter out everything above the metablock size for the channel bits
1915 co.set('x', metaBlkWidthLog2 - 1);
1916 pipeEquation.Filter('>', co, 0, 'x');
1917 co.set('y', metaBlkHeightLog2 - 1);
1918 pipeEquation.Filter('>', co, 0, 'y');
1919 co.set('z', metaBlkDepthLog2 - 1);
1920 pipeEquation.Filter('>', co, 0, 'z');
1921
1922 // Make sure we still have the same number of channel bits
1923 if (pipeEquation.getsize() != numPipeTotalLog2)
1924 {
1925 ADDR_ASSERT_ALWAYS();
1926 }
1927
1928 // Loop through all channel and rb bits,
1929 // and make sure these components exist in the metadata address
1930 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1931 {
1932 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1933 {
1934 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1935 {
1936 ADDR_ASSERT_ALWAYS();
1937 }
1938 }
1939 }
1940
1941 const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
1942 const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1943 const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1944 CoordEq origRbEquation;
1945
1946 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1947
1948 CoordEq rbEquation = origRbEquation;
1949
1950 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1951 {
1952 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1953 {
1954 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1955 {
1956 ADDR_ASSERT_ALWAYS();
1957 }
1958 }
1959 }
1960
1961 if (m_settings.applyAliasFix)
1962 {
1963 co.set('z', -1);
1964 }
1965
1966 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1967 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1968 {
1969 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1970 {
1971 BOOL_32 isRbEquationInPipeEquation = FALSE;
1972
1973 if (m_settings.applyAliasFix)
1974 {
1975 CoordTerm filteredPipeEq;
1976 filteredPipeEq = pipeEquation[j];
1977
1978 filteredPipeEq.Filter('>', co, 0, 'z');
1979
1980 isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1981 }
1982 else
1983 {
1984 isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1985 }
1986
1987 if (isRbEquationInPipeEquation)
1988 {
1989 rbEquation[i].Clear();
1990 }
1991 }
1992 }
1993
1994 bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
1995
1996 // Loop through each bit of the channel, get the smallest coordinate,
1997 // and remove it from the metaaddr, and rb_equation
1998 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1999 {
2000 pipeEquation[i].getsmallest(co);
2001
2002 UINT_32 old_size = pMetaEq->getsize();
2003 pMetaEq->Filter('=', co);
2004 UINT_32 new_size = pMetaEq->getsize();
2005 if (new_size != old_size-1)
2006 {
2007 ADDR_ASSERT_ALWAYS();
2008 }
2009 pipeEquation.remove(co);
2010 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2011 {
2012 if (rbEquation[j].remove(co))
2013 {
2014 // if we actually removed something from this bit, then add the remaining
2015 // channel bits, as these can be removed for this bit
2016 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2017 {
2018 if (pipeEquation[i][k] != co)
2019 {
2020 rbEquation[j].add(pipeEquation[i][k]);
2021 rbAppendedWithPipeBits[j] = true;
2022 }
2023 }
2024 }
2025 }
2026 }
2027
2028 // Loop through the rb bits and see what remain;
2029 // filter out the smallest coordinate if it remains
2030 UINT_32 rbBitsLeft = 0;
2031 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2032 {
2033 BOOL_32 isRbEqAppended = FALSE;
2034
2035 if (m_settings.applyAliasFix)
2036 {
2037 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2038 }
2039 else
2040 {
2041 isRbEqAppended = (rbEquation[i].getsize() > 0);
2042 }
2043
2044 if (isRbEqAppended)
2045 {
2046 rbBitsLeft++;
2047 rbEquation[i].getsmallest(co);
2048 UINT_32 old_size = pMetaEq->getsize();
2049 pMetaEq->Filter('=', co);
2050 UINT_32 new_size = pMetaEq->getsize();
2051 if (new_size != old_size - 1)
2052 {
2053 // assert warning
2054 }
2055 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2056 {
2057 if (rbEquation[j].remove(co))
2058 {
2059 // if we actually removed something from this bit, then add the remaining
2060 // rb bits, as these can be removed for this bit
2061 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2062 {
2063 if (rbEquation[i][k] != co)
2064 {
2065 rbEquation[j].add(rbEquation[i][k]);
2066 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2067 }
2068 }
2069 }
2070 }
2071 }
2072 }
2073
2074 // capture the size of the metaaddr
2075 UINT_32 metaSize = pMetaEq->getsize();
2076 // resize to 49 bits...make this a nibble address
2077 pMetaEq->resize(49);
2078 // Concatenate the macro address above the current address
2079 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2080 {
2081 co.set('m', j);
2082 (*pMetaEq)[i].add(co);
2083 }
2084
2085 // Multiply by meta element size (in nibbles)
2086 if (dataSurfaceType == Gfx9DataColor)
2087 {
2088 pMetaEq->shift(1);
2089 }
2090 else if (dataSurfaceType == Gfx9DataDepthStencil)
2091 {
2092 pMetaEq->shift(3);
2093 }
2094
2095 //------------------------------------------------------------------------------------------
2096 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2097 // Shift up from pipe interleave number of channel
2098 // and rb bits left, and uncompressed fragments
2099 //------------------------------------------------------------------------------------------
2100
2101 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2102
2103 // Put in the channel bits
2104 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2105 {
2106 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2107 }
2108
2109 // Put in remaining rb bits
2110 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2111 {
2112 BOOL_32 isRbEqAppended = FALSE;
2113
2114 if (m_settings.applyAliasFix)
2115 {
2116 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2117 }
2118 else
2119 {
2120 isRbEqAppended = (rbEquation[i].getsize() > 0);
2121 }
2122
2123 if (isRbEqAppended)
2124 {
2125 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2126 // Mark any rb bit we add in to the rb mask
2127 j++;
2128 }
2129 }
2130
2131 //------------------------------------------------------------------------------------------
2132 // Put in the uncompressed fragment bits
2133 //------------------------------------------------------------------------------------------
2134 for (UINT_32 i = 0; i < uncompFragLog2; i++)
2135 {
2136 co.set('s', compFragLog2 + i);
2137 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2138 }
2139 }
2140 }
2141
2142 /**
2143 ************************************************************************************************************************
2144 * Gfx9Lib::IsEquationSupported
2145 *
2146 * @brief
2147 * Check if equation is supported for given swizzle mode and resource type.
2148 *
2149 * @return
2150 * TRUE if supported
2151 ************************************************************************************************************************
2152 */
2153 BOOL_32 Gfx9Lib::IsEquationSupported(
2154 AddrResourceType rsrcType,
2155 AddrSwizzleMode swMode,
2156 UINT_32 elementBytesLog2) const
2157 {
2158 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2159 (IsLinear(swMode) == FALSE) &&
2160 (((IsTex2d(rsrcType) == TRUE) &&
2161 ((elementBytesLog2 < 4) ||
2162 ((IsRotateSwizzle(swMode) == FALSE) &&
2163 (IsZOrderSwizzle(swMode) == FALSE)))) ||
2164 ((IsTex3d(rsrcType) == TRUE) &&
2165 (IsRotateSwizzle(swMode) == FALSE) &&
2166 (IsBlock256b(swMode) == FALSE)));
2167
2168 return supported;
2169 }
2170
2171 /**
2172 ************************************************************************************************************************
2173 * Gfx9Lib::InitEquationTable
2174 *
2175 * @brief
2176 * Initialize Equation table.
2177 *
2178 * @return
2179 * N/A
2180 ************************************************************************************************************************
2181 */
2182 VOID Gfx9Lib::InitEquationTable()
2183 {
2184 memset(m_equationTable, 0, sizeof(m_equationTable));
2185
2186 // Loop all possible resource type (2D/3D)
2187 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2188 {
2189 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2190
2191 // Loop all possible swizzle mode
2192 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
2193 {
2194 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2195
2196 // Loop all possible bpp
2197 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2198 {
2199 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2200
2201 // Check if the input is supported
2202 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2203 {
2204 ADDR_EQUATION equation;
2205 ADDR_E_RETURNCODE retCode;
2206
2207 memset(&equation, 0, sizeof(ADDR_EQUATION));
2208
2209 // Generate the equation
2210 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2211 {
2212 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2213 }
2214 else if (IsThin(rsrcType, swMode))
2215 {
2216 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2217 }
2218 else
2219 {
2220 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2221 }
2222
2223 // Only fill the equation into the table if the return code is ADDR_OK,
2224 // otherwise if the return code is not ADDR_OK, it indicates this is not
2225 // a valid input, we do nothing but just fill invalid equation index
2226 // into the lookup table.
2227 if (retCode == ADDR_OK)
2228 {
2229 equationIndex = m_numEquations;
2230 ADDR_ASSERT(equationIndex < EquationTableSize);
2231
2232 m_equationTable[equationIndex] = equation;
2233
2234 m_numEquations++;
2235 }
2236 else
2237 {
2238 ADDR_ASSERT_ALWAYS();
2239 }
2240 }
2241
2242 // Fill the index into the lookup table, if the combination is not supported
2243 // fill the invalid equation index
2244 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2245 }
2246 }
2247 }
2248 }
2249
2250 /**
2251 ************************************************************************************************************************
2252 * Gfx9Lib::HwlGetEquationIndex
2253 *
2254 * @brief
2255 * Interface function stub of GetEquationIndex
2256 *
2257 * @return
2258 * ADDR_E_RETURNCODE
2259 ************************************************************************************************************************
2260 */
2261 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2262 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2263 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2264 ) const
2265 {
2266 AddrResourceType rsrcType = pIn->resourceType;
2267 AddrSwizzleMode swMode = pIn->swizzleMode;
2268 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2269 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2270
2271 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2272 {
2273 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2274 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2275
2276 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2277 }
2278
2279 if (pOut->pMipInfo != NULL)
2280 {
2281 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2282 {
2283 pOut->pMipInfo[i].equationIndex = index;
2284 }
2285 }
2286
2287 return index;
2288 }
2289
2290 /**
2291 ************************************************************************************************************************
2292 * Gfx9Lib::HwlComputeBlock256Equation
2293 *
2294 * @brief
2295 * Interface function stub of ComputeBlock256Equation
2296 *
2297 * @return
2298 * ADDR_E_RETURNCODE
2299 ************************************************************************************************************************
2300 */
2301 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2302 AddrResourceType rsrcType,
2303 AddrSwizzleMode swMode,
2304 UINT_32 elementBytesLog2,
2305 ADDR_EQUATION* pEquation) const
2306 {
2307 ADDR_E_RETURNCODE ret = ADDR_OK;
2308
2309 pEquation->numBits = 8;
2310
2311 UINT_32 i = 0;
2312 for (; i < elementBytesLog2; i++)
2313 {
2314 InitChannel(1, 0 , i, &pEquation->addr[i]);
2315 }
2316
2317 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2318
2319 const UINT_32 maxBitsUsed = 4;
2320 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2321 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2322
2323 for (i = 0; i < maxBitsUsed; i++)
2324 {
2325 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2326 InitChannel(1, 1, i, &y[i]);
2327 }
2328
2329 if (IsStandardSwizzle(rsrcType, swMode))
2330 {
2331 switch (elementBytesLog2)
2332 {
2333 case 0:
2334 pixelBit[0] = x[0];
2335 pixelBit[1] = x[1];
2336 pixelBit[2] = x[2];
2337 pixelBit[3] = x[3];
2338 pixelBit[4] = y[0];
2339 pixelBit[5] = y[1];
2340 pixelBit[6] = y[2];
2341 pixelBit[7] = y[3];
2342 break;
2343 case 1:
2344 pixelBit[0] = x[0];
2345 pixelBit[1] = x[1];
2346 pixelBit[2] = x[2];
2347 pixelBit[3] = y[0];
2348 pixelBit[4] = y[1];
2349 pixelBit[5] = y[2];
2350 pixelBit[6] = x[3];
2351 break;
2352 case 2:
2353 pixelBit[0] = x[0];
2354 pixelBit[1] = x[1];
2355 pixelBit[2] = y[0];
2356 pixelBit[3] = y[1];
2357 pixelBit[4] = y[2];
2358 pixelBit[5] = x[2];
2359 break;
2360 case 3:
2361 pixelBit[0] = x[0];
2362 pixelBit[1] = y[0];
2363 pixelBit[2] = y[1];
2364 pixelBit[3] = x[1];
2365 pixelBit[4] = x[2];
2366 break;
2367 case 4:
2368 pixelBit[0] = y[0];
2369 pixelBit[1] = y[1];
2370 pixelBit[2] = x[0];
2371 pixelBit[3] = x[1];
2372 break;
2373 default:
2374 ADDR_ASSERT_ALWAYS();
2375 ret = ADDR_INVALIDPARAMS;
2376 break;
2377 }
2378 }
2379 else if (IsDisplaySwizzle(rsrcType, swMode))
2380 {
2381 switch (elementBytesLog2)
2382 {
2383 case 0:
2384 pixelBit[0] = x[0];
2385 pixelBit[1] = x[1];
2386 pixelBit[2] = x[2];
2387 pixelBit[3] = y[1];
2388 pixelBit[4] = y[0];
2389 pixelBit[5] = y[2];
2390 pixelBit[6] = x[3];
2391 pixelBit[7] = y[3];
2392 break;
2393 case 1:
2394 pixelBit[0] = x[0];
2395 pixelBit[1] = x[1];
2396 pixelBit[2] = x[2];
2397 pixelBit[3] = y[0];
2398 pixelBit[4] = y[1];
2399 pixelBit[5] = y[2];
2400 pixelBit[6] = x[3];
2401 break;
2402 case 2:
2403 pixelBit[0] = x[0];
2404 pixelBit[1] = x[1];
2405 pixelBit[2] = y[0];
2406 pixelBit[3] = x[2];
2407 pixelBit[4] = y[1];
2408 pixelBit[5] = y[2];
2409 break;
2410 case 3:
2411 pixelBit[0] = x[0];
2412 pixelBit[1] = y[0];
2413 pixelBit[2] = x[1];
2414 pixelBit[3] = x[2];
2415 pixelBit[4] = y[1];
2416 break;
2417 case 4:
2418 pixelBit[0] = x[0];
2419 pixelBit[1] = y[0];
2420 pixelBit[2] = x[1];
2421 pixelBit[3] = y[1];
2422 break;
2423 default:
2424 ADDR_ASSERT_ALWAYS();
2425 ret = ADDR_INVALIDPARAMS;
2426 break;
2427 }
2428 }
2429 else if (IsRotateSwizzle(swMode))
2430 {
2431 switch (elementBytesLog2)
2432 {
2433 case 0:
2434 pixelBit[0] = y[0];
2435 pixelBit[1] = y[1];
2436 pixelBit[2] = y[2];
2437 pixelBit[3] = x[1];
2438 pixelBit[4] = x[0];
2439 pixelBit[5] = x[2];
2440 pixelBit[6] = x[3];
2441 pixelBit[7] = y[3];
2442 break;
2443 case 1:
2444 pixelBit[0] = y[0];
2445 pixelBit[1] = y[1];
2446 pixelBit[2] = y[2];
2447 pixelBit[3] = x[0];
2448 pixelBit[4] = x[1];
2449 pixelBit[5] = x[2];
2450 pixelBit[6] = x[3];
2451 break;
2452 case 2:
2453 pixelBit[0] = y[0];
2454 pixelBit[1] = y[1];
2455 pixelBit[2] = x[0];
2456 pixelBit[3] = y[2];
2457 pixelBit[4] = x[1];
2458 pixelBit[5] = x[2];
2459 break;
2460 case 3:
2461 pixelBit[0] = y[0];
2462 pixelBit[1] = x[0];
2463 pixelBit[2] = y[1];
2464 pixelBit[3] = x[1];
2465 pixelBit[4] = x[2];
2466 break;
2467 default:
2468 ADDR_ASSERT_ALWAYS();
2469 case 4:
2470 ret = ADDR_INVALIDPARAMS;
2471 break;
2472 }
2473 }
2474 else
2475 {
2476 ADDR_ASSERT_ALWAYS();
2477 ret = ADDR_INVALIDPARAMS;
2478 }
2479
2480 // Post validation
2481 if (ret == ADDR_OK)
2482 {
2483 MAYBE_UNUSED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2484 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2485 (microBlockDim.w * (1 << elementBytesLog2)));
2486 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2487 }
2488
2489 return ret;
2490 }
2491
2492 /**
2493 ************************************************************************************************************************
2494 * Gfx9Lib::HwlComputeThinEquation
2495 *
2496 * @brief
2497 * Interface function stub of ComputeThinEquation
2498 *
2499 * @return
2500 * ADDR_E_RETURNCODE
2501 ************************************************************************************************************************
2502 */
2503 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2504 AddrResourceType rsrcType,
2505 AddrSwizzleMode swMode,
2506 UINT_32 elementBytesLog2,
2507 ADDR_EQUATION* pEquation) const
2508 {
2509 ADDR_E_RETURNCODE ret = ADDR_OK;
2510
2511 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2512
2513 UINT_32 maxXorBits = blockSizeLog2;
2514 if (IsNonPrtXor(swMode))
2515 {
2516 // For non-prt-xor, maybe need to initialize some more bits for xor
2517 // The highest xor bit used in equation will be max the following 3 items:
2518 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2519 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2520 // 3. blockSizeLog2
2521
2522 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2523 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2524 GetPipeXorBits(blockSizeLog2) +
2525 2 * GetBankXorBits(blockSizeLog2));
2526 }
2527
2528 const UINT_32 maxBitsUsed = 14;
2529 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2530 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2531 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2532
2533 const UINT_32 extraXorBits = 16;
2534 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2535 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2536
2537 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2538 {
2539 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2540 InitChannel(1, 1, i, &y[i]);
2541 }
2542
2543 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2544
2545 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2546 {
2547 InitChannel(1, 0 , i, &pixelBit[i]);
2548 }
2549
2550 UINT_32 xIdx = 0;
2551 UINT_32 yIdx = 0;
2552 UINT_32 lowBits = 0;
2553
2554 if (IsZOrderSwizzle(swMode))
2555 {
2556 if (elementBytesLog2 <= 3)
2557 {
2558 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2559 {
2560 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2561 }
2562
2563 lowBits = 6;
2564 }
2565 else
2566 {
2567 ret = ADDR_INVALIDPARAMS;
2568 }
2569 }
2570 else
2571 {
2572 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2573
2574 if (ret == ADDR_OK)
2575 {
2576 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2577 xIdx = Log2(microBlockDim.w);
2578 yIdx = Log2(microBlockDim.h);
2579 lowBits = 8;
2580 }
2581 }
2582
2583 if (ret == ADDR_OK)
2584 {
2585 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2586 {
2587 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2588 }
2589
2590 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2591 {
2592 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2593 }
2594
2595 if (IsXor(swMode))
2596 {
2597 // Fill XOR bits
2598 UINT_32 pipeStart = m_pipeInterleaveLog2;
2599 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2600
2601 UINT_32 bankStart = pipeStart + pipeXorBits;
2602 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2603
2604 for (UINT_32 i = 0; i < pipeXorBits; i++)
2605 {
2606 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2607 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2608 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2609
2610 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2611 }
2612
2613 for (UINT_32 i = 0; i < bankXorBits; i++)
2614 {
2615 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2616 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2617 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2618
2619 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2620 }
2621
2622 if (IsPrt(swMode) == FALSE)
2623 {
2624 for (UINT_32 i = 0; i < pipeXorBits; i++)
2625 {
2626 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2627 }
2628
2629 for (UINT_32 i = 0; i < bankXorBits; i++)
2630 {
2631 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2632 }
2633 }
2634 }
2635
2636 pEquation->numBits = blockSizeLog2;
2637 }
2638
2639 return ret;
2640 }
2641
2642 /**
2643 ************************************************************************************************************************
2644 * Gfx9Lib::HwlComputeThickEquation
2645 *
2646 * @brief
2647 * Interface function stub of ComputeThickEquation
2648 *
2649 * @return
2650 * ADDR_E_RETURNCODE
2651 ************************************************************************************************************************
2652 */
2653 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2654 AddrResourceType rsrcType,
2655 AddrSwizzleMode swMode,
2656 UINT_32 elementBytesLog2,
2657 ADDR_EQUATION* pEquation) const
2658 {
2659 ADDR_E_RETURNCODE ret = ADDR_OK;
2660
2661 ADDR_ASSERT(IsTex3d(rsrcType));
2662
2663 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2664
2665 UINT_32 maxXorBits = blockSizeLog2;
2666 if (IsNonPrtXor(swMode))
2667 {
2668 // For non-prt-xor, maybe need to initialize some more bits for xor
2669 // The highest xor bit used in equation will be max the following 3:
2670 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2671 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2672 // 3. blockSizeLog2
2673
2674 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2675 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2676 GetPipeXorBits(blockSizeLog2) +
2677 3 * GetBankXorBits(blockSizeLog2));
2678 }
2679
2680 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2681 {
2682 InitChannel(1, 0 , i, &pEquation->addr[i]);
2683 }
2684
2685 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2686
2687 const UINT_32 maxBitsUsed = 12;
2688 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2689 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2690 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2691 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2692
2693 const UINT_32 extraXorBits = 24;
2694 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2695 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2696
2697 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2698 {
2699 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2700 InitChannel(1, 1, i, &y[i]);
2701 InitChannel(1, 2, i, &z[i]);
2702 }
2703
2704 if (IsZOrderSwizzle(swMode))
2705 {
2706 switch (elementBytesLog2)
2707 {
2708 case 0:
2709 pixelBit[0] = x[0];
2710 pixelBit[1] = y[0];
2711 pixelBit[2] = x[1];
2712 pixelBit[3] = y[1];
2713 pixelBit[4] = z[0];
2714 pixelBit[5] = z[1];
2715 pixelBit[6] = x[2];
2716 pixelBit[7] = z[2];
2717 pixelBit[8] = y[2];
2718 pixelBit[9] = x[3];
2719 break;
2720 case 1:
2721 pixelBit[0] = x[0];
2722 pixelBit[1] = y[0];
2723 pixelBit[2] = x[1];
2724 pixelBit[3] = y[1];
2725 pixelBit[4] = z[0];
2726 pixelBit[5] = z[1];
2727 pixelBit[6] = z[2];
2728 pixelBit[7] = y[2];
2729 pixelBit[8] = x[2];
2730 break;
2731 case 2:
2732 pixelBit[0] = x[0];
2733 pixelBit[1] = y[0];
2734 pixelBit[2] = x[1];
2735 pixelBit[3] = z[0];
2736 pixelBit[4] = y[1];
2737 pixelBit[5] = z[1];
2738 pixelBit[6] = y[2];
2739 pixelBit[7] = x[2];
2740 break;
2741 case 3:
2742 pixelBit[0] = x[0];
2743 pixelBit[1] = y[0];
2744 pixelBit[2] = z[0];
2745 pixelBit[3] = x[1];
2746 pixelBit[4] = z[1];
2747 pixelBit[5] = y[1];
2748 pixelBit[6] = x[2];
2749 break;
2750 case 4:
2751 pixelBit[0] = x[0];
2752 pixelBit[1] = y[0];
2753 pixelBit[2] = z[0];
2754 pixelBit[3] = z[1];
2755 pixelBit[4] = y[1];
2756 pixelBit[5] = x[1];
2757 break;
2758 default:
2759 ADDR_ASSERT_ALWAYS();
2760 ret = ADDR_INVALIDPARAMS;
2761 break;
2762 }
2763 }
2764 else if (IsStandardSwizzle(rsrcType, swMode))
2765 {
2766 switch (elementBytesLog2)
2767 {
2768 case 0:
2769 pixelBit[0] = x[0];
2770 pixelBit[1] = x[1];
2771 pixelBit[2] = x[2];
2772 pixelBit[3] = x[3];
2773 pixelBit[4] = y[0];
2774 pixelBit[5] = y[1];
2775 pixelBit[6] = z[0];
2776 pixelBit[7] = z[1];
2777 pixelBit[8] = z[2];
2778 pixelBit[9] = y[2];
2779 break;
2780 case 1:
2781 pixelBit[0] = x[0];
2782 pixelBit[1] = x[1];
2783 pixelBit[2] = x[2];
2784 pixelBit[3] = y[0];
2785 pixelBit[4] = y[1];
2786 pixelBit[5] = z[0];
2787 pixelBit[6] = z[1];
2788 pixelBit[7] = z[2];
2789 pixelBit[8] = y[2];
2790 break;
2791 case 2:
2792 pixelBit[0] = x[0];
2793 pixelBit[1] = x[1];
2794 pixelBit[2] = y[0];
2795 pixelBit[3] = y[1];
2796 pixelBit[4] = z[0];
2797 pixelBit[5] = z[1];
2798 pixelBit[6] = y[2];
2799 pixelBit[7] = x[2];
2800 break;
2801 case 3:
2802 pixelBit[0] = x[0];
2803 pixelBit[1] = y[0];
2804 pixelBit[2] = y[1];
2805 pixelBit[3] = z[0];
2806 pixelBit[4] = z[1];
2807 pixelBit[5] = x[1];
2808 pixelBit[6] = x[2];
2809 break;
2810 case 4:
2811 pixelBit[0] = y[0];
2812 pixelBit[1] = y[1];
2813 pixelBit[2] = z[0];
2814 pixelBit[3] = z[1];
2815 pixelBit[4] = x[0];
2816 pixelBit[5] = x[1];
2817 break;
2818 default:
2819 ADDR_ASSERT_ALWAYS();
2820 ret = ADDR_INVALIDPARAMS;
2821 break;
2822 }
2823 }
2824 else
2825 {
2826 ADDR_ASSERT_ALWAYS();
2827 ret = ADDR_INVALIDPARAMS;
2828 }
2829
2830 if (ret == ADDR_OK)
2831 {
2832 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2833 UINT_32 xIdx = Log2(microBlockDim.w);
2834 UINT_32 yIdx = Log2(microBlockDim.h);
2835 UINT_32 zIdx = Log2(microBlockDim.d);
2836
2837 pixelBit = pEquation->addr;
2838
2839 const UINT_32 lowBits = 10;
2840 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2841 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2842
2843 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2844 {
2845 if ((i % 3) == 0)
2846 {
2847 pixelBit[i] = x[xIdx++];
2848 }
2849 else if ((i % 3) == 1)
2850 {
2851 pixelBit[i] = z[zIdx++];
2852 }
2853 else
2854 {
2855 pixelBit[i] = y[yIdx++];
2856 }
2857 }
2858
2859 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2860 {
2861 if ((i % 3) == 0)
2862 {
2863 xorExtra[i - blockSizeLog2] = x[xIdx++];
2864 }
2865 else if ((i % 3) == 1)
2866 {
2867 xorExtra[i - blockSizeLog2] = z[zIdx++];
2868 }
2869 else
2870 {
2871 xorExtra[i - blockSizeLog2] = y[yIdx++];
2872 }
2873 }
2874
2875 if (IsXor(swMode))
2876 {
2877 // Fill XOR bits
2878 UINT_32 pipeStart = m_pipeInterleaveLog2;
2879 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2880 for (UINT_32 i = 0; i < pipeXorBits; i++)
2881 {
2882 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2883 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2884 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2885
2886 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2887
2888 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2889 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2890 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2891
2892 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2893 }
2894
2895 UINT_32 bankStart = pipeStart + pipeXorBits;
2896 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2897 for (UINT_32 i = 0; i < bankXorBits; i++)
2898 {
2899 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2900 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2901 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2902
2903 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2904
2905 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2906 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2907 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2908
2909 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2910 }
2911 }
2912
2913 pEquation->numBits = blockSizeLog2;
2914 }
2915
2916 return ret;
2917 }
2918
2919 /**
2920 ************************************************************************************************************************
2921 * Gfx9Lib::IsValidDisplaySwizzleMode
2922 *
2923 * @brief
2924 * Check if a swizzle mode is supported by display engine
2925 *
2926 * @return
2927 * TRUE is swizzle mode is supported by display engine
2928 ************************************************************************************************************************
2929 */
2930 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2931 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2932 {
2933 BOOL_32 support = FALSE;
2934
2935 const AddrResourceType resourceType = pIn->resourceType;
2936 (void)resourceType;
2937 const AddrSwizzleMode swizzleMode = pIn->swizzleMode;
2938
2939 if (m_settings.isDce12)
2940 {
2941 switch (swizzleMode)
2942 {
2943 case ADDR_SW_256B_D:
2944 case ADDR_SW_256B_R:
2945 support = (pIn->bpp == 32);
2946 break;
2947
2948 case ADDR_SW_LINEAR:
2949 case ADDR_SW_4KB_D:
2950 case ADDR_SW_4KB_R:
2951 case ADDR_SW_64KB_D:
2952 case ADDR_SW_64KB_R:
2953 case ADDR_SW_VAR_D:
2954 case ADDR_SW_VAR_R:
2955 case ADDR_SW_4KB_D_X:
2956 case ADDR_SW_4KB_R_X:
2957 case ADDR_SW_64KB_D_X:
2958 case ADDR_SW_64KB_R_X:
2959 case ADDR_SW_VAR_D_X:
2960 case ADDR_SW_VAR_R_X:
2961 support = (pIn->bpp <= 64);
2962 break;
2963
2964 default:
2965 break;
2966 }
2967 }
2968 else if (m_settings.isDcn1)
2969 {
2970 switch (swizzleMode)
2971 {
2972 case ADDR_SW_4KB_D:
2973 case ADDR_SW_64KB_D:
2974 case ADDR_SW_VAR_D:
2975 case ADDR_SW_64KB_D_T:
2976 case ADDR_SW_4KB_D_X:
2977 case ADDR_SW_64KB_D_X:
2978 case ADDR_SW_VAR_D_X:
2979 support = (pIn->bpp == 64);
2980 break;
2981
2982 case ADDR_SW_LINEAR:
2983 case ADDR_SW_4KB_S:
2984 case ADDR_SW_64KB_S:
2985 case ADDR_SW_VAR_S:
2986 case ADDR_SW_64KB_S_T:
2987 case ADDR_SW_4KB_S_X:
2988 case ADDR_SW_64KB_S_X:
2989 case ADDR_SW_VAR_S_X:
2990 support = (pIn->bpp <= 64);
2991 break;
2992
2993 default:
2994 break;
2995 }
2996 }
2997 else
2998 {
2999 ADDR_NOT_IMPLEMENTED();
3000 }
3001
3002 return support;
3003 }
3004
3005 /**
3006 ************************************************************************************************************************
3007 * Gfx9Lib::HwlComputePipeBankXor
3008 *
3009 * @brief
3010 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3011 *
3012 * @return
3013 * PipeBankXor value
3014 ************************************************************************************************************************
3015 */
3016 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3017 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3018 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
3019 {
3020 if (IsXor(pIn->swizzleMode))
3021 {
3022 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3023 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3024 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3025
3026 UINT_32 pipeXor = 0;
3027 UINT_32 bankXor = 0;
3028
3029 const UINT_32 bankMask = (1 << bankBits) - 1;
3030 const UINT_32 index = pIn->surfIndex & bankMask;
3031
3032 const UINT_32 bpp = pIn->flags.fmask ?
3033 GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3034 if (bankBits == 4)
3035 {
3036 static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3037 static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3038
3039 bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3040 }
3041 else if (bankBits > 0)
3042 {
3043 UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3044 bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3045 bankXor = (index * bankIncrease) & bankMask;
3046 }
3047
3048 pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3049 }
3050 else
3051 {
3052 pOut->pipeBankXor = 0;
3053 }
3054
3055 return ADDR_OK;
3056 }
3057
3058 /**
3059 ************************************************************************************************************************
3060 * Gfx9Lib::HwlComputeSlicePipeBankXor
3061 *
3062 * @brief
3063 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3064 *
3065 * @return
3066 * PipeBankXor value
3067 ************************************************************************************************************************
3068 */
3069 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3070 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3071 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const
3072 {
3073 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3074 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3075 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3076
3077 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3078 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3079
3080 pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3081
3082 return ADDR_OK;
3083 }
3084
3085 /**
3086 ************************************************************************************************************************
3087 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3088 *
3089 * @brief
3090 * Compute sub resource offset to support swizzle pattern
3091 *
3092 * @return
3093 * Offset
3094 ************************************************************************************************************************
3095 */
3096 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3097 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3098 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const
3099 {
3100 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3101
3102 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3103 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3104 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3105 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3106 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3107 UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3108
3109 pOut->offset = pIn->slice * pIn->sliceSize +
3110 pIn->macroBlockOffset +
3111 (pIn->mipTailOffset ^ pipeBankXor) -
3112 static_cast<UINT_64>(pipeBankXor);
3113 return ADDR_OK;
3114 }
3115
3116 /**
3117 ************************************************************************************************************************
3118 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3119 *
3120 * @brief
3121 * Compute surface info sanity check
3122 *
3123 * @return
3124 * Offset
3125 ************************************************************************************************************************
3126 */
3127 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3128 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3129 {
3130 BOOL_32 invalid = FALSE;
3131
3132 if ((pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3133 {
3134 invalid = TRUE;
3135 }
3136 else if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) ||
3137 (pIn->resourceType >= ADDR_RSRC_MAX_TYPE))
3138 {
3139 invalid = TRUE;
3140 }
3141
3142 BOOL_32 mipmap = (pIn->numMipLevels > 1);
3143 BOOL_32 msaa = (pIn->numFrags > 1);
3144
3145 ADDR2_SURFACE_FLAGS flags = pIn->flags;
3146 BOOL_32 zbuffer = (flags.depth || flags.stencil);
3147 BOOL_32 color = flags.color;
3148 BOOL_32 display = flags.display || flags.rotated;
3149
3150 AddrResourceType rsrcType = pIn->resourceType;
3151 BOOL_32 tex3d = IsTex3d(rsrcType);
3152 BOOL_32 thin3d = tex3d && flags.view3dAs2dArray;
3153 AddrSwizzleMode swizzle = pIn->swizzleMode;
3154 BOOL_32 linear = IsLinear(swizzle);
3155 BOOL_32 blk256B = IsBlock256b(swizzle);
3156 BOOL_32 blkVar = IsBlockVariable(swizzle);
3157 BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
3158 BOOL_32 prt = flags.prt;
3159 BOOL_32 stereo = flags.qbStereo;
3160
3161 if (invalid == FALSE)
3162 {
3163 if ((pIn->numFrags > 1) &&
3164 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3165 {
3166 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3167 invalid = TRUE;
3168 }
3169 }
3170
3171 if (invalid == FALSE)
3172 {
3173 switch (rsrcType)
3174 {
3175 case ADDR_RSRC_TEX_1D:
3176 invalid = msaa || zbuffer || display || (linear == FALSE) || stereo;
3177 break;
3178 case ADDR_RSRC_TEX_2D:
3179 invalid = (msaa && mipmap) || (stereo && msaa) || (stereo && mipmap);
3180 break;
3181 case ADDR_RSRC_TEX_3D:
3182 invalid = msaa || zbuffer || display || stereo;
3183 break;
3184 default:
3185 invalid = TRUE;
3186 break;
3187 }
3188 }
3189
3190 if (invalid == FALSE)
3191 {
3192 if (display)
3193 {
3194 invalid = (IsValidDisplaySwizzleMode(pIn) == FALSE);
3195 }
3196 }
3197
3198 if (invalid == FALSE)
3199 {
3200 if (linear)
3201 {
3202 invalid = ((ADDR_RSRC_TEX_1D != rsrcType) && prt) ||
3203 zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0);
3204 }
3205 else
3206 {
3207 if (blk256B || blkVar || isNonPrtXor)
3208 {
3209 invalid = prt;
3210 if (blk256B)
3211 {
3212 invalid = invalid || zbuffer || tex3d || mipmap || msaa;
3213 }
3214 }
3215
3216 if (invalid == FALSE)
3217 {
3218 if (IsZOrderSwizzle(swizzle))
3219 {
3220 invalid = (color && msaa) || thin3d;
3221 }
3222 else if (IsStandardSwizzle(swizzle))
3223 {
3224 invalid = zbuffer || thin3d;
3225 }
3226 else if (IsDisplaySwizzle(swizzle))
3227 {
3228 invalid = zbuffer || (prt && (ADDR_RSRC_TEX_3D == rsrcType));
3229 }
3230 else if (IsRotateSwizzle(swizzle))
3231 {
3232 invalid = zbuffer || (pIn->bpp > 64) || tex3d;
3233 }
3234 else
3235 {
3236 ADDR_ASSERT(!"invalid swizzle mode");
3237 invalid = TRUE;
3238 }
3239 }
3240 }
3241 }
3242
3243 ADDR_ASSERT(invalid == FALSE);
3244
3245 return invalid ? ADDR_INVALIDPARAMS : ADDR_OK;
3246 }
3247
3248 /**
3249 ************************************************************************************************************************
3250 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3251 *
3252 * @brief
3253 * Internal function to get suggested surface information for cliet to use
3254 *
3255 * @return
3256 * ADDR_E_RETURNCODE
3257 ************************************************************************************************************************
3258 */
3259 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3260 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3261 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const
3262 {
3263 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3264 ElemLib* pElemLib = GetElemLib();
3265
3266 UINT_32 bpp = pIn->bpp;
3267 UINT_32 width = pIn->width;
3268 UINT_32 height = pIn->height;
3269 UINT_32 numSamples = Max(pIn->numSamples, 1u);
3270 UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3271
3272 if (pIn->flags.fmask)
3273 {
3274 bpp = GetFmaskBpp(numSamples, numFrags);
3275 numFrags = 1;
3276 numSamples = 1;
3277 pOut->resourceType = ADDR_RSRC_TEX_2D;
3278 }
3279 else
3280 {
3281 // Set format to INVALID will skip this conversion
3282 if (pIn->format != ADDR_FMT_INVALID)
3283 {
3284 UINT_32 expandX, expandY;
3285
3286 // Don't care for this case
3287 ElemMode elemMode = ADDR_UNCOMPRESSED;
3288
3289 // Get compression/expansion factors and element mode which indicates compression/expansion
3290 bpp = pElemLib->GetBitsPerPixel(pIn->format,
3291 &elemMode,
3292 &expandX,
3293 &expandY);
3294
3295 UINT_32 basePitch = 0;
3296 GetElemLib()->AdjustSurfaceInfo(elemMode,
3297 expandX,
3298 expandY,
3299 &bpp,
3300 &basePitch,
3301 &width,
3302 &height);
3303 }
3304
3305 // The output may get changed for volume(3D) texture resource in future
3306 pOut->resourceType = pIn->resourceType;
3307 }
3308
3309 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
3310 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3311 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
3312 const BOOL_32 displayRsrc = pIn->flags.display || pIn->flags.rotated;
3313
3314 // Forbid swizzle mode(s) by client setting, for simplicity we never allow VAR swizzle mode for GFX9
3315 ADDR2_SWMODE_SET allowedSwModeSet = {};
3316 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3317 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask;
3318 allowedSwModeSet.value |= pIn->forbiddenBlock.macro4KB ? 0 : Gfx9Blk4KBSwModeMask;
3319 allowedSwModeSet.value |= pIn->forbiddenBlock.macro64KB ? 0 : Gfx9Blk64KBSwModeMask;
3320
3321 if (pIn->preferredSwSet.value != 0)
3322 {
3323 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3324 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3325 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3326 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3327 }
3328
3329 if (pIn->noXor)
3330 {
3331 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3332 }
3333
3334 if (pIn->maxAlign > 0)
3335 {
3336 if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
3337 {
3338 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3339 }
3340
3341 if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
3342 {
3343 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3344 }
3345
3346 if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
3347 {
3348 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3349 }
3350 }
3351
3352 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3353 switch (pOut->resourceType)
3354 {
3355 case ADDR_RSRC_TEX_1D:
3356 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3357 break;
3358
3359 case ADDR_RSRC_TEX_2D:
3360 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3361
3362 if (bpp > 64)
3363 {
3364 allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3365 }
3366 break;
3367
3368 case ADDR_RSRC_TEX_3D:
3369 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3370
3371 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3372 {
3373 // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3374 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3375 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3376 allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3377 }
3378
3379 if ((bpp == 128) && pIn->flags.color)
3380 {
3381 allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3382 }
3383
3384 if (pIn->flags.view3dAs2dArray)
3385 {
3386 allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3387 }
3388 break;
3389
3390 default:
3391 ADDR_ASSERT_ALWAYS();
3392 allowedSwModeSet.value = 0;
3393 break;
3394 }
3395
3396 if (pIn->format == ADDR_FMT_32_32_32)
3397 {
3398 allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3399 }
3400
3401 if (ElemLib::IsBlockCompressed(pIn->format))
3402 {
3403 if (pIn->flags.texture)
3404 {
3405 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3406 }
3407 else
3408 {
3409 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3410 }
3411 }
3412
3413 if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3414 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3415 {
3416 allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3417 }
3418
3419 if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3420 {
3421 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3422
3423 if (pIn->flags.noMetadata == FALSE)
3424 {
3425 if (pIn->flags.depth &&
3426 pIn->flags.texture &&
3427 (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3428 {
3429 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3430 // equation from wrong address within memory range a tile covered and use the
3431 // garbage data for compressed Z reading which finally leads to corruption.
3432 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3433 }
3434
3435 if (m_settings.htileCacheRbConflict &&
3436 (pIn->flags.depth || pIn->flags.stencil) &&
3437 (numSlices > 1) &&
3438 (pIn->flags.metaRbUnaligned == FALSE) &&
3439 (pIn->flags.metaPipeUnaligned == FALSE))
3440 {
3441 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3442 allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3443 }
3444 }
3445 }
3446
3447 if (msaa)
3448 {
3449 allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3450 }
3451
3452 if ((numFrags > 1) &&
3453 (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
3454 {
3455 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3456 allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3457 }
3458
3459 if (numMipLevels > 1)
3460 {
3461 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3462 }
3463
3464 if (displayRsrc)
3465 {
3466 if (m_settings.isDce12)
3467 {
3468 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3469 }
3470 else if (m_settings.isDcn1)
3471 {
3472 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3473 }
3474 else
3475 {
3476 ADDR_NOT_IMPLEMENTED();
3477 }
3478 }
3479
3480 if (allowedSwModeSet.value != 0)
3481 {
3482 #if DEBUG
3483 // Post sanity check, at least AddrLib should accept the output generated by its own
3484 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3485 localIn.flags = pIn->flags;
3486 localIn.resourceType = pOut->resourceType;
3487 localIn.format = pIn->format;
3488 localIn.bpp = bpp;
3489 localIn.width = width;
3490 localIn.height = height;
3491 localIn.numSlices = numSlices;
3492 localIn.numMipLevels = numMipLevels;
3493 localIn.numSamples = numSamples;
3494 localIn.numFrags = numFrags;
3495
3496 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3497 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3498 {
3499 if (validateSwModeSet & 1)
3500 {
3501 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3502 HwlComputeSurfaceInfoSanityCheck(&localIn);
3503 }
3504
3505 validateSwModeSet >>= 1;
3506 }
3507 #endif
3508
3509 pOut->validSwModeSet = allowedSwModeSet;
3510 pOut->canXor = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3511 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet);
3512 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3513
3514 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3515
3516 if (pOut->clientPreferredSwSet.value == 0)
3517 {
3518 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3519 }
3520
3521 if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3522 {
3523 pOut->swizzleMode = ADDR_SW_LINEAR;
3524 }
3525 else
3526 {
3527 // Always ignore linear swizzle mode if there is other choice.
3528 allowedSwModeSet.swLinear = 0;
3529
3530 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet);
3531
3532 // Determine block size if there is 2 or more block type candidates
3533 if (IsPow2(allowedBlockSet.value) == FALSE)
3534 {
3535 const AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {ADDR_SW_256B, ADDR_SW_4KB, ADDR_SW_64KB};
3536 Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
3537 Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
3538 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3539
3540 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
3541 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
3542 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3543 UINT_32 minSizeBlk = AddrBlockMicro;
3544 UINT_64 minSize = 0;
3545
3546 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3547 {
3548 if (allowedBlockSet.value & (1 << i))
3549 {
3550 ComputeBlockDimensionForSurf(&blkDim[i].w,
3551 &blkDim[i].h,
3552 &blkDim[i].d,
3553 bpp,
3554 numFrags,
3555 pOut->resourceType,
3556 swMode[i]);
3557
3558 if (displayRsrc)
3559 {
3560 blkDim[i].w = PowTwoAlign(blkDim[i].w, 32);
3561 }
3562
3563 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
3564 padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
3565
3566 if ((minSize == 0) ||
3567 ((padSize[i] * ratioHi) <= (minSize * ratioLow)))
3568 {
3569 minSize = padSize[i];
3570 minSizeBlk = i;
3571 }
3572 }
3573 }
3574
3575 if ((allowedBlockSet.micro == TRUE) &&
3576 (width <= blkDim[AddrBlockMicro].w) &&
3577 (height <= blkDim[AddrBlockMicro].h) &&
3578 (NextPow2(pIn->minSizeAlign) <= GetBlockSize(ADDR_SW_256B)))
3579 {
3580 minSizeBlk = AddrBlockMicro;
3581 }
3582
3583 if (minSizeBlk == AddrBlockMicro)
3584 {
3585 allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3586 }
3587 else if (minSizeBlk == AddrBlock4KB)
3588 {
3589 allowedSwModeSet.value &= Gfx9Blk4KBSwModeMask;
3590 }
3591 else
3592 {
3593 ADDR_ASSERT(minSizeBlk == AddrBlock64KB);
3594 allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3595 }
3596 }
3597
3598 // Block type should be determined.
3599 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet).value));
3600
3601 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3602
3603 // Determine swizzle type if there is 2 or more swizzle type candidates
3604 if (IsPow2(allowedSwSet.value) == FALSE)
3605 {
3606 if (ElemLib::IsBlockCompressed(pIn->format))
3607 {
3608 if (allowedSwSet.sw_D)
3609 {
3610 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3611 }
3612 else
3613 {
3614 ADDR_ASSERT(allowedSwSet.sw_S);
3615 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3616 }
3617 }
3618 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3619 {
3620 if (allowedSwSet.sw_S)
3621 {
3622 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3623 }
3624 else if (allowedSwSet.sw_D)
3625 {
3626 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3627 }
3628 else
3629 {
3630 ADDR_ASSERT(allowedSwSet.sw_R);
3631 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3632 }
3633 }
3634 else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3635 {
3636 if (pIn->flags.color && allowedSwSet.sw_D)
3637 {
3638 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3639 }
3640 else if (allowedSwSet.sw_Z)
3641 {
3642 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3643 }
3644 else
3645 {
3646 ADDR_ASSERT(allowedSwSet.sw_S);
3647 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3648 }
3649 }
3650 else
3651 {
3652 if (pIn->flags.rotated && allowedSwSet.sw_R)
3653 {
3654 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3655 }
3656 else if (displayRsrc && allowedSwSet.sw_D)
3657 {
3658 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3659 }
3660 else if (allowedSwSet.sw_S)
3661 {
3662 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3663 }
3664 else
3665 {
3666 ADDR_ASSERT(allowedSwSet.sw_Z);
3667 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3668 }
3669 }
3670 }
3671
3672 // Swizzle type should be determined.
3673 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3674
3675 // Determine swizzle mode now - always select the "largest" swizzle mode for a given block type +
3676 // swizzle type combination. For example, for AddrBlock64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3677 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3678 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3679 }
3680 }
3681 else
3682 {
3683 // Invalid combination...
3684 ADDR_ASSERT_ALWAYS();
3685 returnCode = ADDR_INVALIDPARAMS;
3686 }
3687
3688 return returnCode;
3689 }
3690
3691 /**
3692 ************************************************************************************************************************
3693 * Gfx9Lib::ComputeStereoInfo
3694 *
3695 * @brief
3696 * Compute height alignment and right eye pipeBankXor for stereo surface
3697 *
3698 * @return
3699 * Error code
3700 *
3701 ************************************************************************************************************************
3702 */
3703 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3704 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3705 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
3706 UINT_32* pHeightAlign
3707 ) const
3708 {
3709 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3710
3711 UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3712
3713 if (eqIndex < m_numEquations)
3714 {
3715 if (IsXor(pIn->swizzleMode))
3716 {
3717 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3718 const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2);
3719 const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
3720 const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
3721 const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3722 MAYBE_UNUSED const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
3723
3724 ADDR_ASSERT(maxYCoordBlock256 ==
3725 GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1));
3726
3727 const UINT_32 maxYCoordInBaseEquation =
3728 (blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256;
3729
3730 ADDR_ASSERT(maxYCoordInBaseEquation ==
3731 GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3732
3733 const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3734
3735 ADDR_ASSERT(maxYCoordInPipeXor ==
3736 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3737
3738 const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3739 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3740
3741 ADDR_ASSERT(maxYCoordInBankXor ==
3742 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3743
3744 const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3745
3746 if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3747 {
3748 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3749
3750 if (pOut->pStereoInfo != NULL)
3751 {
3752 pOut->pStereoInfo->rightSwizzle = 0;
3753
3754 if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3755 {
3756 if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3757 {
3758 pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3759 }
3760
3761 if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3762 {
3763 pOut->pStereoInfo->rightSwizzle |=
3764 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3765 }
3766
3767 ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3768 GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3769 numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3770 }
3771 }
3772 }
3773 }
3774 }
3775 else
3776 {
3777 ADDR_ASSERT_ALWAYS();
3778 returnCode = ADDR_ERROR;
3779 }
3780
3781 return returnCode;
3782 }
3783
3784 /**
3785 ************************************************************************************************************************
3786 * Gfx9Lib::HwlComputeSurfaceInfoTiled
3787 *
3788 * @brief
3789 * Internal function to calculate alignment for tiled surface
3790 *
3791 * @return
3792 * ADDR_E_RETURNCODE
3793 ************************************************************************************************************************
3794 */
3795 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3796 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3797 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3798 ) const
3799 {
3800 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3801 &pOut->blockHeight,
3802 &pOut->blockSlices,
3803 pIn->bpp,
3804 pIn->numFrags,
3805 pIn->resourceType,
3806 pIn->swizzleMode);
3807
3808 if (returnCode == ADDR_OK)
3809 {
3810 UINT_32 pitchAlignInElement = pOut->blockWidth;
3811
3812 if ((IsTex2d(pIn->resourceType) == TRUE) &&
3813 (pIn->flags.display || pIn->flags.rotated) &&
3814 (pIn->numMipLevels <= 1) &&
3815 (pIn->numSamples <= 1) &&
3816 (pIn->numFrags <= 1))
3817 {
3818 // Display engine needs pitch align to be at least 32 pixels.
3819 pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3820 }
3821
3822 pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3823
3824 if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3825 {
3826 if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3827 {
3828 returnCode = ADDR_INVALIDPARAMS;
3829 }
3830 else if (pIn->pitchInElement < pOut->pitch)
3831 {
3832 returnCode = ADDR_INVALIDPARAMS;
3833 }
3834 else
3835 {
3836 pOut->pitch = pIn->pitchInElement;
3837 }
3838 }
3839
3840 UINT_32 heightAlign = 0;
3841
3842 if (pIn->flags.qbStereo)
3843 {
3844 returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3845 }
3846
3847 if (returnCode == ADDR_OK)
3848 {
3849 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3850
3851 if (heightAlign > 1)
3852 {
3853 pOut->height = PowTwoAlign(pOut->height, heightAlign);
3854 }
3855
3856 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3857
3858 pOut->epitchIsHeight = FALSE;
3859 pOut->mipChainInTail = FALSE;
3860 pOut->firstMipIdInTail = pIn->numMipLevels;
3861
3862 pOut->mipChainPitch = pOut->pitch;
3863 pOut->mipChainHeight = pOut->height;
3864 pOut->mipChainSlice = pOut->numSlices;
3865
3866 if (pIn->numMipLevels > 1)
3867 {
3868 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
3869 pIn->swizzleMode,
3870 pIn->bpp,
3871 pIn->width,
3872 pIn->height,
3873 pIn->numSlices,
3874 pOut->blockWidth,
3875 pOut->blockHeight,
3876 pOut->blockSlices,
3877 pIn->numMipLevels,
3878 pOut->pMipInfo);
3879
3880 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
3881
3882 if (endingMipId == 0)
3883 {
3884 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
3885 pIn->swizzleMode,
3886 pOut->blockWidth,
3887 pOut->blockHeight,
3888 pOut->blockSlices);
3889
3890 pOut->epitchIsHeight = TRUE;
3891 pOut->pitch = tailMaxDim.w;
3892 pOut->height = tailMaxDim.h;
3893 pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ?
3894 tailMaxDim.d : pIn->numSlices;
3895 pOut->mipChainInTail = TRUE;
3896 }
3897 else
3898 {
3899 UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
3900 UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
3901
3902 AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
3903 pIn->swizzleMode,
3904 mip0WidthInBlk,
3905 mip0HeightInBlk,
3906 pOut->numSlices / pOut->blockSlices);
3907 if (majorMode == ADDR_MAJOR_Y)
3908 {
3909 UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
3910
3911 if ((mip1WidthInBlk == 1) && (endingMipId > 2))
3912 {
3913 mip1WidthInBlk++;
3914 }
3915
3916 pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
3917
3918 pOut->epitchIsHeight = FALSE;
3919 }
3920 else
3921 {
3922 UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
3923
3924 if ((mip1HeightInBlk == 1) && (endingMipId > 2))
3925 {
3926 mip1HeightInBlk++;
3927 }
3928
3929 pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
3930
3931 pOut->epitchIsHeight = TRUE;
3932 }
3933 }
3934
3935 if (pOut->pMipInfo != NULL)
3936 {
3937 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
3938
3939 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3940 {
3941 Dim3d mipStartPos = {0};
3942 UINT_32 mipTailOffsetInBytes = 0;
3943
3944 mipStartPos = GetMipStartPos(pIn->resourceType,
3945 pIn->swizzleMode,
3946 pOut->pitch,
3947 pOut->height,
3948 pOut->numSlices,
3949 pOut->blockWidth,
3950 pOut->blockHeight,
3951 pOut->blockSlices,
3952 i,
3953 elementBytesLog2,
3954 &mipTailOffsetInBytes);
3955
3956 UINT_32 pitchInBlock =
3957 pOut->mipChainPitch / pOut->blockWidth;
3958 UINT_32 sliceInBlock =
3959 (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
3960 UINT_64 blockIndex =
3961 mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
3962 UINT_64 macroBlockOffset =
3963 blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
3964
3965 pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
3966 pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
3967 }
3968 }
3969 }
3970 else if (pOut->pMipInfo != NULL)
3971 {
3972 pOut->pMipInfo[0].pitch = pOut->pitch;
3973 pOut->pMipInfo[0].height = pOut->height;
3974 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
3975 pOut->pMipInfo[0].offset = 0;
3976 }
3977
3978 pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
3979 (pIn->bpp >> 3) * pIn->numFrags;
3980 pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
3981 pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
3982
3983 if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
3984 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
3985 (pIn->flags.texture == TRUE) &&
3986 (pIn->flags.noMetadata == FALSE) &&
3987 (pIn->flags.metaPipeUnaligned == FALSE))
3988 {
3989 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
3990 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
3991 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
3992 // them, which may cause invalid metadata to be fetched.
3993 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes);
3994 }
3995
3996 if (pIn->flags.prt)
3997 {
3998 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
3999 }
4000 }
4001 }
4002
4003 return returnCode;
4004 }
4005
4006 /**
4007 ************************************************************************************************************************
4008 * Gfx9Lib::HwlComputeSurfaceInfoLinear
4009 *
4010 * @brief
4011 * Internal function to calculate alignment for linear surface
4012 *
4013 * @return
4014 * ADDR_E_RETURNCODE
4015 ************************************************************************************************************************
4016 */
4017 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4018 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4019 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4020 ) const
4021 {
4022 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4023 UINT_32 pitch = 0;
4024 UINT_32 actualHeight = 0;
4025 UINT_32 elementBytes = pIn->bpp >> 3;
4026 const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256;
4027
4028 if (IsTex1d(pIn->resourceType))
4029 {
4030 if (pIn->height > 1)
4031 {
4032 returnCode = ADDR_INVALIDPARAMS;
4033 }
4034 else
4035 {
4036 const UINT_32 pitchAlignInElement = alignment / elementBytes;
4037
4038 pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4039 actualHeight = pIn->numMipLevels;
4040
4041 if (pIn->flags.prt == FALSE)
4042 {
4043 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4044 &pitch, &actualHeight);
4045 }
4046
4047 if (returnCode == ADDR_OK)
4048 {
4049 if (pOut->pMipInfo != NULL)
4050 {
4051 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4052 {
4053 pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4054 pOut->pMipInfo[i].pitch = pitch;
4055 pOut->pMipInfo[i].height = 1;
4056 pOut->pMipInfo[i].depth = 1;
4057 }
4058 }
4059 }
4060 }
4061 }
4062 else
4063 {
4064 returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4065 }
4066
4067 if ((pitch == 0) || (actualHeight == 0))
4068 {
4069 returnCode = ADDR_INVALIDPARAMS;
4070 }
4071
4072 if (returnCode == ADDR_OK)
4073 {
4074 pOut->pitch = pitch;
4075 pOut->height = pIn->height;
4076 pOut->numSlices = pIn->numSlices;
4077 pOut->mipChainPitch = pitch;
4078 pOut->mipChainHeight = actualHeight;
4079 pOut->mipChainSlice = pOut->numSlices;
4080 pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4081 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4082 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
4083 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4084 pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4085 pOut->blockHeight = 1;
4086 pOut->blockSlices = 1;
4087 }
4088
4089 // Post calculation validate
4090 ADDR_ASSERT(pOut->sliceSize > 0);
4091
4092 return returnCode;
4093 }
4094
4095 /**
4096 ************************************************************************************************************************
4097 * Gfx9Lib::GetMipChainInfo
4098 *
4099 * @brief
4100 * Internal function to get out information about mip chain
4101 *
4102 * @return
4103 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4104 ************************************************************************************************************************
4105 */
4106 UINT_32 Gfx9Lib::GetMipChainInfo(
4107 AddrResourceType resourceType,
4108 AddrSwizzleMode swizzleMode,
4109 UINT_32 bpp,
4110 UINT_32 mip0Width,
4111 UINT_32 mip0Height,
4112 UINT_32 mip0Depth,
4113 UINT_32 blockWidth,
4114 UINT_32 blockHeight,
4115 UINT_32 blockDepth,
4116 UINT_32 numMipLevel,
4117 ADDR2_MIP_INFO* pMipInfo) const
4118 {
4119 const Dim3d tailMaxDim =
4120 GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4121
4122 UINT_32 mipPitch = mip0Width;
4123 UINT_32 mipHeight = mip0Height;
4124 UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
4125 UINT_32 offset = 0;
4126 UINT_32 firstMipIdInTail = numMipLevel;
4127 BOOL_32 inTail = FALSE;
4128 BOOL_32 finalDim = FALSE;
4129 BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
4130 BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
4131
4132 for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4133 {
4134 if (inTail)
4135 {
4136 if (finalDim == FALSE)
4137 {
4138 UINT_32 mipSize;
4139
4140 if (is3dThick)
4141 {
4142 mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4143 }
4144 else
4145 {
4146 mipSize = mipPitch * mipHeight * (bpp >> 3);
4147 }
4148
4149 if (mipSize <= 256)
4150 {
4151 UINT_32 index = Log2(bpp >> 3);
4152
4153 if (is3dThick)
4154 {
4155 mipPitch = Block256_3dZ[index].w;
4156 mipHeight = Block256_3dZ[index].h;
4157 mipDepth = Block256_3dZ[index].d;
4158 }
4159 else
4160 {
4161 mipPitch = Block256_2d[index].w;
4162 mipHeight = Block256_2d[index].h;
4163 }
4164
4165 finalDim = TRUE;
4166 }
4167 }
4168 }
4169 else
4170 {
4171 inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4172 mipPitch, mipHeight, mipDepth);
4173
4174 if (inTail)
4175 {
4176 firstMipIdInTail = mipId;
4177 mipPitch = tailMaxDim.w;
4178 mipHeight = tailMaxDim.h;
4179
4180 if (is3dThick)
4181 {
4182 mipDepth = tailMaxDim.d;
4183 }
4184 }
4185 else
4186 {
4187 mipPitch = PowTwoAlign(mipPitch, blockWidth);
4188 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4189
4190 if (is3dThick)
4191 {
4192 mipDepth = PowTwoAlign(mipDepth, blockDepth);
4193 }
4194 }
4195 }
4196
4197 if (pMipInfo != NULL)
4198 {
4199 pMipInfo[mipId].pitch = mipPitch;
4200 pMipInfo[mipId].height = mipHeight;
4201 pMipInfo[mipId].depth = mipDepth;
4202 pMipInfo[mipId].offset = offset;
4203 }
4204
4205 offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4206
4207 if (finalDim)
4208 {
4209 if (is3dThin)
4210 {
4211 mipDepth = Max(mipDepth >> 1, 1u);
4212 }
4213 }
4214 else
4215 {
4216 mipPitch = Max(mipPitch >> 1, 1u);
4217 mipHeight = Max(mipHeight >> 1, 1u);
4218
4219 if (is3dThick || is3dThin)
4220 {
4221 mipDepth = Max(mipDepth >> 1, 1u);
4222 }
4223 }
4224 }
4225
4226 return firstMipIdInTail;
4227 }
4228
4229 /**
4230 ************************************************************************************************************************
4231 * Gfx9Lib::GetMetaMiptailInfo
4232 *
4233 * @brief
4234 * Get mip tail coordinate information.
4235 *
4236 * @return
4237 * N/A
4238 ************************************************************************************************************************
4239 */
4240 VOID Gfx9Lib::GetMetaMiptailInfo(
4241 ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord
4242 Dim3d mipCoord, ///< [in] mip tail base coord
4243 UINT_32 numMipInTail, ///< [in] number of mips in tail
4244 Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
4245 ) const
4246 {
4247 BOOL_32 isThick = (pMetaBlkDim->d > 1);
4248 UINT_32 mipWidth = pMetaBlkDim->w;
4249 UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4250 UINT_32 mipDepth = pMetaBlkDim->d;
4251 UINT_32 minInc;
4252
4253 if (isThick)
4254 {
4255 minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4256 }
4257 else if (pMetaBlkDim->h >= 1024)
4258 {
4259 minInc = 256;
4260 }
4261 else if (pMetaBlkDim->h == 512)
4262 {
4263 minInc = 128;
4264 }
4265 else
4266 {
4267 minInc = 64;
4268 }
4269
4270 UINT_32 blk32MipId = 0xFFFFFFFF;
4271
4272 for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4273 {
4274 pInfo[mip].inMiptail = TRUE;
4275 pInfo[mip].startX = mipCoord.w;
4276 pInfo[mip].startY = mipCoord.h;
4277 pInfo[mip].startZ = mipCoord.d;
4278 pInfo[mip].width = mipWidth;
4279 pInfo[mip].height = mipHeight;
4280 pInfo[mip].depth = mipDepth;
4281
4282 if (mipWidth <= 32)
4283 {
4284 if (blk32MipId == 0xFFFFFFFF)
4285 {
4286 blk32MipId = mip;
4287 }
4288
4289 mipCoord.w = pInfo[blk32MipId].startX;
4290 mipCoord.h = pInfo[blk32MipId].startY;
4291 mipCoord.d = pInfo[blk32MipId].startZ;
4292
4293 switch (mip - blk32MipId)
4294 {
4295 case 0:
4296 mipCoord.w += 32; // 16x16
4297 break;
4298 case 1:
4299 mipCoord.h += 32; // 8x8
4300 break;
4301 case 2:
4302 mipCoord.h += 32; // 4x4
4303 mipCoord.w += 16;
4304 break;
4305 case 3:
4306 mipCoord.h += 32; // 2x2
4307 mipCoord.w += 32;
4308 break;
4309 case 4:
4310 mipCoord.h += 32; // 1x1
4311 mipCoord.w += 48;
4312 break;
4313 // The following are for BC/ASTC formats
4314 case 5:
4315 mipCoord.h += 48; // 1/2 x 1/2
4316 break;
4317 case 6:
4318 mipCoord.h += 48; // 1/4 x 1/4
4319 mipCoord.w += 16;
4320 break;
4321 case 7:
4322 mipCoord.h += 48; // 1/8 x 1/8
4323 mipCoord.w += 32;
4324 break;
4325 case 8:
4326 mipCoord.h += 48; // 1/16 x 1/16
4327 mipCoord.w += 48;
4328 break;
4329 default:
4330 ADDR_ASSERT_ALWAYS();
4331 break;
4332 }
4333
4334 mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4335 mipHeight = mipWidth;
4336
4337 if (isThick)
4338 {
4339 mipDepth = mipWidth;
4340 }
4341 }
4342 else
4343 {
4344 if (mipWidth <= minInc)
4345 {
4346 // if we're below the minimal increment...
4347 if (isThick)
4348 {
4349 // For 3d, just go in z direction
4350 mipCoord.d += mipDepth;
4351 }
4352 else
4353 {
4354 // For 2d, first go across, then down
4355 if ((mipWidth * 2) == minInc)
4356 {
4357 // if we're 2 mips below, that's when we go back in x, and down in y
4358 mipCoord.w -= minInc;
4359 mipCoord.h += minInc;
4360 }
4361 else
4362 {
4363 // otherwise, just go across in x
4364 mipCoord.w += minInc;
4365 }
4366 }
4367 }
4368 else
4369 {
4370 // On even mip, go down, otherwise, go across
4371 if (mip & 1)
4372 {
4373 mipCoord.w += mipWidth;
4374 }
4375 else
4376 {
4377 mipCoord.h += mipHeight;
4378 }
4379 }
4380 // Divide the width by 2
4381 mipWidth >>= 1;
4382 // After the first mip in tail, the mip is always a square
4383 mipHeight = mipWidth;
4384 // ...or for 3d, a cube
4385 if (isThick)
4386 {
4387 mipDepth = mipWidth;
4388 }
4389 }
4390 }
4391 }
4392
4393 /**
4394 ************************************************************************************************************************
4395 * Gfx9Lib::GetMipStartPos
4396 *
4397 * @brief
4398 * Internal function to get out information about mip logical start position
4399 *
4400 * @return
4401 * logical start position in macro block width/heith/depth of one mip level within one slice
4402 ************************************************************************************************************************
4403 */
4404 Dim3d Gfx9Lib::GetMipStartPos(
4405 AddrResourceType resourceType,
4406 AddrSwizzleMode swizzleMode,
4407 UINT_32 width,
4408 UINT_32 height,
4409 UINT_32 depth,
4410 UINT_32 blockWidth,
4411 UINT_32 blockHeight,
4412 UINT_32 blockDepth,
4413 UINT_32 mipId,
4414 UINT_32 log2ElementBytes,
4415 UINT_32* pMipTailBytesOffset) const
4416 {
4417 Dim3d mipStartPos = {0};
4418 const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4419
4420 // Report mip in tail if Mip0 is already in mip tail
4421 BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4422 UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode);
4423 UINT_32 mipIndexInTail = mipId;
4424
4425 if (inMipTail == FALSE)
4426 {
4427 // Mip 0 dimension, unit in block
4428 UINT_32 mipWidthInBlk = width / blockWidth;
4429 UINT_32 mipHeightInBlk = height / blockHeight;
4430 UINT_32 mipDepthInBlk = depth / blockDepth;
4431 AddrMajorMode majorMode = GetMajorMode(resourceType,
4432 swizzleMode,
4433 mipWidthInBlk,
4434 mipHeightInBlk,
4435 mipDepthInBlk);
4436
4437 UINT_32 endingMip = mipId + 1;
4438
4439 for (UINT_32 i = 1; i <= mipId; i++)
4440 {
4441 if ((i == 1) || (i == 3))
4442 {
4443 if (majorMode == ADDR_MAJOR_Y)
4444 {
4445 mipStartPos.w += mipWidthInBlk;
4446 }
4447 else
4448 {
4449 mipStartPos.h += mipHeightInBlk;
4450 }
4451 }
4452 else
4453 {
4454 if (majorMode == ADDR_MAJOR_X)
4455 {
4456 mipStartPos.w += mipWidthInBlk;
4457 }
4458 else if (majorMode == ADDR_MAJOR_Y)
4459 {
4460 mipStartPos.h += mipHeightInBlk;
4461 }
4462 else
4463 {
4464 mipStartPos.d += mipDepthInBlk;
4465 }
4466 }
4467
4468 BOOL_32 inTail = FALSE;
4469
4470 if (IsThick(resourceType, swizzleMode))
4471 {
4472 UINT_32 dim = log2blkSize % 3;
4473
4474 if (dim == 0)
4475 {
4476 inTail =
4477 (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4478 }
4479 else if (dim == 1)
4480 {
4481 inTail =
4482 (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4483 }
4484 else
4485 {
4486 inTail =
4487 (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4488 }
4489 }
4490 else
4491 {
4492 if (log2blkSize & 1)
4493 {
4494 inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4495 }
4496 else
4497 {
4498 inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4499 }
4500 }
4501
4502 if (inTail)
4503 {
4504 endingMip = i;
4505 break;
4506 }
4507
4508 mipWidthInBlk = RoundHalf(mipWidthInBlk);
4509 mipHeightInBlk = RoundHalf(mipHeightInBlk);
4510 mipDepthInBlk = RoundHalf(mipDepthInBlk);
4511 }
4512
4513 if (mipId >= endingMip)
4514 {
4515 inMipTail = TRUE;
4516 mipIndexInTail = mipId - endingMip;
4517 }
4518 }
4519
4520 if (inMipTail)
4521 {
4522 UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize;
4523 ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4524 *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4525 }
4526
4527 return mipStartPos;
4528 }
4529
4530 /**
4531 ************************************************************************************************************************
4532 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4533 *
4534 * @brief
4535 * Internal function to calculate address from coord for tiled swizzle surface
4536 *
4537 * @return
4538 * ADDR_E_RETURNCODE
4539 ************************************************************************************************************************
4540 */
4541 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4542 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4543 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4544 ) const
4545 {
4546 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4547 localIn.swizzleMode = pIn->swizzleMode;
4548 localIn.flags = pIn->flags;
4549 localIn.resourceType = pIn->resourceType;
4550 localIn.bpp = pIn->bpp;
4551 localIn.width = Max(pIn->unalignedWidth, 1u);
4552 localIn.height = Max(pIn->unalignedHeight, 1u);
4553 localIn.numSlices = Max(pIn->numSlices, 1u);
4554 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4555 localIn.numSamples = Max(pIn->numSamples, 1u);
4556 localIn.numFrags = Max(pIn->numFrags, 1u);
4557 if (localIn.numMipLevels <= 1)
4558 {
4559 localIn.pitchInElement = pIn->pitchInElement;
4560 }
4561
4562 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4563 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4564
4565 BOOL_32 valid = (returnCode == ADDR_OK) &&
4566 (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4567 IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4568 ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4569
4570 if (valid)
4571 {
4572 UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3);
4573 Dim3d mipStartPos = {0};
4574 UINT_32 mipTailBytesOffset = 0;
4575
4576 if (pIn->numMipLevels > 1)
4577 {
4578 // Mip-map chain cannot be MSAA surface
4579 ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4580
4581 mipStartPos = GetMipStartPos(pIn->resourceType,
4582 pIn->swizzleMode,
4583 localOut.pitch,
4584 localOut.height,
4585 localOut.numSlices,
4586 localOut.blockWidth,
4587 localOut.blockHeight,
4588 localOut.blockSlices,
4589 pIn->mipId,
4590 log2ElementBytes,
4591 &mipTailBytesOffset);
4592 }
4593
4594 UINT_32 interleaveOffset = 0;
4595 UINT_32 pipeBits = 0;
4596 UINT_32 pipeXor = 0;
4597 UINT_32 bankBits = 0;
4598 UINT_32 bankXor = 0;
4599
4600 if (IsThin(pIn->resourceType, pIn->swizzleMode))
4601 {
4602 UINT_32 blockOffset = 0;
4603 UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4604
4605 if (IsZOrderSwizzle(pIn->swizzleMode))
4606 {
4607 // Morton generation
4608 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4609 {
4610 UINT_32 totalLowBits = 6 - log2ElementBytes;
4611 UINT_32 mortBits = totalLowBits / 2;
4612 UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4613 // Are 9 bits enough?
4614 UINT_32 highBitsValue =
4615 MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4616 blockOffset = lowBitsValue | highBitsValue;
4617 ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4618 }
4619 else
4620 {
4621 blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4622 }
4623
4624 // Fill LSBs with sample bits
4625 if (pIn->numSamples > 1)
4626 {
4627 blockOffset *= pIn->numSamples;
4628 blockOffset |= pIn->sample;
4629 }
4630
4631 // Shift according to BytesPP
4632 blockOffset <<= log2ElementBytes;
4633 }
4634 else
4635 {
4636 // Micro block offset
4637 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4638 blockOffset = microBlockOffset;
4639
4640 // Micro block dimension
4641 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4642 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4643 // Morton generation, does 12 bit enough?
4644 blockOffset |=
4645 MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4646
4647 // Sample bits start location
4648 UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples);
4649 // Join sample bits information to the highest Macro block bits
4650 if (IsNonPrtXor(pIn->swizzleMode))
4651 {
4652 // Non-prt-Xor : xor highest Macro block bits with sample bits
4653 blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4654 }
4655 else
4656 {
4657 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4658 // after this op, the blockOffset only contains log2 Macro block size bits
4659 blockOffset %= (1 << sampleStart);
4660 blockOffset |= (pIn->sample << sampleStart);
4661 ADDR_ASSERT((blockOffset >> log2blkSize) == 0);
4662 }
4663 }
4664
4665 if (IsXor(pIn->swizzleMode))
4666 {
4667 // Mask off bits above Macro block bits to keep page synonyms working for prt
4668 if (IsPrt(pIn->swizzleMode))
4669 {
4670 blockOffset &= ((1 << log2blkSize) - 1);
4671 }
4672
4673 // Preserve offset inside pipe interleave
4674 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4675 blockOffset >>= m_pipeInterleaveLog2;
4676
4677 // Pipe/Se xor bits
4678 pipeBits = GetPipeXorBits(log2blkSize);
4679 // Pipe xor
4680 pipeXor = FoldXor2d(blockOffset, pipeBits);
4681 blockOffset >>= pipeBits;
4682
4683 // Bank xor bits
4684 bankBits = GetBankXorBits(log2blkSize);
4685 // Bank Xor
4686 bankXor = FoldXor2d(blockOffset, bankBits);
4687 blockOffset >>= bankBits;
4688
4689 // Put all the part back together
4690 blockOffset <<= bankBits;
4691 blockOffset |= bankXor;
4692 blockOffset <<= pipeBits;
4693 blockOffset |= pipeXor;
4694 blockOffset <<= m_pipeInterleaveLog2;
4695 blockOffset |= interleaveOffset;
4696 }
4697
4698 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4699 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4700
4701 blockOffset |= mipTailBytesOffset;
4702
4703 if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4704 {
4705 // Apply slice xor if not MSAA/PRT
4706 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4707 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4708 (m_pipeInterleaveLog2 + pipeBits));
4709 }
4710
4711 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4712 bankBits, pipeBits, &blockOffset);
4713
4714 blockOffset %= (1 << log2blkSize);
4715
4716 UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4717 UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4718 UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4719 UINT_64 macroBlockIndex =
4720 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4721 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4722 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4723
4724 pOut->addr = blockOffset | (macroBlockIndex << log2blkSize);
4725 }
4726 else
4727 {
4728 UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4729
4730 Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4731
4732 UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4733 (pIn->y / microBlockDim.h),
4734 (pIn->slice / microBlockDim.d),
4735 8);
4736
4737 blockOffset <<= 10;
4738 blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4739
4740 if (IsXor(pIn->swizzleMode))
4741 {
4742 // Mask off bits above Macro block bits to keep page synonyms working for prt
4743 if (IsPrt(pIn->swizzleMode))
4744 {
4745 blockOffset &= ((1 << log2blkSize) - 1);
4746 }
4747
4748 // Preserve offset inside pipe interleave
4749 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4750 blockOffset >>= m_pipeInterleaveLog2;
4751
4752 // Pipe/Se xor bits
4753 pipeBits = GetPipeXorBits(log2blkSize);
4754 // Pipe xor
4755 pipeXor = FoldXor3d(blockOffset, pipeBits);
4756 blockOffset >>= pipeBits;
4757
4758 // Bank xor bits
4759 bankBits = GetBankXorBits(log2blkSize);
4760 // Bank Xor
4761 bankXor = FoldXor3d(blockOffset, bankBits);
4762 blockOffset >>= bankBits;
4763
4764 // Put all the part back together
4765 blockOffset <<= bankBits;
4766 blockOffset |= bankXor;
4767 blockOffset <<= pipeBits;
4768 blockOffset |= pipeXor;
4769 blockOffset <<= m_pipeInterleaveLog2;
4770 blockOffset |= interleaveOffset;
4771 }
4772
4773 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4774 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4775 blockOffset |= mipTailBytesOffset;
4776
4777 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4778 bankBits, pipeBits, &blockOffset);
4779
4780 blockOffset %= (1 << log2blkSize);
4781
4782 UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
4783 UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4784 UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4785
4786 UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4787 UINT_32 sliceSizeInBlock =
4788 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4789 UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4790
4791 pOut->addr = blockOffset | (blockIndex << log2blkSize);
4792 }
4793 }
4794 else
4795 {
4796 returnCode = ADDR_INVALIDPARAMS;
4797 }
4798
4799 return returnCode;
4800 }
4801
4802 /**
4803 ************************************************************************************************************************
4804 * Gfx9Lib::ComputeSurfaceInfoLinear
4805 *
4806 * @brief
4807 * Internal function to calculate padding for linear swizzle 2D/3D surface
4808 *
4809 * @return
4810 * N/A
4811 ************************************************************************************************************************
4812 */
4813 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
4814 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture
4815 UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element
4816 UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW
4817 ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information
4818 ) const
4819 {
4820 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4821
4822 UINT_32 elementBytes = pIn->bpp >> 3;
4823 UINT_32 pitchAlignInElement = 0;
4824
4825 if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
4826 {
4827 ADDR_ASSERT(pIn->numMipLevels <= 1);
4828 ADDR_ASSERT(pIn->numSlices <= 1);
4829 pitchAlignInElement = 1;
4830 }
4831 else
4832 {
4833 pitchAlignInElement = (256 / elementBytes);
4834 }
4835
4836 UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement);
4837 UINT_32 slice0PaddedHeight = pIn->height;
4838
4839 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4840 &mipChainWidth, &slice0PaddedHeight);
4841
4842 if (returnCode == ADDR_OK)
4843 {
4844 UINT_32 mipChainHeight = 0;
4845 UINT_32 mipHeight = pIn->height;
4846 UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4847
4848 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4849 {
4850 if (pMipInfo != NULL)
4851 {
4852 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
4853 pMipInfo[i].pitch = mipChainWidth;
4854 pMipInfo[i].height = mipHeight;
4855 pMipInfo[i].depth = mipDepth;
4856 }
4857
4858 mipChainHeight += mipHeight;
4859 mipHeight = RoundHalf(mipHeight);
4860 mipHeight = Max(mipHeight, 1u);
4861 }
4862
4863 *pMipmap0PaddedWidth = mipChainWidth;
4864 *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
4865 }
4866
4867 return returnCode;
4868 }
4869
4870 } // V2
4871 } // Addr