radeonsi: add support for Renoir
[mesa.git] / src / amd / addrlib / src / gfx9 / gfx9addrlib.cpp
1 /*
2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx9addrlib.h"
35
36 #include "gfx9_gb_reg.h"
37
38 #include "amdgpu_asic_addr.h"
39
40 #include "util/macros.h"
41
42 ////////////////////////////////////////////////////////////////////////////////////////////////////
43 ////////////////////////////////////////////////////////////////////////////////////////////////////
44
45 namespace Addr
46 {
47
48 /**
49 ************************************************************************************************************************
50 * Gfx9HwlInit
51 *
52 * @brief
53 * Creates an Gfx9Lib object.
54 *
55 * @return
56 * Returns an Gfx9Lib object pointer.
57 ************************************************************************************************************************
58 */
59 Addr::Lib* Gfx9HwlInit(const Client* pClient)
60 {
61 return V2::Gfx9Lib::CreateObj(pClient);
62 }
63
64 namespace V2
65 {
66
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68 // Static Const Member
69 ////////////////////////////////////////////////////////////////////////////////////////////////////
70
// Static table of per-swizzle-mode flags, one row per ADDR_SW_* mode (ADDR_SW_MAX_TYPE rows in total).
// Each row lists the twelve flags named in the column header below, in that order; the trailing comment on
// every row names the swizzle mode it describes. The constructor copies this table into m_swizzleModeTable.
// The RtOpt column is 0 for every mode in this table.
71 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
72 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt
73 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
74 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
75 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D
76 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R
77
// 4KB block modes (no XOR)
78 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
79 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
80 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D
81 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R
82
// 64KB block modes (no XOR)
83 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
84 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
85 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D
86 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_R
87
// Variable block size modes (no XOR)
88 {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z
89 {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S
90 {0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D
91 {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R
92
// 64KB "_T" modes: both the XOR and T flags are set
93 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T
94 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T
95 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T
96 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T
97
// 4KB "_X" modes: XOR flag set
98 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_x
99 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_x
100 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_x
101 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_x
102
// 64KB "_X" modes: XOR flag set
103 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X
104 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X
105 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X
106 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X
107
// Variable block size "_X" modes: XOR flag set
108 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X
109 {0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X
110 {0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X
111 {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X
// Same flags as ADDR_SW_LINEAR (only the Linear column is set)
112 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
113 };
114
// Static lookup table of 16 offsets, strictly decreasing from 2048 down to 0.
// NOTE(review): the name suggests mip-tail offsets expressed in 256B units, indexed by position in the
// mip tail; the consumer is not visible in this part of the file -- confirm against its user.
115 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
116 8, 6, 5, 4, 3, 2, 1, 0};
117
// Five {w, h, d} block dimensions; each entry multiplies out to 256, 128, 64, 32 and 16 respectively.
// NOTE(review): presumably the 3D "S" 256B block shape per element size (1..16 bytes) -- confirm.
118 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
119
// Five {w, h, d} block dimensions with the same per-entry volumes as Block256_3dS.
// NOTE(review): presumably the 3D "Z" 256B block shape per element size (1..16 bytes) -- confirm.
120 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
121
122 /**
123 ************************************************************************************************************************
124 * Gfx9Lib::Gfx9Lib
125 *
126 * @brief
127 * Constructor
128 *
129 ************************************************************************************************************************
130 */
131 Gfx9Lib::Gfx9Lib(const Client* pClient)
132 :
133 Lib(pClient),
134 m_numEquations(0)
135 {
136 m_class = AI_ADDRLIB;
137 memset(&m_settings, 0, sizeof(m_settings));
138 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
139 memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
140 m_metaEqOverrideIndex = 0;
141 }
142
143 /**
144 ************************************************************************************************************************
145 * Gfx9Lib::~Gfx9Lib
146 *
147 * @brief
148 * Destructor
149 ************************************************************************************************************************
150 */
151 Gfx9Lib::~Gfx9Lib()
152 {
153 }
154
155 /**
156 ************************************************************************************************************************
157 * Gfx9Lib::HwlComputeHtileInfo
158 *
159 * @brief
160 * Interface function stub of AddrComputeHtilenfo
161 *
162 * @return
163 * ADDR_E_RETURNCODE
164 ************************************************************************************************************************
165 */
166 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
167 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
168 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
169 ) const
170 {
171 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
172 pIn->swizzleMode);
173
174 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
175
176 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
177
178 if ((numPipeTotal == 1) && (numRbTotal == 1))
179 {
180 numCompressBlkPerMetaBlkLog2 = 10;
181 }
182 else
183 {
184 if (m_settings.applyAliasFix)
185 {
186 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
187 }
188 else
189 {
190 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
191 }
192 }
193
194 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
195
196 Dim3d metaBlkDim = {8, 8, 1};
197 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
198 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
199 UINT_32 heightAmp = totalAmpBits - widthAmp;
200 metaBlkDim.w <<= widthAmp;
201 metaBlkDim.h <<= heightAmp;
202
203 #if DEBUG
204 Dim3d metaBlkDimDbg = {8, 8, 1};
205 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
206 {
207 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
208 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
209 {
210 metaBlkDimDbg.h <<= 1;
211 }
212 else
213 {
214 metaBlkDimDbg.w <<= 1;
215 }
216 }
217 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
218 #endif
219
220 UINT_32 numMetaBlkX;
221 UINT_32 numMetaBlkY;
222 UINT_32 numMetaBlkZ;
223
224 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
225 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
226 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
227
228 const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
229 UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
230
231 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
232 {
233 align *= (numPipeTotal >> 1);
234 }
235
236 align = Max(align, metaBlkSize);
237
238 if (m_settings.metaBaseAlignFix)
239 {
240 align = Max(align, GetBlockSize(pIn->swizzleMode));
241 }
242
243 if (m_settings.htileAlignFix)
244 {
245 const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
246 const INT_32 htileCachelineSizeLog2 = 11;
247 const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
248
249 INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
250
251 align <<= rbMaskPadding;
252 }
253
254 pOut->pitch = numMetaBlkX * metaBlkDim.w;
255 pOut->height = numMetaBlkY * metaBlkDim.h;
256 pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
257
258 pOut->metaBlkWidth = metaBlkDim.w;
259 pOut->metaBlkHeight = metaBlkDim.h;
260 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
261
262 pOut->baseAlign = align;
263 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
264
265 return ADDR_OK;
266 }
267
268 /**
269 ************************************************************************************************************************
270 * Gfx9Lib::HwlComputeCmaskInfo
271 *
272 * @brief
273 * Interface function stub of AddrComputeCmaskInfo
274 *
275 * @return
276 * ADDR_E_RETURNCODE
277 ************************************************************************************************************************
278 */
279 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
280 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
281 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
282 ) const
283 {
284 // TODO: Clarify with AddrLib team
285 // ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
286
287 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
288 pIn->swizzleMode);
289
290 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
291
292 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
293
294 if ((numPipeTotal == 1) && (numRbTotal == 1))
295 {
296 numCompressBlkPerMetaBlkLog2 = 13;
297 }
298 else
299 {
300 if (m_settings.applyAliasFix)
301 {
302 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
303 }
304 else
305 {
306 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
307 }
308
309 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
310 }
311
312 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
313
314 Dim2d metaBlkDim = {8, 8};
315 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
316 UINT_32 heightAmp = totalAmpBits >> 1;
317 UINT_32 widthAmp = totalAmpBits - heightAmp;
318 metaBlkDim.w <<= widthAmp;
319 metaBlkDim.h <<= heightAmp;
320
321 #if DEBUG
322 Dim2d metaBlkDimDbg = {8, 8};
323 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
324 {
325 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
326 {
327 metaBlkDimDbg.h <<= 1;
328 }
329 else
330 {
331 metaBlkDimDbg.w <<= 1;
332 }
333 }
334 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
335 #endif
336
337 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
338 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
339 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
340
341 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
342
343 if (m_settings.metaBaseAlignFix)
344 {
345 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
346 }
347
348 pOut->pitch = numMetaBlkX * metaBlkDim.w;
349 pOut->height = numMetaBlkY * metaBlkDim.h;
350 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
351 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
352 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
353
354 pOut->metaBlkWidth = metaBlkDim.w;
355 pOut->metaBlkHeight = metaBlkDim.h;
356
357 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
358
359 return ADDR_OK;
360 }
361
362 /**
363 ************************************************************************************************************************
364 * Gfx9Lib::GetMetaMipInfo
365 *
366 * @brief
367 * Get meta mip info: computes how many meta blocks are needed in X, Y and Z to hold the whole mip chain
* and, when pInfo is not NULL, records the placement (start coordinate and aligned size) of every mip.
* Mips small enough to fit the "tail" (one meta block wide, half a meta block high) are delegated to
* GetMetaMiptailInfo.
368 *
369 * @return
370 * N/A (results are written through pInfo and the three pNumMetaBlk* pointers)
371 ************************************************************************************************************************
372 */
373 VOID Gfx9Lib::GetMetaMipInfo(
374 UINT_32 numMipLevels, ///< [in] number of mip levels
375 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
376 BOOL_32 dataThick, ///< [in] data surface is thick
377 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
378 UINT_32 mip0Width, ///< [in] mip0 width
379 UINT_32 mip0Height, ///< [in] mip0 height
380 UINT_32 mip0Depth, ///< [in] mip0 depth
381 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
382 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
383 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
384 const
385 {
// Number of meta blocks covering mip0 in each dimension (rounded up).
386 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
387 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
388 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
// A mip is "in the tail" once it fits in a full meta block width and half a meta block height
// (and, for thick data, a full meta block depth).
389 UINT_32 tailWidth = pMetaBlkDim->w;
390 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
391 UINT_32 tailDepth = pMetaBlkDim->d;
392 BOOL_32 inTail = FALSE;
// Stays ADDR_MAJOR_MAX_TYPE for single-mip surfaces; the switches below then take their default case.
393 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
394
395 if (numMipLevels > 1)
396 {
// Pick the major axis: Z only for thick data where Z has strictly the most blocks, otherwise X
// when it has at least as many blocks as Y, otherwise Y.
397 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
398 {
399 // Z major
400 major = ADDR_MAJOR_Z;
401 }
402 else if (numMetaBlkX >= numMetaBlkY)
403 {
404 // X major
405 major = ADDR_MAJOR_X;
406 }
407 else
408 {
409 // Y major
410 major = ADDR_MAJOR_Y;
411 }
412
413 inTail = ((mip0Width <= tailWidth) &&
414 (mip0Height <= tailHeight) &&
415 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
416
// If mip0 itself is not in the tail, enlarge one block count so the lower mips have room.
417 if (inTail == FALSE)
418 {
419 UINT_32 orderLimit;
// pMipDim aliases the block count that is enlarged; pOrderDim aliases the major-axis block count.
420 UINT_32 *pMipDim;
421 UINT_32 *pOrderDim;
422
423 if (major == ADDR_MAJOR_Z)
424 {
425 // Z major
426 pMipDim = &numMetaBlkY;
427 pOrderDim = &numMetaBlkZ;
428 orderLimit = 4;
429 }
430 else if (major == ADDR_MAJOR_X)
431 {
432 // X major
433 pMipDim = &numMetaBlkY;
434 pOrderDim = &numMetaBlkX;
435 orderLimit = 4;
436 }
437 else
438 {
439 // Y major
440 pMipDim = &numMetaBlkX;
441 pOrderDim = &numMetaBlkY;
442 orderLimit = 2;
443 }
444
// Fewer than 3 blocks on the enlarged axis, a long major axis and more than 3 mips: add a fixed
// 2 blocks. Otherwise grow the enlarged axis by half of its size, rounded up.
445 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
446 {
447 *pMipDim += 2;
448 }
449 else
450 {
451 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
452 }
453 }
454 }
455
// Per-mip placement is optional; callers that only need the block counts pass pInfo == NULL.
456 if (pInfo != NULL)
457 {
458 UINT_32 mipWidth = mip0Width;
459 UINT_32 mipHeight = mip0Height;
460 UINT_32 mipDepth = mip0Depth;
// Start coordinate of the mip currently being placed.
461 Dim3d mipCoord = {0};
462
463 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
464 {
465 if (inTail)
466 {
// The remaining (numMipLevels - mip) mips are handed to GetMetaMiptailInfo in one call,
// so the loop ends here.
467 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
468 pMetaBlkDim);
469 break;
470 }
471 else
472 {
// Mips outside the tail occupy whole meta blocks.
473 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
474 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
475 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
476
477 pInfo[mip].inMiptail = FALSE;
478 pInfo[mip].startX = mipCoord.w;
479 pInfo[mip].startY = mipCoord.h;
480 pInfo[mip].startZ = mipCoord.d;
481 pInfo[mip].width = mipWidth;
482 pInfo[mip].height = mipHeight;
483 pInfo[mip].depth = dataThick ? mipDepth : 1;
484
// Advance the start coordinate for the next mip: after odd mips and after every mip from 3 on,
// step along the major axis; after mips 0 and 2, step along the other axis
// (height for X/Z major, width for Y major).
485 if ((mip >= 3) || (mip & 1))
486 {
487 switch (major)
488 {
489 case ADDR_MAJOR_X:
490 mipCoord.w += mipWidth;
491 break;
492 case ADDR_MAJOR_Y:
493 mipCoord.h += mipHeight;
494 break;
495 case ADDR_MAJOR_Z:
496 mipCoord.d += mipDepth;
497 break;
498 default:
499 break;
500 }
501 }
502 else
503 {
504 switch (major)
505 {
506 case ADDR_MAJOR_X:
507 mipCoord.h += mipHeight;
508 break;
509 case ADDR_MAJOR_Y:
510 mipCoord.w += mipWidth;
511 break;
512 case ADDR_MAJOR_Z:
513 mipCoord.h += mipHeight;
514 break;
515 default:
516 break;
517 }
518 }
519
// Next mip is half the (aligned) size in every dimension, never smaller than 1.
520 mipWidth = Max(mipWidth >> 1, 1u);
521 mipHeight = Max(mipHeight >> 1, 1u);
522 mipDepth = Max(mipDepth >> 1, 1u);
523
524 inTail = ((mipWidth <= tailWidth) &&
525 (mipHeight <= tailHeight) &&
526 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
527 }
528 }
529 }
530
// The block counts are reported unconditionally, including any enlargement applied above.
531 *pNumMetaBlkX = numMetaBlkX;
532 *pNumMetaBlkY = numMetaBlkY;
533 *pNumMetaBlkZ = numMetaBlkZ;
534 }
535
536 /**
537 ************************************************************************************************************************
538 * Gfx9Lib::HwlComputeDccInfo
539 *
540 * @brief
541 * Interface function to compute DCC key info: derives the compress block and meta block dimensions,
* the number of meta blocks needed for the mip chain, and the resulting DCC RAM size, base alignment
* and fast clear size. Linear metadata is not supported on GFX9; that path asserts but still fills
* in dccRamBaseAlign and dccRamSize.
542 *
543 * @return
544 * ADDR_E_RETURNCODE (this implementation always returns ADDR_OK)
545 ************************************************************************************************************************
546 */
547 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
548 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
549 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
550 ) const
551 {
552 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
553 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
554 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
555
// Linear data forces linear metadata; linear metadata on non-linear data drops pipe alignment.
556 if (dataLinear)
557 {
558 metaLinear = TRUE;
559 }
560 else if (metaLinear == TRUE)
561 {
562 pipeAligned = FALSE;
563 }
564
565 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
566
567 if (metaLinear)
568 {
569 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
570 ADDR_ASSERT_ALWAYS();
571
// Only these two outputs are written on this path: one DCC byte per 256 bytes of surface data,
// rounded up to the base alignment.
572 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
573 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
574 }
575 else
576 {
577 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
578
// Minimum meta block size: 64K for thick data, 4K otherwise, divided by the fragment count below.
579 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
580
// Treat zero fragments/slices as one.
581 UINT_32 numFrags = Max(pIn->numFrags, 1u);
582 UINT_32 numSlices = Max(pIn->numSlices, 1u);
583
584 minMetaBlkSize /= numFrags;
585
586 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
587
588 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
589
// Pipe- or RB-aligned metadata needs a meta block that scales with the number of RBs,
// capped at 65536 * bpp.
590 if ((numPipeTotal > 1) || (numRbTotal > 1))
591 {
592 const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
593
594 numCompressBlkPerMetaBlk =
595 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
596
597 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
598 {
599 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
600 }
601 }
602
603 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
604 Dim3d metaBlkDim = compressBlkDim;
605
// Start from one compress block and double one dimension per iteration; index doubles each time,
// so the loop runs until index reaches numCompressBlkPerMetaBlk. Height grows when it is smaller
// than width (or equal, for mipmapped surfaces), otherwise width grows; for thick data the depth
// is doubled instead whenever the chosen dimension already exceeds the depth.
606 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
607 {
608 if ((metaBlkDim.h < metaBlkDim.w) ||
609 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
610 {
611 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
612 {
613 metaBlkDim.h <<= 1;
614 }
615 else
616 {
617 metaBlkDim.d <<= 1;
618 }
619 }
620 else
621 {
622 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
623 {
624 metaBlkDim.w <<= 1;
625 }
626 else
627 {
628 metaBlkDim.d <<= 1;
629 }
630 }
631 }
632
// Filled in by GetMetaMipInfo below.
633 UINT_32 numMetaBlkX;
634 UINT_32 numMetaBlkY;
635 UINT_32 numMetaBlkZ;
636
637 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
638 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
639 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
640
641 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
642
// More fragments than m_maxCompFrag scales the size alignment by the ratio.
643 if (numFrags > m_maxCompFrag)
644 {
645 sizeAlign *= (numFrags / m_maxCompFrag);
646 }
647
648 if (m_settings.metaBaseAlignFix)
649 {
650 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
651 }
652
// Total DCC RAM: one meta block worth of data per meta block and fragment, rounded up to sizeAlign.
653 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
654 numCompressBlkPerMetaBlk * numFrags;
655 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
656 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
657
// Surface dimensions covered by whole meta blocks.
658 pOut->pitch = numMetaBlkX * metaBlkDim.w;
659 pOut->height = numMetaBlkY * metaBlkDim.h;
660 pOut->depth = numMetaBlkZ * metaBlkDim.d;
661
662 pOut->compressBlkWidth = compressBlkDim.w;
663 pOut->compressBlkHeight = compressBlkDim.h;
664 pOut->compressBlkDepth = compressBlkDim.d;
665
666 pOut->metaBlkWidth = metaBlkDim.w;
667 pOut->metaBlkHeight = metaBlkDim.h;
668 pOut->metaBlkDepth = metaBlkDim.d;
669
670 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
// The fast clear size counts at most m_maxCompFrag fragments.
671 pOut->fastClearSizePerSlice =
672 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
673 }
674
675 return ADDR_OK;
676 }
677
678 /**
679 ************************************************************************************************************************
680 * Gfx9Lib::HwlComputeMaxBaseAlignments
681 *
682 * @brief
683 * Gets maximum alignments
684 * @return
685 * maximum alignments
686 ************************************************************************************************************************
687 */
688 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
689 {
690 return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB);
691 }
692
693 /**
694 ************************************************************************************************************************
695 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
696 *
697 * @brief
698 * Gets maximum alignments for metadata
699 * @return
700 * maximum alignments for metadata
701 ************************************************************************************************************************
702 */
703 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
704 {
705 // Max base alignment for Htile
706 const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
707 const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
708
709 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
710 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
711 ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
712 const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
713
714 UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
715
716 if (maxNumPipeTotal > 2)
717 {
718 maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
719 }
720
721 maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
722
723 if (m_settings.metaBaseAlignFix)
724 {
725 maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB));
726 }
727
728 if (m_settings.htileAlignFix)
729 {
730 maxBaseAlignHtile *= maxNumPipeTotal;
731 }
732
733 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
734
735 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
736 UINT_32 maxBaseAlignDcc3D = 65536;
737
738 if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
739 {
740 maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
741 }
742
743 // Max base alignment for Msaa Dcc
744 UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
745
746 if (m_settings.metaBaseAlignFix)
747 {
748 maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB));
749 }
750
751 return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
752 }
753
754 /**
755 ************************************************************************************************************************
756 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
757 *
758 * @brief
759 * Interface function stub of AddrComputeCmaskAddrFromCoord
760 *
761 * @return
762 * ADDR_E_RETURNCODE
763 ************************************************************************************************************************
764 */
765 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
766 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
767 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
768 {
769 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
770 input.size = sizeof(input);
771 input.cMaskFlags = pIn->cMaskFlags;
772 input.colorFlags = pIn->colorFlags;
773 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
774 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
775 input.numSlices = Max(pIn->numSlices, 1u);
776 input.swizzleMode = pIn->swizzleMode;
777 input.resourceType = pIn->resourceType;
778
779 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
780 output.size = sizeof(output);
781
782 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
783
784 if (returnCode == ADDR_OK)
785 {
786 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
787 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
788 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
789 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
790
791 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
792 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
793 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
794
795 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
796
797 UINT_32 xb = pIn->x / output.metaBlkWidth;
798 UINT_32 yb = pIn->y / output.metaBlkHeight;
799 UINT_32 zb = pIn->slice;
800
801 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
802 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
803 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
804
805 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
806
807 pOut->addr = address >> 1;
808 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
809
810 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
811 pIn->swizzleMode);
812
813 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
814
815 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
816 }
817
818 return returnCode;
819 }
820
821 /**
822 ************************************************************************************************************************
823 * Gfx9Lib::HwlComputeHtileAddrFromCoord
824 *
825 * @brief
826 * Interface function stub of AddrComputeHtileAddrFromCoord
827 *
828 * @return
829 * ADDR_E_RETURNCODE
830 ************************************************************************************************************************
831 */
832 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
833 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
834 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
835 {
836 ADDR_E_RETURNCODE returnCode = ADDR_OK;
837
838 if (pIn->numMipLevels > 1)
839 {
840 returnCode = ADDR_NOTIMPLEMENTED;
841 }
842 else
843 {
844 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
845 input.size = sizeof(input);
846 input.hTileFlags = pIn->hTileFlags;
847 input.depthFlags = pIn->depthflags;
848 input.swizzleMode = pIn->swizzleMode;
849 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
850 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
851 input.numSlices = Max(pIn->numSlices, 1u);
852 input.numMipLevels = Max(pIn->numMipLevels, 1u);
853
854 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
855 output.size = sizeof(output);
856
857 returnCode = ComputeHtileInfo(&input, &output);
858
859 if (returnCode == ADDR_OK)
860 {
861 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
862 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
863 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
864 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
865
866 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
867 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
868 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
869
870 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
871
872 UINT_32 xb = pIn->x / output.metaBlkWidth;
873 UINT_32 yb = pIn->y / output.metaBlkHeight;
874 UINT_32 zb = pIn->slice;
875
876 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
877 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
878 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
879
880 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
881
882 pOut->addr = address >> 1;
883
884 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
885 pIn->swizzleMode);
886
887 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
888
889 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
890 }
891 }
892
893 return returnCode;
894 }
895
896 /**
897 ************************************************************************************************************************
898 * Gfx9Lib::HwlComputeHtileCoordFromAddr
899 *
900 * @brief
901 * Interface function stub of AddrComputeHtileCoordFromAddr
902 *
903 * @return
904 * ADDR_E_RETURNCODE
905 ************************************************************************************************************************
906 */
907 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
908 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
909 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
910 {
911 ADDR_E_RETURNCODE returnCode = ADDR_OK;
912
913 if (pIn->numMipLevels > 1)
914 {
915 returnCode = ADDR_NOTIMPLEMENTED;
916 }
917 else
918 {
919 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
920 input.size = sizeof(input);
921 input.hTileFlags = pIn->hTileFlags;
922 input.swizzleMode = pIn->swizzleMode;
923 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
924 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
925 input.numSlices = Max(pIn->numSlices, 1u);
926 input.numMipLevels = Max(pIn->numMipLevels, 1u);
927
928 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
929 output.size = sizeof(output);
930
931 returnCode = ComputeHtileInfo(&input, &output);
932
933 if (returnCode == ADDR_OK)
934 {
935 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
936 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
937 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
938 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
939
940 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
941 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
942 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
943
944 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
945
946 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
947 pIn->swizzleMode);
948
949 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
950
951 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
952
953 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
954 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
955
956 UINT_32 x, y, z, s, m;
957 pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
958
959 pOut->slice = m / sliceSizeInBlock;
960 pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
961 pOut->x = (m % pitchInBlock) * output.metaBlkWidth + x;
962 }
963 }
964
965 return returnCode;
966 }
967
968 /**
969 ************************************************************************************************************************
970 * Gfx9Lib::HwlComputeDccAddrFromCoord
971 *
972 * @brief
973 * Interface function stub of AddrComputeDccAddrFromCoord
974 *
975 * @return
976 * ADDR_E_RETURNCODE
977 ************************************************************************************************************************
978 */
979 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
980 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
981 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
982 {
983 ADDR_E_RETURNCODE returnCode = ADDR_OK;
984
985 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
986 {
987 returnCode = ADDR_NOTIMPLEMENTED;
988 }
989 else
990 {
991 ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
992 input.size = sizeof(input);
993 input.dccKeyFlags = pIn->dccKeyFlags;
994 input.colorFlags = pIn->colorFlags;
995 input.swizzleMode = pIn->swizzleMode;
996 input.resourceType = pIn->resourceType;
997 input.bpp = pIn->bpp;
998 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
999 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
1000 input.numSlices = Max(pIn->numSlices, 1u);
1001 input.numFrags = Max(pIn->numFrags, 1u);
1002 input.numMipLevels = Max(pIn->numMipLevels, 1u);
1003
1004 ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
1005 output.size = sizeof(output);
1006
1007 returnCode = ComputeDccInfo(&input, &output);
1008
1009 if (returnCode == ADDR_OK)
1010 {
1011 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
1012 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
1013 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
1014 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1015 UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth);
1016 UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth);
1017 UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
1018 UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
1019
1020 MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1021 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1022 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1023 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1024
1025 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1026
1027 UINT_32 xb = pIn->x / output.metaBlkWidth;
1028 UINT_32 yb = pIn->y / output.metaBlkHeight;
1029 UINT_32 zb = pIn->slice / output.metaBlkDepth;
1030
1031 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
1032 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1033 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1034
1035 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
1036
1037 pOut->addr = address >> 1;
1038
1039 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1040 pIn->swizzleMode);
1041
1042 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1043
1044 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1045 }
1046 }
1047
1048 return returnCode;
1049 }
1050
1051 /**
1052 ************************************************************************************************************************
1053 * Gfx9Lib::HwlInitGlobalParams
1054 *
1055 * @brief
1056 * Initializes global parameters
1057 *
1058 * @return
1059 * TRUE if all settings are valid
1060 *
1061 ************************************************************************************************************************
1062 */
1063 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1064 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1065 {
1066 BOOL_32 valid = TRUE;
1067
1068 if (m_settings.isArcticIsland)
1069 {
1070 GB_ADDR_CONFIG gbAddrConfig;
1071
1072 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1073
1074 // These values are copied from CModel code
1075 switch (gbAddrConfig.bits.NUM_PIPES)
1076 {
1077 case ADDR_CONFIG_1_PIPE:
1078 m_pipes = 1;
1079 m_pipesLog2 = 0;
1080 break;
1081 case ADDR_CONFIG_2_PIPE:
1082 m_pipes = 2;
1083 m_pipesLog2 = 1;
1084 break;
1085 case ADDR_CONFIG_4_PIPE:
1086 m_pipes = 4;
1087 m_pipesLog2 = 2;
1088 break;
1089 case ADDR_CONFIG_8_PIPE:
1090 m_pipes = 8;
1091 m_pipesLog2 = 3;
1092 break;
1093 case ADDR_CONFIG_16_PIPE:
1094 m_pipes = 16;
1095 m_pipesLog2 = 4;
1096 break;
1097 case ADDR_CONFIG_32_PIPE:
1098 m_pipes = 32;
1099 m_pipesLog2 = 5;
1100 break;
1101 default:
1102 ADDR_ASSERT_ALWAYS();
1103 break;
1104 }
1105
1106 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1107 {
1108 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1109 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1110 m_pipeInterleaveLog2 = 8;
1111 break;
1112 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1113 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1114 m_pipeInterleaveLog2 = 9;
1115 break;
1116 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1117 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1118 m_pipeInterleaveLog2 = 10;
1119 break;
1120 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1121 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1122 m_pipeInterleaveLog2 = 11;
1123 break;
1124 default:
1125 ADDR_ASSERT_ALWAYS();
1126 break;
1127 }
1128
1129 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1130 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1131 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1132
1133 switch (gbAddrConfig.bits.NUM_BANKS)
1134 {
1135 case ADDR_CONFIG_1_BANK:
1136 m_banks = 1;
1137 m_banksLog2 = 0;
1138 break;
1139 case ADDR_CONFIG_2_BANK:
1140 m_banks = 2;
1141 m_banksLog2 = 1;
1142 break;
1143 case ADDR_CONFIG_4_BANK:
1144 m_banks = 4;
1145 m_banksLog2 = 2;
1146 break;
1147 case ADDR_CONFIG_8_BANK:
1148 m_banks = 8;
1149 m_banksLog2 = 3;
1150 break;
1151 case ADDR_CONFIG_16_BANK:
1152 m_banks = 16;
1153 m_banksLog2 = 4;
1154 break;
1155 default:
1156 ADDR_ASSERT_ALWAYS();
1157 break;
1158 }
1159
1160 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1161 {
1162 case ADDR_CONFIG_1_SHADER_ENGINE:
1163 m_se = 1;
1164 m_seLog2 = 0;
1165 break;
1166 case ADDR_CONFIG_2_SHADER_ENGINE:
1167 m_se = 2;
1168 m_seLog2 = 1;
1169 break;
1170 case ADDR_CONFIG_4_SHADER_ENGINE:
1171 m_se = 4;
1172 m_seLog2 = 2;
1173 break;
1174 case ADDR_CONFIG_8_SHADER_ENGINE:
1175 m_se = 8;
1176 m_seLog2 = 3;
1177 break;
1178 default:
1179 ADDR_ASSERT_ALWAYS();
1180 break;
1181 }
1182
1183 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1184 {
1185 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1186 m_rbPerSe = 1;
1187 m_rbPerSeLog2 = 0;
1188 break;
1189 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1190 m_rbPerSe = 2;
1191 m_rbPerSeLog2 = 1;
1192 break;
1193 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1194 m_rbPerSe = 4;
1195 m_rbPerSeLog2 = 2;
1196 break;
1197 default:
1198 ADDR_ASSERT_ALWAYS();
1199 break;
1200 }
1201
1202 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1203 {
1204 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1205 m_maxCompFrag = 1;
1206 m_maxCompFragLog2 = 0;
1207 break;
1208 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1209 m_maxCompFrag = 2;
1210 m_maxCompFragLog2 = 1;
1211 break;
1212 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1213 m_maxCompFrag = 4;
1214 m_maxCompFragLog2 = 2;
1215 break;
1216 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1217 m_maxCompFrag = 8;
1218 m_maxCompFragLog2 = 3;
1219 break;
1220 default:
1221 ADDR_ASSERT_ALWAYS();
1222 break;
1223 }
1224
1225 m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
1226 ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
1227 ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
1228 m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
1229
1230 if ((m_rbPerSeLog2 == 1) &&
1231 (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1232 ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1233 {
1234 ADDR_ASSERT(m_settings.isVega10 == FALSE);
1235 ADDR_ASSERT(m_settings.isRaven == FALSE);
1236
1237 ADDR_ASSERT(m_settings.isVega20 == FALSE);
1238
1239 if (m_settings.isVega12)
1240 {
1241 m_settings.htileCacheRbConflict = 1;
1242 }
1243 }
1244 }
1245 else
1246 {
1247 valid = FALSE;
1248 ADDR_NOT_IMPLEMENTED();
1249 }
1250
1251 if (valid)
1252 {
1253 InitEquationTable();
1254 }
1255
1256 return valid;
1257 }
1258
1259 /**
1260 ************************************************************************************************************************
1261 * Gfx9Lib::HwlConvertChipFamily
1262 *
1263 * @brief
1264 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1265 * @return
1266 * ChipFamily
1267 ************************************************************************************************************************
1268 */
1269 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1270 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1271 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1272 {
1273 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1274
1275 switch (uChipFamily)
1276 {
1277 case FAMILY_AI:
1278 m_settings.isArcticIsland = 1;
1279 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1280 m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1281 m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1282 m_settings.isDce12 = 1;
1283
1284 if (m_settings.isVega10 == 0)
1285 {
1286 m_settings.htileAlignFix = 1;
1287 m_settings.applyAliasFix = 1;
1288 }
1289
1290 m_settings.metaBaseAlignFix = 1;
1291
1292 m_settings.depthPipeXorDisable = 1;
1293 break;
1294 case FAMILY_RV:
1295 m_settings.isArcticIsland = 1;
1296
1297 if (ASICREV_IS_RAVEN(uChipRevision))
1298 {
1299 m_settings.isRaven = 1;
1300
1301 m_settings.depthPipeXorDisable = 1;
1302 }
1303
1304 if (ASICREV_IS_RAVEN2(uChipRevision))
1305 {
1306 m_settings.isRaven = 1;
1307 }
1308
1309 if (m_settings.isRaven == 0)
1310 {
1311 m_settings.htileAlignFix = 1;
1312 m_settings.applyAliasFix = 1;
1313 }
1314
1315 if (ASICREV_IS_RENOIR(uChipRevision))
1316 {
1317 m_settings.isRaven = 1;
1318 }
1319
1320 m_settings.isDcn1 = m_settings.isRaven;
1321
1322 m_settings.metaBaseAlignFix = 1;
1323 break;
1324
1325 default:
1326 ADDR_ASSERT(!"This should be a Fusion");
1327 break;
1328 }
1329
1330 return family;
1331 }
1332
1333 /**
1334 ************************************************************************************************************************
1335 * Gfx9Lib::InitRbEquation
1336 *
1337 * @brief
1338 * Init RB equation
1339 * @return
1340 * N/A
1341 ************************************************************************************************************************
1342 */
1343 VOID Gfx9Lib::GetRbEquation(
1344 CoordEq* pRbEq, ///< [out] rb equation
1345 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1346 UINT_32 numSeLog2) ///< [in] number of shader engine
1347 const
1348 {
1349 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1350 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1351 Coordinate cx('x', rbRegion);
1352 Coordinate cy('y', rbRegion);
1353
1354 UINT_32 start = 0;
1355 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1356
1357 // Clear the rb equation
1358 pRbEq->resize(0);
1359 pRbEq->resize(numRbTotalLog2);
1360
1361 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1362 {
1363 // Special case when more than 1 SE, and 2 RB per SE
1364 (*pRbEq)[0].add(cx);
1365 (*pRbEq)[0].add(cy);
1366 cx++;
1367 cy++;
1368
1369 if (m_settings.applyAliasFix == false)
1370 {
1371 (*pRbEq)[0].add(cy);
1372 }
1373
1374 (*pRbEq)[0].add(cy);
1375 start++;
1376 }
1377
1378 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1379
1380 for (UINT_32 i = 0; i < numBits; i++)
1381 {
1382 UINT_32 idx =
1383 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1384
1385 if ((i % 2) == 1)
1386 {
1387 (*pRbEq)[idx].add(cx);
1388 cx++;
1389 }
1390 else
1391 {
1392 (*pRbEq)[idx].add(cy);
1393 cy++;
1394 }
1395 }
1396 }
1397
1398 /**
1399 ************************************************************************************************************************
1400 * Gfx9Lib::GetDataEquation
1401 *
1402 * @brief
1403 * Get data equation for color, fmask and Z (depth) surfaces
* The equation in pDataEq is cleared and rebuilt. Which coordinate bit (x/y/z/s/m) is attached to which equation
* bit depends on the surface type, swizzle mode, resource type, element size and sample count.
* NOTE(review): mort2d()/mort3d() are defined elsewhere; from their use here they appear to interleave the given
* coordinates over a range of equation bits - confirm against CoordEq.
1404 * @return
1405 * N/A
1406 ************************************************************************************************************************
1407 */
1408 VOID Gfx9Lib::GetDataEquation(
1409 CoordEq* pDataEq, ///< [out] data surface equation
1410 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1411 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1412 AddrResourceType resourceType, ///< [in] data surface resource type
1413 UINT_32 elementBytesLog2, ///< [in] data surface element bytes (log2)
1414 UINT_32 numSamplesLog2) ///< [in] data surface sample count (log2)
1415 const
1416 {
// Running coordinate cursors: each starts at ordinal 0 and is post-incremented every time it is
// added to an equation bit, so the order of the add()/++ pairs below determines the layout.
1417 Coordinate cx('x', 0);
1418 Coordinate cy('y', 0);
1419 Coordinate cz('z', 0);
1420 Coordinate cs('s', 0);
1421
1422 // Clear the equation
1423 pDataEq->resize(0);
1424 pDataEq->resize(27);
1425
1426 if (dataSurfaceType == Gfx9DataColor)
1427 {
1428 if (IsLinear(swizzleMode))
1429 {
// Linear color: the equation is widened to 49 bits and bit i receives m[i]
1430 Coordinate cm('m', 0);
1431
1432 pDataEq->resize(49);
1433
1434 for (UINT_32 i = 0; i < 49; i++)
1435 {
1436 (*pDataEq)[i].add(cm);
1437 cm++;
1438 }
1439 }
1440 else if (IsThick(resourceType, swizzleMode))
1441 {
1442 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1443 UINT_32 i;
1444 if (IsStandardSwizzle(resourceType, swizzleMode))
1445 {
1446 // Standard 3d swizzle
1447 // Fill in bottom x bits
// (equation bits below elementBytesLog2 are left empty)
1448 for (i = elementBytesLog2; i < 4; i++)
1449 {
1450 (*pDataEq)[i].add(cx);
1451 cx++;
1452 }
1453 // Fill in 2 bits of y and then z
1454 for (i = 4; i < 6; i++)
1455 {
1456 (*pDataEq)[i].add(cy);
1457 cy++;
1458 }
1459 for (i = 6; i < 8; i++)
1460 {
1461 (*pDataEq)[i].add(cz);
1462 cz++;
1463 }
// Bits 8 and 9 depend on the element size
1464 if (elementBytesLog2 < 2)
1465 {
1466 // fill in z & y bit
1467 (*pDataEq)[8].add(cz);
1468 (*pDataEq)[9].add(cy);
1469 cz++;
1470 cy++;
1471 }
1472 else if (elementBytesLog2 == 2)
1473 {
1474 // fill in y and x bit
1475 (*pDataEq)[8].add(cy);
1476 (*pDataEq)[9].add(cx);
1477 cy++;
1478 cx++;
1479 }
1480 else
1481 {
1482 // fill in 2 x bits
1483 (*pDataEq)[8].add(cx);
1484 cx++;
1485 (*pDataEq)[9].add(cx);
1486 cx++;
1487 }
1488 }
1489 else
1490 {
1491 // Z 3d swizzle
// m2dEnd is handed to mort2d() as its end bit; numZs z bits are then placed at bits m2dEnd+1 .. m2dEnd+numZs.
// Both are chosen per element size.
1492 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1493 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1494 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1495 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1496 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1497 {
1498 (*pDataEq)[i].add(cz);
1499 cz++;
1500 }
// Bits 6 and 7 are only filled here for element sizes 0, 2 and 3 (log2)
1501 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1502 {
1503 // add an x and z
1504 (*pDataEq)[6].add(cx);
1505 (*pDataEq)[7].add(cz);
1506 cx++;
1507 cz++;
1508 }
1509 else if (elementBytesLog2 == 2)
1510 {
1511 // add a y and z
1512 (*pDataEq)[6].add(cy);
1513 (*pDataEq)[7].add(cz);
1514 cy++;
1515 cz++;
1516 }
1517 // add y and x
1518 (*pDataEq)[8].add(cy);
1519 (*pDataEq)[9].add(cx);
1520 cy++;
1521 cx++;
1522 }
1523 // Fill in bit 10 and up
// (shared by both thick variants; continues from wherever cz/cy/cx were left above)
1524 pDataEq->mort3d( cz, cy, cx, 10 );
1525 }
1526 else if (IsThin(resourceType, swizzleMode))
1527 {
1528 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1529 // Color 2D
// microYBits: number of y bits placed starting at bit 4 (bits 4 .. 4+microYBits-1)
1530 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
// tileSplitStart: first equation bit that receives a sample bit
1531 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1532 UINT_32 i;
1533 // Fill in bottom x bits
1534 for (i = elementBytesLog2; i < 4; i++)
1535 {
1536 (*pDataEq)[i].add(cx);
1537 cx++;
1538 }
1539 // Fill in bottom y bits
1540 for (i = 4; i < 4 + microYBits; i++)
1541 {
1542 (*pDataEq)[i].add(cy);
1543 cy++;
1544 }
1545 // Fill in last of the micro_x bits
1546 for (i = 4 + microYBits; i < 8; i++)
1547 {
1548 (*pDataEq)[i].add(cx);
1549 cx++;
1550 }
1551 // Fill in x/y bits below sample split
1552 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1553 // Fill in sample bits
1554 for (i = 0; i < numSamplesLog2; i++)
1555 {
1556 cs.set('s', i);
1557 (*pDataEq)[tileSplitStart + i].add(cs);
1558 }
1559 // Fill in x/y bits above sample split
// The coordinate passed first to mort2d() is x when the parities of numSamplesLog2 and blockSizeLog2 differ,
// and y when they match
1560 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1561 {
1562 pDataEq->mort2d(cx, cy, blockSizeLog2);
1563 }
1564 else
1565 {
1566 pDataEq->mort2d(cy, cx, blockSizeLog2);
1567 }
1568 }
1569 else
1570 {
// Color surface that is neither linear, thick nor thin: not expected
1571 ADDR_ASSERT_ALWAYS();
1572 }
1573 }
1574 else
1575 {
1576 // Fmask or depth
// Sample bits sit directly above the element bytes, pixel bits start above the sample bits
1577 UINT_32 sampleStart = elementBytesLog2;
1578 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1579 UINT_32 ymajStart = 6 + numSamplesLog2;
1580
1581 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1582 {
1583 cs.set('s', s);
1584 (*pDataEq)[sampleStart + s].add(cs);
1585 }
1586
1587 // Put in the x-major order pixel bits
1588 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1589 // Put in the y-major order pixel bits
1590 pDataEq->mort2d(cy, cx, ymajStart);
1591 }
1592 }
1593
1594 /**
1595 ************************************************************************************************************************
1596 * Gfx9Lib::GetPipeEquation
1597 *
1598 * @brief
1599 * Get pipe equation
1600 * @return
1601 * N/A
1602 ************************************************************************************************************************
1603 */
1604 VOID Gfx9Lib::GetPipeEquation(
1605 CoordEq* pPipeEq, ///< [out] pipe equation
1606 CoordEq* pDataEq, ///< [in] data equation
1607 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1608 UINT_32 numPipeLog2, ///< [in] number of pipes
1609 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1610 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1611 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1612 AddrResourceType resourceType ///< [in] data surface resource type
1613 ) const
1614 {
1615 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1616 CoordEq dataEq;
1617
1618 pDataEq->copy(dataEq);
1619
1620 if (dataSurfaceType == Gfx9DataColor)
1621 {
1622 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1623 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1624 }
1625
1626 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1627
1628 // This section should only apply to z/stencil, maybe fmask
1629 // If the pipe bit is below the comp block size,
1630 // then keep moving up the address until we find a bit that is above
1631 UINT_32 pipeStart = 0;
1632
1633 if (dataSurfaceType != Gfx9DataColor)
1634 {
1635 Coordinate tileMin('x', 3);
1636
1637 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1638 {
1639 pipeStart++;
1640 }
1641
1642 // if pipe is 0, then the first pipe bit is above the comp block size,
1643 // so we don't need to do anything
1644 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1645 // we will get the same pipe equation
1646 if (pipeStart != 0)
1647 {
1648 for (UINT_32 i = 0; i < numPipeLog2; i++)
1649 {
1650 // Copy the jth bit above pipe interleave to the current pipe equation bit
1651 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1652 }
1653 }
1654 }
1655
1656 if (IsPrt(swizzleMode))
1657 {
1658 // Clear out bits above the block size if prt's are enabled
1659 dataEq.resize(blockSizeLog2);
1660 dataEq.resize(48);
1661 }
1662
1663 if (IsXor(swizzleMode))
1664 {
1665 CoordEq xorMask;
1666
1667 if (IsThick(resourceType, swizzleMode))
1668 {
1669 CoordEq xorMask2;
1670
1671 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1672
1673 xorMask.resize(numPipeLog2);
1674
1675 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1676 {
1677 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1678 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1679 }
1680 }
1681 else
1682 {
1683 // Xor in the bits above the pipe+gpu bits
1684 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1685
1686 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1687 {
1688 Coordinate co;
1689 CoordEq xorMask2;
1690 // if 1xaa and not prt, then xor in the z bits
1691 xorMask2.resize(0);
1692 xorMask2.resize(numPipeLog2);
1693 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1694 {
1695 co.set('z', numPipeLog2 - 1 - pipeIdx);
1696 xorMask2[pipeIdx].add(co);
1697 }
1698
1699 pPipeEq->xorin(xorMask2);
1700 }
1701 }
1702
1703 xorMask.reverse();
1704 pPipeEq->xorin(xorMask);
1705 }
1706 }
1707 /**
1708 ************************************************************************************************************************
1709 * Gfx9Lib::GetMetaEquation
1710 *
1711 * @brief
1712 * Get meta equation for cmask/htile/DCC
1713 * @return
1714 * Pointer to a calculated meta equation
1715 ************************************************************************************************************************
1716 */
1717 const CoordEq* Gfx9Lib::GetMetaEquation(
1718 const MetaEqParams& metaEqParams)
1719 {
1720 UINT_32 cachedMetaEqIndex;
1721
1722 for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1723 {
1724 if (memcmp(&metaEqParams,
1725 &m_cachedMetaEqKey[cachedMetaEqIndex],
1726 static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1727 {
1728 break;
1729 }
1730 }
1731
1732 CoordEq* pMetaEq = NULL;
1733
1734 if (cachedMetaEqIndex < MaxCachedMetaEq)
1735 {
1736 pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1737 }
1738 else
1739 {
1740 m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1741
1742 pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1743
1744 m_metaEqOverrideIndex %= MaxCachedMetaEq;
1745
1746 GenMetaEquation(pMetaEq,
1747 metaEqParams.maxMip,
1748 metaEqParams.elementBytesLog2,
1749 metaEqParams.numSamplesLog2,
1750 metaEqParams.metaFlag,
1751 metaEqParams.dataSurfaceType,
1752 metaEqParams.swizzleMode,
1753 metaEqParams.resourceType,
1754 metaEqParams.metaBlkWidthLog2,
1755 metaEqParams.metaBlkHeightLog2,
1756 metaEqParams.metaBlkDepthLog2,
1757 metaEqParams.compBlkWidthLog2,
1758 metaEqParams.compBlkHeightLog2,
1759 metaEqParams.compBlkDepthLog2);
1760 }
1761
1762 return pMetaEq;
1763 }
1764
1765 /**
1766 ************************************************************************************************************************
1767 * Gfx9Lib::GenMetaEquation
1768 *
1769 * @brief
1770 * Get meta equation for cmask/htile/DCC
1771 * @return
1772 * N/A
1773 ************************************************************************************************************************
1774 */
1775 VOID Gfx9Lib::GenMetaEquation(
1776 CoordEq* pMetaEq, ///< [out] meta equation
1777 UINT_32 maxMip, ///< [in] max mip Id
1778 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1779 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1780 ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
1781 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1782 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1783 AddrResourceType resourceType, ///< [in] data surface resource type
1784 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1785 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1786 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1787 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1788 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1789 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1790 const
1791 {
1792 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1793 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1794
1795 // Get the correct data address and rb equation
1796 CoordEq dataEq;
1797 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1798 elementBytesLog2, numSamplesLog2);
1799
1800 // Get pipe and rb equations
1801 CoordEq pipeEquation;
1802 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1803 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1804 numPipeTotalLog2 = pipeEquation.getsize();
1805
1806 if (metaFlag.linear)
1807 {
1808 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1809 ADDR_ASSERT_ALWAYS();
1810
1811 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1812
1813 dataEq.copy(*pMetaEq);
1814
1815 if (IsLinear(swizzleMode))
1816 {
1817 if (metaFlag.pipeAligned)
1818 {
1819 // Remove the pipe bits
1820 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1821 pMetaEq->shift(-shift, pipeInterleaveLog2);
1822 }
1823 // Divide by comp block size, which for linear (which is always color) is 256 B
1824 pMetaEq->shift(-8);
1825
1826 if (metaFlag.pipeAligned)
1827 {
1828 // Put pipe bits back in
1829 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1830
1831 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1832 {
1833 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1834 }
1835 }
1836 }
1837
1838 pMetaEq->shift(1);
1839 }
1840 else
1841 {
1842 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1843 UINT_32 compFragLog2 =
1844 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1845 maxCompFragLog2 : numSamplesLog2;
1846
1847 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1848
1849 // Make sure the metaaddr is cleared
1850 pMetaEq->resize(0);
1851 pMetaEq->resize(27);
1852
1853 if (IsThick(resourceType, swizzleMode))
1854 {
1855 Coordinate cx('x', 0);
1856 Coordinate cy('y', 0);
1857 Coordinate cz('z', 0);
1858
1859 if (maxMip > 0)
1860 {
1861 pMetaEq->mort3d(cy, cx, cz);
1862 }
1863 else
1864 {
1865 pMetaEq->mort3d(cx, cy, cz);
1866 }
1867 }
1868 else
1869 {
1870 Coordinate cx('x', 0);
1871 Coordinate cy('y', 0);
1872 Coordinate cs;
1873
1874 if (maxMip > 0)
1875 {
1876 pMetaEq->mort2d(cy, cx, compFragLog2);
1877 }
1878 else
1879 {
1880 pMetaEq->mort2d(cx, cy, compFragLog2);
1881 }
1882
1883 //------------------------------------------------------------------------------------------------------------------------
1884 // Put the compressible fragments at the lsb
1885 // the uncompressible frags will be at the msb of the micro address
1886 //------------------------------------------------------------------------------------------------------------------------
1887 for (UINT_32 s = 0; s < compFragLog2; s++)
1888 {
1889 cs.set('s', s);
1890 (*pMetaEq)[s].add(cs);
1891 }
1892 }
1893
1894 // Keep a copy of the pipe equations
1895 CoordEq origPipeEquation;
1896 pipeEquation.copy(origPipeEquation);
1897
1898 Coordinate co;
1899 // filter out everything under the compressed block size
1900 co.set('x', compBlkWidthLog2);
1901 pMetaEq->Filter('<', co, 0, 'x');
1902 co.set('y', compBlkHeightLog2);
1903 pMetaEq->Filter('<', co, 0, 'y');
1904 co.set('z', compBlkDepthLog2);
1905 pMetaEq->Filter('<', co, 0, 'z');
1906
1907 // For non-color, filter out sample bits
1908 if (dataSurfaceType != Gfx9DataColor)
1909 {
1910 co.set('x', 0);
1911 pMetaEq->Filter('<', co, 0, 's');
1912 }
1913
1914 // filter out everything above the metablock size
1915 co.set('x', metaBlkWidthLog2 - 1);
1916 pMetaEq->Filter('>', co, 0, 'x');
1917 co.set('y', metaBlkHeightLog2 - 1);
1918 pMetaEq->Filter('>', co, 0, 'y');
1919 co.set('z', metaBlkDepthLog2 - 1);
1920 pMetaEq->Filter('>', co, 0, 'z');
1921
1922 // filter out everything above the metablock size for the channel bits
1923 co.set('x', metaBlkWidthLog2 - 1);
1924 pipeEquation.Filter('>', co, 0, 'x');
1925 co.set('y', metaBlkHeightLog2 - 1);
1926 pipeEquation.Filter('>', co, 0, 'y');
1927 co.set('z', metaBlkDepthLog2 - 1);
1928 pipeEquation.Filter('>', co, 0, 'z');
1929
1930 // Make sure we still have the same number of channel bits
1931 if (pipeEquation.getsize() != numPipeTotalLog2)
1932 {
1933 ADDR_ASSERT_ALWAYS();
1934 }
1935
1936 // Loop through all channel and rb bits,
1937 // and make sure these components exist in the metadata address
1938 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1939 {
1940 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1941 {
1942 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1943 {
1944 ADDR_ASSERT_ALWAYS();
1945 }
1946 }
1947 }
1948
1949 const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
1950 const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1951 const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1952 CoordEq origRbEquation;
1953
1954 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1955
1956 CoordEq rbEquation = origRbEquation;
1957
1958 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1959 {
1960 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1961 {
1962 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1963 {
1964 ADDR_ASSERT_ALWAYS();
1965 }
1966 }
1967 }
1968
1969 if (m_settings.applyAliasFix)
1970 {
1971 co.set('z', -1);
1972 }
1973
1974 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1975 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1976 {
1977 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1978 {
1979 BOOL_32 isRbEquationInPipeEquation = FALSE;
1980
1981 if (m_settings.applyAliasFix)
1982 {
1983 CoordTerm filteredPipeEq;
1984 filteredPipeEq = pipeEquation[j];
1985
1986 filteredPipeEq.Filter('>', co, 0, 'z');
1987
1988 isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1989 }
1990 else
1991 {
1992 isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1993 }
1994
1995 if (isRbEquationInPipeEquation)
1996 {
1997 rbEquation[i].Clear();
1998 }
1999 }
2000 }
2001
2002 bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
2003
2004 // Loop through each bit of the channel, get the smallest coordinate,
2005 // and remove it from the metaaddr, and rb_equation
2006 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2007 {
2008 pipeEquation[i].getsmallest(co);
2009
2010 UINT_32 old_size = pMetaEq->getsize();
2011 pMetaEq->Filter('=', co);
2012 UINT_32 new_size = pMetaEq->getsize();
2013 if (new_size != old_size-1)
2014 {
2015 ADDR_ASSERT_ALWAYS();
2016 }
2017 pipeEquation.remove(co);
2018 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2019 {
2020 if (rbEquation[j].remove(co))
2021 {
2022 // if we actually removed something from this bit, then add the remaining
2023 // channel bits, as these can be removed for this bit
2024 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2025 {
2026 if (pipeEquation[i][k] != co)
2027 {
2028 rbEquation[j].add(pipeEquation[i][k]);
2029 rbAppendedWithPipeBits[j] = true;
2030 }
2031 }
2032 }
2033 }
2034 }
2035
2036 // Loop through the rb bits and see what remain;
2037 // filter out the smallest coordinate if it remains
2038 UINT_32 rbBitsLeft = 0;
2039 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2040 {
2041 BOOL_32 isRbEqAppended = FALSE;
2042
2043 if (m_settings.applyAliasFix)
2044 {
2045 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2046 }
2047 else
2048 {
2049 isRbEqAppended = (rbEquation[i].getsize() > 0);
2050 }
2051
2052 if (isRbEqAppended)
2053 {
2054 rbBitsLeft++;
2055 rbEquation[i].getsmallest(co);
2056 UINT_32 old_size = pMetaEq->getsize();
2057 pMetaEq->Filter('=', co);
2058 UINT_32 new_size = pMetaEq->getsize();
2059 if (new_size != old_size - 1)
2060 {
2061 // assert warning
2062 }
2063 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2064 {
2065 if (rbEquation[j].remove(co))
2066 {
2067 // if we actually removed something from this bit, then add the remaining
2068 // rb bits, as these can be removed for this bit
2069 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2070 {
2071 if (rbEquation[i][k] != co)
2072 {
2073 rbEquation[j].add(rbEquation[i][k]);
2074 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2075 }
2076 }
2077 }
2078 }
2079 }
2080 }
2081
2082 // capture the size of the metaaddr
2083 UINT_32 metaSize = pMetaEq->getsize();
2084 // resize to 49 bits...make this a nibble address
2085 pMetaEq->resize(49);
2086 // Concatenate the macro address above the current address
2087 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2088 {
2089 co.set('m', j);
2090 (*pMetaEq)[i].add(co);
2091 }
2092
2093 // Multiply by meta element size (in nibbles)
2094 if (dataSurfaceType == Gfx9DataColor)
2095 {
2096 pMetaEq->shift(1);
2097 }
2098 else if (dataSurfaceType == Gfx9DataDepthStencil)
2099 {
2100 pMetaEq->shift(3);
2101 }
2102
2103 //------------------------------------------------------------------------------------------
2104 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2105 // Shift up from pipe interleave number of channel
2106 // and rb bits left, and uncompressed fragments
2107 //------------------------------------------------------------------------------------------
2108
2109 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2110
2111 // Put in the channel bits
2112 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2113 {
2114 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2115 }
2116
2117 // Put in remaining rb bits
2118 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2119 {
2120 BOOL_32 isRbEqAppended = FALSE;
2121
2122 if (m_settings.applyAliasFix)
2123 {
2124 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2125 }
2126 else
2127 {
2128 isRbEqAppended = (rbEquation[i].getsize() > 0);
2129 }
2130
2131 if (isRbEqAppended)
2132 {
2133 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2134 // Mark any rb bit we add in to the rb mask
2135 j++;
2136 }
2137 }
2138
2139 //------------------------------------------------------------------------------------------
2140 // Put in the uncompressed fragment bits
2141 //------------------------------------------------------------------------------------------
2142 for (UINT_32 i = 0; i < uncompFragLog2; i++)
2143 {
2144 co.set('s', compFragLog2 + i);
2145 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2146 }
2147 }
2148 }
2149
2150 /**
2151 ************************************************************************************************************************
2152 * Gfx9Lib::IsEquationSupported
2153 *
2154 * @brief
2155 * Check if equation is supported for given swizzle mode and resource type.
2156 *
2157 * @return
2158 * TRUE if supported
2159 ************************************************************************************************************************
2160 */
2161 BOOL_32 Gfx9Lib::IsEquationSupported(
2162 AddrResourceType rsrcType,
2163 AddrSwizzleMode swMode,
2164 UINT_32 elementBytesLog2) const
2165 {
2166 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2167 (IsLinear(swMode) == FALSE) &&
2168 (((IsTex2d(rsrcType) == TRUE) &&
2169 ((elementBytesLog2 < 4) ||
2170 ((IsRotateSwizzle(swMode) == FALSE) &&
2171 (IsZOrderSwizzle(swMode) == FALSE)))) ||
2172 ((IsTex3d(rsrcType) == TRUE) &&
2173 (IsRotateSwizzle(swMode) == FALSE) &&
2174 (IsBlock256b(swMode) == FALSE)));
2175
2176 return supported;
2177 }
2178
2179 /**
2180 ************************************************************************************************************************
2181 * Gfx9Lib::InitEquationTable
2182 *
2183 * @brief
2184 * Initialize Equation table.
2185 *
2186 * @return
2187 * N/A
2188 ************************************************************************************************************************
2189 */
2190 VOID Gfx9Lib::InitEquationTable()
2191 {
2192 memset(m_equationTable, 0, sizeof(m_equationTable));
2193
2194 // Loop all possible resource type (2D/3D)
2195 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2196 {
2197 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2198
2199 // Loop all possible swizzle mode
2200 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
2201 {
2202 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2203
2204 // Loop all possible bpp
2205 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2206 {
2207 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2208
2209 // Check if the input is supported
2210 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2211 {
2212 ADDR_EQUATION equation;
2213 ADDR_E_RETURNCODE retCode;
2214
2215 memset(&equation, 0, sizeof(ADDR_EQUATION));
2216
2217 // Generate the equation
2218 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2219 {
2220 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2221 }
2222 else if (IsThin(rsrcType, swMode))
2223 {
2224 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2225 }
2226 else
2227 {
2228 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2229 }
2230
2231 // Only fill the equation into the table if the return code is ADDR_OK,
2232 // otherwise if the return code is not ADDR_OK, it indicates this is not
2233 // a valid input, we do nothing but just fill invalid equation index
2234 // into the lookup table.
2235 if (retCode == ADDR_OK)
2236 {
2237 equationIndex = m_numEquations;
2238 ADDR_ASSERT(equationIndex < EquationTableSize);
2239
2240 m_equationTable[equationIndex] = equation;
2241
2242 m_numEquations++;
2243 }
2244 else
2245 {
2246 ADDR_ASSERT_ALWAYS();
2247 }
2248 }
2249
2250 // Fill the index into the lookup table, if the combination is not supported
2251 // fill the invalid equation index
2252 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2253 }
2254 }
2255 }
2256 }
2257
2258 /**
2259 ************************************************************************************************************************
2260 * Gfx9Lib::HwlGetEquationIndex
2261 *
2262 * @brief
2263 * Interface function stub of GetEquationIndex
2264 *
2265 * @return
2266 * ADDR_E_RETURNCODE
2267 ************************************************************************************************************************
2268 */
2269 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2270 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2271 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2272 ) const
2273 {
2274 AddrResourceType rsrcType = pIn->resourceType;
2275 AddrSwizzleMode swMode = pIn->swizzleMode;
2276 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2277 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2278
2279 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2280 {
2281 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2282 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2283
2284 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2285 }
2286
2287 if (pOut->pMipInfo != NULL)
2288 {
2289 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2290 {
2291 pOut->pMipInfo[i].equationIndex = index;
2292 }
2293 }
2294
2295 return index;
2296 }
2297
2298 /**
2299 ************************************************************************************************************************
2300 * Gfx9Lib::HwlComputeBlock256Equation
2301 *
2302 * @brief
2303 * Interface function stub of ComputeBlock256Equation
2304 *
2305 * @return
2306 * ADDR_E_RETURNCODE
2307 ************************************************************************************************************************
2308 */
2309 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2310 AddrResourceType rsrcType,
2311 AddrSwizzleMode swMode,
2312 UINT_32 elementBytesLog2,
2313 ADDR_EQUATION* pEquation) const
2314 {
2315 ADDR_E_RETURNCODE ret = ADDR_OK;
2316
2317 pEquation->numBits = 8;
2318
2319 UINT_32 i = 0;
2320 for (; i < elementBytesLog2; i++)
2321 {
2322 InitChannel(1, 0 , i, &pEquation->addr[i]);
2323 }
2324
2325 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2326
2327 const UINT_32 maxBitsUsed = 4;
2328 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2329 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2330
2331 for (i = 0; i < maxBitsUsed; i++)
2332 {
2333 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2334 InitChannel(1, 1, i, &y[i]);
2335 }
2336
2337 if (IsStandardSwizzle(rsrcType, swMode))
2338 {
2339 switch (elementBytesLog2)
2340 {
2341 case 0:
2342 pixelBit[0] = x[0];
2343 pixelBit[1] = x[1];
2344 pixelBit[2] = x[2];
2345 pixelBit[3] = x[3];
2346 pixelBit[4] = y[0];
2347 pixelBit[5] = y[1];
2348 pixelBit[6] = y[2];
2349 pixelBit[7] = y[3];
2350 break;
2351 case 1:
2352 pixelBit[0] = x[0];
2353 pixelBit[1] = x[1];
2354 pixelBit[2] = x[2];
2355 pixelBit[3] = y[0];
2356 pixelBit[4] = y[1];
2357 pixelBit[5] = y[2];
2358 pixelBit[6] = x[3];
2359 break;
2360 case 2:
2361 pixelBit[0] = x[0];
2362 pixelBit[1] = x[1];
2363 pixelBit[2] = y[0];
2364 pixelBit[3] = y[1];
2365 pixelBit[4] = y[2];
2366 pixelBit[5] = x[2];
2367 break;
2368 case 3:
2369 pixelBit[0] = x[0];
2370 pixelBit[1] = y[0];
2371 pixelBit[2] = y[1];
2372 pixelBit[3] = x[1];
2373 pixelBit[4] = x[2];
2374 break;
2375 case 4:
2376 pixelBit[0] = y[0];
2377 pixelBit[1] = y[1];
2378 pixelBit[2] = x[0];
2379 pixelBit[3] = x[1];
2380 break;
2381 default:
2382 ADDR_ASSERT_ALWAYS();
2383 ret = ADDR_INVALIDPARAMS;
2384 break;
2385 }
2386 }
2387 else if (IsDisplaySwizzle(rsrcType, swMode))
2388 {
2389 switch (elementBytesLog2)
2390 {
2391 case 0:
2392 pixelBit[0] = x[0];
2393 pixelBit[1] = x[1];
2394 pixelBit[2] = x[2];
2395 pixelBit[3] = y[1];
2396 pixelBit[4] = y[0];
2397 pixelBit[5] = y[2];
2398 pixelBit[6] = x[3];
2399 pixelBit[7] = y[3];
2400 break;
2401 case 1:
2402 pixelBit[0] = x[0];
2403 pixelBit[1] = x[1];
2404 pixelBit[2] = x[2];
2405 pixelBit[3] = y[0];
2406 pixelBit[4] = y[1];
2407 pixelBit[5] = y[2];
2408 pixelBit[6] = x[3];
2409 break;
2410 case 2:
2411 pixelBit[0] = x[0];
2412 pixelBit[1] = x[1];
2413 pixelBit[2] = y[0];
2414 pixelBit[3] = x[2];
2415 pixelBit[4] = y[1];
2416 pixelBit[5] = y[2];
2417 break;
2418 case 3:
2419 pixelBit[0] = x[0];
2420 pixelBit[1] = y[0];
2421 pixelBit[2] = x[1];
2422 pixelBit[3] = x[2];
2423 pixelBit[4] = y[1];
2424 break;
2425 case 4:
2426 pixelBit[0] = x[0];
2427 pixelBit[1] = y[0];
2428 pixelBit[2] = x[1];
2429 pixelBit[3] = y[1];
2430 break;
2431 default:
2432 ADDR_ASSERT_ALWAYS();
2433 ret = ADDR_INVALIDPARAMS;
2434 break;
2435 }
2436 }
2437 else if (IsRotateSwizzle(swMode))
2438 {
2439 switch (elementBytesLog2)
2440 {
2441 case 0:
2442 pixelBit[0] = y[0];
2443 pixelBit[1] = y[1];
2444 pixelBit[2] = y[2];
2445 pixelBit[3] = x[1];
2446 pixelBit[4] = x[0];
2447 pixelBit[5] = x[2];
2448 pixelBit[6] = x[3];
2449 pixelBit[7] = y[3];
2450 break;
2451 case 1:
2452 pixelBit[0] = y[0];
2453 pixelBit[1] = y[1];
2454 pixelBit[2] = y[2];
2455 pixelBit[3] = x[0];
2456 pixelBit[4] = x[1];
2457 pixelBit[5] = x[2];
2458 pixelBit[6] = x[3];
2459 break;
2460 case 2:
2461 pixelBit[0] = y[0];
2462 pixelBit[1] = y[1];
2463 pixelBit[2] = x[0];
2464 pixelBit[3] = y[2];
2465 pixelBit[4] = x[1];
2466 pixelBit[5] = x[2];
2467 break;
2468 case 3:
2469 pixelBit[0] = y[0];
2470 pixelBit[1] = x[0];
2471 pixelBit[2] = y[1];
2472 pixelBit[3] = x[1];
2473 pixelBit[4] = x[2];
2474 break;
2475 default:
2476 ADDR_ASSERT_ALWAYS();
2477 case 4:
2478 ret = ADDR_INVALIDPARAMS;
2479 break;
2480 }
2481 }
2482 else
2483 {
2484 ADDR_ASSERT_ALWAYS();
2485 ret = ADDR_INVALIDPARAMS;
2486 }
2487
2488 // Post validation
2489 if (ret == ADDR_OK)
2490 {
2491 ASSERTED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2492 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2493 (microBlockDim.w * (1 << elementBytesLog2)));
2494 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2495 }
2496
2497 return ret;
2498 }
2499
2500 /**
2501 ************************************************************************************************************************
2502 * Gfx9Lib::HwlComputeThinEquation
2503 *
2504 * @brief
2505 * Interface function stub of ComputeThinEquation
2506 *
2507 * @return
2508 * ADDR_E_RETURNCODE
2509 ************************************************************************************************************************
2510 */
2511 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2512 AddrResourceType rsrcType,
2513 AddrSwizzleMode swMode,
2514 UINT_32 elementBytesLog2,
2515 ADDR_EQUATION* pEquation) const
2516 {
2517 ADDR_E_RETURNCODE ret = ADDR_OK;
2518
2519 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2520
2521 UINT_32 maxXorBits = blockSizeLog2;
2522 if (IsNonPrtXor(swMode))
2523 {
2524 // For non-prt-xor, maybe need to initialize some more bits for xor
2525 // The highest xor bit used in equation will be max the following 3 items:
2526 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2527 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2528 // 3. blockSizeLog2
2529
2530 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2531 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2532 GetPipeXorBits(blockSizeLog2) +
2533 2 * GetBankXorBits(blockSizeLog2));
2534 }
2535
2536 const UINT_32 maxBitsUsed = 14;
2537 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2538 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2539 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2540
2541 const UINT_32 extraXorBits = 16;
2542 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2543 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2544
2545 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2546 {
2547 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2548 InitChannel(1, 1, i, &y[i]);
2549 }
2550
2551 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2552
2553 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2554 {
2555 InitChannel(1, 0 , i, &pixelBit[i]);
2556 }
2557
2558 UINT_32 xIdx = 0;
2559 UINT_32 yIdx = 0;
2560 UINT_32 lowBits = 0;
2561
2562 if (IsZOrderSwizzle(swMode))
2563 {
2564 if (elementBytesLog2 <= 3)
2565 {
2566 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2567 {
2568 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2569 }
2570
2571 lowBits = 6;
2572 }
2573 else
2574 {
2575 ret = ADDR_INVALIDPARAMS;
2576 }
2577 }
2578 else
2579 {
2580 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2581
2582 if (ret == ADDR_OK)
2583 {
2584 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2585 xIdx = Log2(microBlockDim.w);
2586 yIdx = Log2(microBlockDim.h);
2587 lowBits = 8;
2588 }
2589 }
2590
2591 if (ret == ADDR_OK)
2592 {
2593 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2594 {
2595 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2596 }
2597
2598 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2599 {
2600 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2601 }
2602
2603 if (IsXor(swMode))
2604 {
2605 // Fill XOR bits
2606 UINT_32 pipeStart = m_pipeInterleaveLog2;
2607 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2608
2609 UINT_32 bankStart = pipeStart + pipeXorBits;
2610 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2611
2612 for (UINT_32 i = 0; i < pipeXorBits; i++)
2613 {
2614 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2615 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2616 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2617
2618 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2619 }
2620
2621 for (UINT_32 i = 0; i < bankXorBits; i++)
2622 {
2623 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2624 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2625 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2626
2627 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2628 }
2629
2630 if (IsPrt(swMode) == FALSE)
2631 {
2632 for (UINT_32 i = 0; i < pipeXorBits; i++)
2633 {
2634 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2635 }
2636
2637 for (UINT_32 i = 0; i < bankXorBits; i++)
2638 {
2639 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2640 }
2641 }
2642 }
2643
2644 pEquation->numBits = blockSizeLog2;
2645 }
2646
2647 return ret;
2648 }
2649
2650 /**
2651 ************************************************************************************************************************
2652 * Gfx9Lib::HwlComputeThickEquation
2653 *
2654 * @brief
2655 * Interface function stub of ComputeThickEquation
2656 *
2657 * @return
2658 * ADDR_E_RETURNCODE
2659 ************************************************************************************************************************
2660 */
2661 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2662 AddrResourceType rsrcType,
2663 AddrSwizzleMode swMode,
2664 UINT_32 elementBytesLog2,
2665 ADDR_EQUATION* pEquation) const
2666 {
2667 ADDR_E_RETURNCODE ret = ADDR_OK;
2668
2669 ADDR_ASSERT(IsTex3d(rsrcType));
2670
2671 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2672
2673 UINT_32 maxXorBits = blockSizeLog2;
2674 if (IsNonPrtXor(swMode))
2675 {
2676 // For non-prt-xor, maybe need to initialize some more bits for xor
2677 // The highest xor bit used in equation will be max the following 3:
2678 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2679 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2680 // 3. blockSizeLog2
2681
2682 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2683 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2684 GetPipeXorBits(blockSizeLog2) +
2685 3 * GetBankXorBits(blockSizeLog2));
2686 }
2687
2688 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2689 {
2690 InitChannel(1, 0 , i, &pEquation->addr[i]);
2691 }
2692
2693 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2694
2695 const UINT_32 maxBitsUsed = 12;
2696 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2697 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2698 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2699 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2700
2701 const UINT_32 extraXorBits = 24;
2702 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2703 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2704
2705 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2706 {
2707 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2708 InitChannel(1, 1, i, &y[i]);
2709 InitChannel(1, 2, i, &z[i]);
2710 }
2711
2712 if (IsZOrderSwizzle(swMode))
2713 {
2714 switch (elementBytesLog2)
2715 {
2716 case 0:
2717 pixelBit[0] = x[0];
2718 pixelBit[1] = y[0];
2719 pixelBit[2] = x[1];
2720 pixelBit[3] = y[1];
2721 pixelBit[4] = z[0];
2722 pixelBit[5] = z[1];
2723 pixelBit[6] = x[2];
2724 pixelBit[7] = z[2];
2725 pixelBit[8] = y[2];
2726 pixelBit[9] = x[3];
2727 break;
2728 case 1:
2729 pixelBit[0] = x[0];
2730 pixelBit[1] = y[0];
2731 pixelBit[2] = x[1];
2732 pixelBit[3] = y[1];
2733 pixelBit[4] = z[0];
2734 pixelBit[5] = z[1];
2735 pixelBit[6] = z[2];
2736 pixelBit[7] = y[2];
2737 pixelBit[8] = x[2];
2738 break;
2739 case 2:
2740 pixelBit[0] = x[0];
2741 pixelBit[1] = y[0];
2742 pixelBit[2] = x[1];
2743 pixelBit[3] = z[0];
2744 pixelBit[4] = y[1];
2745 pixelBit[5] = z[1];
2746 pixelBit[6] = y[2];
2747 pixelBit[7] = x[2];
2748 break;
2749 case 3:
2750 pixelBit[0] = x[0];
2751 pixelBit[1] = y[0];
2752 pixelBit[2] = z[0];
2753 pixelBit[3] = x[1];
2754 pixelBit[4] = z[1];
2755 pixelBit[5] = y[1];
2756 pixelBit[6] = x[2];
2757 break;
2758 case 4:
2759 pixelBit[0] = x[0];
2760 pixelBit[1] = y[0];
2761 pixelBit[2] = z[0];
2762 pixelBit[3] = z[1];
2763 pixelBit[4] = y[1];
2764 pixelBit[5] = x[1];
2765 break;
2766 default:
2767 ADDR_ASSERT_ALWAYS();
2768 ret = ADDR_INVALIDPARAMS;
2769 break;
2770 }
2771 }
2772 else if (IsStandardSwizzle(rsrcType, swMode))
2773 {
2774 switch (elementBytesLog2)
2775 {
2776 case 0:
2777 pixelBit[0] = x[0];
2778 pixelBit[1] = x[1];
2779 pixelBit[2] = x[2];
2780 pixelBit[3] = x[3];
2781 pixelBit[4] = y[0];
2782 pixelBit[5] = y[1];
2783 pixelBit[6] = z[0];
2784 pixelBit[7] = z[1];
2785 pixelBit[8] = z[2];
2786 pixelBit[9] = y[2];
2787 break;
2788 case 1:
2789 pixelBit[0] = x[0];
2790 pixelBit[1] = x[1];
2791 pixelBit[2] = x[2];
2792 pixelBit[3] = y[0];
2793 pixelBit[4] = y[1];
2794 pixelBit[5] = z[0];
2795 pixelBit[6] = z[1];
2796 pixelBit[7] = z[2];
2797 pixelBit[8] = y[2];
2798 break;
2799 case 2:
2800 pixelBit[0] = x[0];
2801 pixelBit[1] = x[1];
2802 pixelBit[2] = y[0];
2803 pixelBit[3] = y[1];
2804 pixelBit[4] = z[0];
2805 pixelBit[5] = z[1];
2806 pixelBit[6] = y[2];
2807 pixelBit[7] = x[2];
2808 break;
2809 case 3:
2810 pixelBit[0] = x[0];
2811 pixelBit[1] = y[0];
2812 pixelBit[2] = y[1];
2813 pixelBit[3] = z[0];
2814 pixelBit[4] = z[1];
2815 pixelBit[5] = x[1];
2816 pixelBit[6] = x[2];
2817 break;
2818 case 4:
2819 pixelBit[0] = y[0];
2820 pixelBit[1] = y[1];
2821 pixelBit[2] = z[0];
2822 pixelBit[3] = z[1];
2823 pixelBit[4] = x[0];
2824 pixelBit[5] = x[1];
2825 break;
2826 default:
2827 ADDR_ASSERT_ALWAYS();
2828 ret = ADDR_INVALIDPARAMS;
2829 break;
2830 }
2831 }
2832 else
2833 {
2834 ADDR_ASSERT_ALWAYS();
2835 ret = ADDR_INVALIDPARAMS;
2836 }
2837
2838 if (ret == ADDR_OK)
2839 {
2840 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2841 UINT_32 xIdx = Log2(microBlockDim.w);
2842 UINT_32 yIdx = Log2(microBlockDim.h);
2843 UINT_32 zIdx = Log2(microBlockDim.d);
2844
2845 pixelBit = pEquation->addr;
2846
2847 const UINT_32 lowBits = 10;
2848 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2849 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2850
2851 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2852 {
2853 if ((i % 3) == 0)
2854 {
2855 pixelBit[i] = x[xIdx++];
2856 }
2857 else if ((i % 3) == 1)
2858 {
2859 pixelBit[i] = z[zIdx++];
2860 }
2861 else
2862 {
2863 pixelBit[i] = y[yIdx++];
2864 }
2865 }
2866
2867 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2868 {
2869 if ((i % 3) == 0)
2870 {
2871 xorExtra[i - blockSizeLog2] = x[xIdx++];
2872 }
2873 else if ((i % 3) == 1)
2874 {
2875 xorExtra[i - blockSizeLog2] = z[zIdx++];
2876 }
2877 else
2878 {
2879 xorExtra[i - blockSizeLog2] = y[yIdx++];
2880 }
2881 }
2882
2883 if (IsXor(swMode))
2884 {
2885 // Fill XOR bits
2886 UINT_32 pipeStart = m_pipeInterleaveLog2;
2887 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2888 for (UINT_32 i = 0; i < pipeXorBits; i++)
2889 {
2890 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2891 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2892 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2893
2894 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2895
2896 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2897 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2898 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2899
2900 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2901 }
2902
2903 UINT_32 bankStart = pipeStart + pipeXorBits;
2904 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2905 for (UINT_32 i = 0; i < bankXorBits; i++)
2906 {
2907 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2908 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2909 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2910
2911 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2912
2913 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2914 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2915 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2916
2917 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2918 }
2919 }
2920
2921 pEquation->numBits = blockSizeLog2;
2922 }
2923
2924 return ret;
2925 }
2926
2927 /**
2928 ************************************************************************************************************************
2929 * Gfx9Lib::IsValidDisplaySwizzleMode
2930 *
2931 * @brief
2932 * Check if a swizzle mode is supported by display engine
2933 *
2934 * @return
2935 * TRUE is swizzle mode is supported by display engine
2936 ************************************************************************************************************************
2937 */
2938 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2939 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2940 {
2941 BOOL_32 support = FALSE;
2942
2943 if (m_settings.isDce12)
2944 {
2945 switch (pIn->swizzleMode)
2946 {
2947 case ADDR_SW_256B_D:
2948 case ADDR_SW_256B_R:
2949 support = (pIn->bpp == 32);
2950 break;
2951
2952 case ADDR_SW_LINEAR:
2953 case ADDR_SW_4KB_D:
2954 case ADDR_SW_4KB_R:
2955 case ADDR_SW_64KB_D:
2956 case ADDR_SW_64KB_R:
2957 case ADDR_SW_VAR_D:
2958 case ADDR_SW_VAR_R:
2959 case ADDR_SW_4KB_D_X:
2960 case ADDR_SW_4KB_R_X:
2961 case ADDR_SW_64KB_D_X:
2962 case ADDR_SW_64KB_R_X:
2963 case ADDR_SW_VAR_D_X:
2964 case ADDR_SW_VAR_R_X:
2965 support = (pIn->bpp <= 64);
2966 break;
2967
2968 default:
2969 break;
2970 }
2971 }
2972 else if (m_settings.isDcn1)
2973 {
2974 switch (pIn->swizzleMode)
2975 {
2976 case ADDR_SW_4KB_D:
2977 case ADDR_SW_64KB_D:
2978 case ADDR_SW_VAR_D:
2979 case ADDR_SW_64KB_D_T:
2980 case ADDR_SW_4KB_D_X:
2981 case ADDR_SW_64KB_D_X:
2982 case ADDR_SW_VAR_D_X:
2983 support = (pIn->bpp == 64);
2984 break;
2985
2986 case ADDR_SW_LINEAR:
2987 case ADDR_SW_4KB_S:
2988 case ADDR_SW_64KB_S:
2989 case ADDR_SW_VAR_S:
2990 case ADDR_SW_64KB_S_T:
2991 case ADDR_SW_4KB_S_X:
2992 case ADDR_SW_64KB_S_X:
2993 case ADDR_SW_VAR_S_X:
2994 support = (pIn->bpp <= 64);
2995 break;
2996
2997 default:
2998 break;
2999 }
3000 }
3001 else
3002 {
3003 ADDR_NOT_IMPLEMENTED();
3004 }
3005
3006 return support;
3007 }
3008
3009 /**
3010 ************************************************************************************************************************
3011 * Gfx9Lib::HwlComputePipeBankXor
3012 *
3013 * @brief
3014 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3015 *
3016 * @return
3017 * PipeBankXor value
3018 ************************************************************************************************************************
3019 */
3020 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3021 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3022 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
3023 {
3024 if (IsXor(pIn->swizzleMode))
3025 {
3026 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3027 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3028 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3029
3030 UINT_32 pipeXor = 0;
3031 UINT_32 bankXor = 0;
3032
3033 const UINT_32 bankMask = (1 << bankBits) - 1;
3034 const UINT_32 index = pIn->surfIndex & bankMask;
3035
3036 const UINT_32 bpp = pIn->flags.fmask ?
3037 GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3038 if (bankBits == 4)
3039 {
3040 static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3041 static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3042
3043 bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3044 }
3045 else if (bankBits > 0)
3046 {
3047 UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3048 bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3049 bankXor = (index * bankIncrease) & bankMask;
3050 }
3051
3052 pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3053 }
3054 else
3055 {
3056 pOut->pipeBankXor = 0;
3057 }
3058
3059 return ADDR_OK;
3060 }
3061
3062 /**
3063 ************************************************************************************************************************
3064 * Gfx9Lib::HwlComputeSlicePipeBankXor
3065 *
3066 * @brief
3067 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3068 *
3069 * @return
3070 * PipeBankXor value
3071 ************************************************************************************************************************
3072 */
3073 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3074 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3075 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const
3076 {
3077 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3078 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3079 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3080
3081 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3082 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3083
3084 pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3085
3086 return ADDR_OK;
3087 }
3088
3089 /**
3090 ************************************************************************************************************************
3091 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3092 *
3093 * @brief
3094 * Compute sub resource offset to support swizzle pattern
3095 *
3096 * @return
3097 * Offset
3098 ************************************************************************************************************************
3099 */
3100 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3101 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3102 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const
3103 {
3104 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3105
3106 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3107 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3108 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3109 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3110 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3111 UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3112
3113 pOut->offset = pIn->slice * pIn->sliceSize +
3114 pIn->macroBlockOffset +
3115 (pIn->mipTailOffset ^ pipeBankXor) -
3116 static_cast<UINT_64>(pipeBankXor);
3117 return ADDR_OK;
3118 }
3119
3120 /**
3121 ************************************************************************************************************************
3122 * Gfx9Lib::ValidateNonSwModeParams
3123 *
3124 * @brief
3125 * Validate compute surface info params except swizzle mode
3126 *
3127 * @return
3128 * TRUE if parameters are valid, FALSE otherwise
3129 ************************************************************************************************************************
3130 */
3131 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3132 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3133 {
3134 BOOL_32 valid = TRUE;
3135
3136 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3137 {
3138 ADDR_ASSERT_ALWAYS();
3139 valid = FALSE;
3140 }
3141
3142 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3143 {
3144 ADDR_ASSERT_ALWAYS();
3145 valid = FALSE;
3146 }
3147
3148 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3149 const BOOL_32 msaa = (pIn->numFrags > 1);
3150 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3151
3152 const AddrResourceType rsrcType = pIn->resourceType;
3153 const BOOL_32 tex3d = IsTex3d(rsrcType);
3154 const BOOL_32 tex2d = IsTex2d(rsrcType);
3155 const BOOL_32 tex1d = IsTex1d(rsrcType);
3156
3157 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3158 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3159 const BOOL_32 display = flags.display || flags.rotated;
3160 const BOOL_32 stereo = flags.qbStereo;
3161 const BOOL_32 fmask = flags.fmask;
3162
3163 // Resource type check
3164 if (tex1d)
3165 {
3166 if (msaa || zbuffer || display || stereo || isBc || fmask)
3167 {
3168 ADDR_ASSERT_ALWAYS();
3169 valid = FALSE;
3170 }
3171 }
3172 else if (tex2d)
3173 {
3174 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
3175 {
3176 ADDR_ASSERT_ALWAYS();
3177 valid = FALSE;
3178 }
3179 }
3180 else if (tex3d)
3181 {
3182 if (msaa || zbuffer || display || stereo || fmask)
3183 {
3184 ADDR_ASSERT_ALWAYS();
3185 valid = FALSE;
3186 }
3187 }
3188 else
3189 {
3190 ADDR_ASSERT_ALWAYS();
3191 valid = FALSE;
3192 }
3193
3194 return valid;
3195 }
3196
3197 /**
3198 ************************************************************************************************************************
3199 * Gfx9Lib::ValidateSwModeParams
3200 *
3201 * @brief
3202 * Validate compute surface info related to swizzle mode
3203 *
3204 * @return
3205 * TRUE if parameters are valid, FALSE otherwise
3206 ************************************************************************************************************************
3207 */
3208 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3209 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3210 {
3211 BOOL_32 valid = TRUE;
3212
3213 if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
3214 {
3215 ADDR_ASSERT_ALWAYS();
3216 valid = FALSE;
3217 }
3218
3219 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3220 const BOOL_32 msaa = (pIn->numFrags > 1);
3221 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3222 const BOOL_32 is422 = ElemLib::IsMacroPixelPacked(pIn->format);
3223
3224 const AddrResourceType rsrcType = pIn->resourceType;
3225 const BOOL_32 tex3d = IsTex3d(rsrcType);
3226 const BOOL_32 tex2d = IsTex2d(rsrcType);
3227 const BOOL_32 tex1d = IsTex1d(rsrcType);
3228
3229 const AddrSwizzleMode swizzle = pIn->swizzleMode;
3230 const BOOL_32 linear = IsLinear(swizzle);
3231 const BOOL_32 blk256B = IsBlock256b(swizzle);
3232 const BOOL_32 blkVar = IsBlockVariable(swizzle);
3233 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
3234
3235 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3236 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3237 const BOOL_32 color = flags.color;
3238 const BOOL_32 texture = flags.texture;
3239 const BOOL_32 display = flags.display || flags.rotated;
3240 const BOOL_32 prt = flags.prt;
3241 const BOOL_32 fmask = flags.fmask;
3242
3243 const BOOL_32 thin3d = tex3d && flags.view3dAs2dArray;
3244 const BOOL_32 zMaxMip = tex3d && mipmap &&
3245 (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3246
3247 // Misc check
3248 if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3249 {
3250 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3251 ADDR_ASSERT_ALWAYS();
3252 valid = FALSE;
3253 }
3254
3255 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3256 {
3257 ADDR_ASSERT_ALWAYS();
3258 valid = FALSE;
3259 }
3260
3261 if ((pIn->bpp == 96) && (linear == FALSE))
3262 {
3263 ADDR_ASSERT_ALWAYS();
3264 valid = FALSE;
3265 }
3266
3267 if (prt && isNonPrtXor)
3268 {
3269 ADDR_ASSERT_ALWAYS();
3270 valid = FALSE;
3271 }
3272
3273 // Resource type check
3274 if (tex1d)
3275 {
3276 if (linear == FALSE)
3277 {
3278 ADDR_ASSERT_ALWAYS();
3279 valid = FALSE;
3280 }
3281 }
3282
3283 // Swizzle type check
3284 if (linear)
3285 {
3286 if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3287 ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3288 {
3289 ADDR_ASSERT_ALWAYS();
3290 valid = FALSE;
3291 }
3292 }
3293 else if (IsZOrderSwizzle(swizzle))
3294 {
3295 if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3296 {
3297 ADDR_ASSERT_ALWAYS();
3298 valid = FALSE;
3299 }
3300 }
3301 else if (IsStandardSwizzle(swizzle))
3302 {
3303 if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3304 {
3305 ADDR_ASSERT_ALWAYS();
3306 valid = FALSE;
3307 }
3308 }
3309 else if (IsDisplaySwizzle(swizzle))
3310 {
3311 if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3312 {
3313 ADDR_ASSERT_ALWAYS();
3314 valid = FALSE;
3315 }
3316 }
3317 else if (IsRotateSwizzle(swizzle))
3318 {
3319 if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3320 {
3321 ADDR_ASSERT_ALWAYS();
3322 valid = FALSE;
3323 }
3324 }
3325 else
3326 {
3327 ADDR_ASSERT_ALWAYS();
3328 valid = FALSE;
3329 }
3330
3331 // Block type check
3332 if (blk256B)
3333 {
3334 if (prt || zbuffer || tex3d || mipmap || msaa)
3335 {
3336 ADDR_ASSERT_ALWAYS();
3337 valid = FALSE;
3338 }
3339 }
3340 else if (blkVar)
3341 {
3342 ADDR_ASSERT_ALWAYS();
3343 valid = FALSE;
3344 }
3345
3346 return valid;
3347 }
3348
3349 /**
3350 ************************************************************************************************************************
3351 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3352 *
3353 * @brief
3354 * Compute surface info sanity check
3355 *
3356 * @return
3357 * ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3358 ************************************************************************************************************************
3359 */
3360 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3361 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3362 {
3363 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3364 }
3365
3366 /**
3367 ************************************************************************************************************************
3368 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3369 *
3370 * @brief
3371 * Internal function to get suggested surface information for client to use
3372 *
3373 * @return
3374 * ADDR_E_RETURNCODE
3375 ************************************************************************************************************************
3376 */
3377 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3378 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3379 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const
3380 {
// Overview:
//   1. Normalize the inputs (zero counts become 1, format is converted to bpp / element dimensions).
//   2. Validate the swizzle-mode-independent parameters.
//   3. Build the bit set of allowed swizzle modes: start from the client's forbidden/preferred settings and
//      strip every mode ruled out by the resource type, format, usage flags, MSAA, mip count and display block.
//   4. Report the surviving set in pOut (validSwModeSet, canXor, validBlockSet, validSwTypeSet).
//   5. Reduce the set to one mode: pick the block size first, then the swizzle type, then the highest
//      remaining mode bit.
// Returns ADDR_OK when a mode was selected; ADDR_INVALIDPARAMS when validation fails or no mode survives.
// Note: pOut->resourceType is written before validation, so it is modified even on failure.
3381 ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3382 ElemLib* pElemLib = GetElemLib();
3383
// Zero dimensions / sample counts are treated as 1; a zero fragment count defaults to the sample count
3384 UINT_32 bpp = pIn->bpp;
3385 UINT_32 width = Max(pIn->width, 1u);
3386 UINT_32 height = Max(pIn->height, 1u);
3387 UINT_32 numSamples = Max(pIn->numSamples, 1u);
3388 UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3389
3390 if (pIn->flags.fmask)
3391 {
// FMASK is handled as a single-sample 2D surface whose bpp is derived from the sample/fragment counts
3392 bpp = GetFmaskBpp(numSamples, numFrags);
3393 numFrags = 1;
3394 numSamples = 1;
3395 pOut->resourceType = ADDR_RSRC_TEX_2D;
3396 }
3397 else
3398 {
3399 // Set format to INVALID will skip this conversion
3400 if (pIn->format != ADDR_FMT_INVALID)
3401 {
3402 UINT_32 expandX, expandY;
3403
3404 // Don't care for this case
3405 ElemMode elemMode = ADDR_UNCOMPRESSED;
3406
3407 // Get compression/expansion factors and element mode which indicates compression/expansion
3408 bpp = pElemLib->GetBitsPerPixel(pIn->format,
3409 &elemMode,
3410 &expandX,
3411 &expandY);
3412
// basePitch is only a scratch argument for AdjustSurfaceInfo; its value is not read afterwards.
// bpp, width and height are passed by pointer and may be updated by the call.
3413 UINT_32 basePitch = 0;
3414 GetElemLib()->AdjustSurfaceInfo(elemMode,
3415 expandX,
3416 expandY,
3417 &bpp,
3418 &basePitch,
3419 &width,
3420 &height);
3421 }
3422
3423 // The output may get changed for volume(3D) texture resource in future
3424 pOut->resourceType = pIn->resourceType;
3425 }
3426
3427 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
3428 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3429 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
3430 const BOOL_32 displayRsrc = pIn->flags.display || pIn->flags.rotated;
3431
3432 // Pre sanity check on non swizzle mode parameters
// localIn carries the normalized values; its swizzleMode is only filled in by the DEBUG post check below
3433 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3434 localIn.flags = pIn->flags;
3435 localIn.resourceType = pOut->resourceType;
3436 localIn.format = pIn->format;
3437 localIn.bpp = bpp;
3438 localIn.width = width;
3439 localIn.height = height;
3440 localIn.numSlices = numSlices;
3441 localIn.numMipLevels = numMipLevels;
3442 localIn.numSamples = numSamples;
3443 localIn.numFrags = numFrags;
3444
3445 if (ValidateNonSwModeParams(&localIn))
3446 {
3447 // Forbid swizzle mode(s) by client setting, for simplicity we never allow VAR swizzle mode for GFX9
3448 ADDR2_SWMODE_SET allowedSwModeSet = {};
3449 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3450 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask;
3451 allowedSwModeSet.value |= pIn->forbiddenBlock.macro4KB ? 0 : Gfx9Blk4KBSwModeMask;
3452 allowedSwModeSet.value |= pIn->forbiddenBlock.macro64KB ? 0 : Gfx9Blk64KBSwModeMask;
3453
// A non-zero preferredSwSet acts as a whitelist: every swizzle type whose bit is clear is removed
3454 if (pIn->preferredSwSet.value != 0)
3455 {
3456 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3457 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3458 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3459 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3460 }
3461
3462 if (pIn->noXor)
3463 {
3464 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3465 }
3466
// A non-zero maxAlign removes every block size that is larger than it
3467 if (pIn->maxAlign > 0)
3468 {
3469 if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
3470 {
3471 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3472 }
3473
3474 if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
3475 {
3476 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3477 }
3478
3479 if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
3480 {
3481 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3482 }
3483 }
3484
3485 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3486 switch (pOut->resourceType)
3487 {
3488 case ADDR_RSRC_TEX_1D:
3489 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3490 break;
3491
3492 case ADDR_RSRC_TEX_2D:
3493 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3494
// Mirrors ValidateSwModeParams: 2D surfaces above 64 bpp may use neither rotate nor Z swizzle modes
3495 if (bpp > 64)
3496 {
3497 allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3498 }
3499 break;
3500
3501 case ADDR_RSRC_TEX_3D:
3502 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3503
3504 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3505 {
3506 // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3507 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3508 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3509 allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3510 }
3511
// Mirrors ValidateSwModeParams: 128 bpp 3D color surfaces may not use standard swizzle modes
3512 if ((bpp == 128) && pIn->flags.color)
3513 {
3514 allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3515 }
3516
3517 if (pIn->flags.view3dAs2dArray)
3518 {
3519 allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3520 }
3521 break;
3522
3523 default:
3524 ADDR_ASSERT_ALWAYS();
3525 allowedSwModeSet.value = 0;
3526 break;
3527 }
3528
// ADDR_FMT_32_32_32 is restricted to linear modes
3529 if (pIn->format == ADDR_FMT_32_32_32)
3530 {
3531 allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3532 }
3533
// Block compressed formats: standard or display swizzle only; linear stays allowed unless used as a texture
3534 if (ElemLib::IsBlockCompressed(pIn->format))
3535 {
3536 if (pIn->flags.texture)
3537 {
3538 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3539 }
3540 else
3541 {
3542 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3543 }
3544 }
3545
// Z swizzle is removed for macro-pixel-packed formats and for MSAA surfaces that are wider than 32 bpp,
// color or unordered-access
3546 if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3547 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3548 {
3549 allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3550 }
3551
// FMASK and depth/stencil surfaces are restricted to Z swizzle modes
3552 if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3553 {
3554 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3555
3556 if (pIn->flags.noMetadata == FALSE)
3557 {
3558 if (pIn->flags.depth &&
3559 pIn->flags.texture &&
3560 (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3561 {
3562 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3563 // equation from wrong address within memory range a tile covered and use the
3564 // garbage data for compressed Z reading which finally leads to corruption.
3565 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3566 }
3567
3568 if (m_settings.htileCacheRbConflict &&
3569 (pIn->flags.depth || pIn->flags.stencil) &&
3570 (numSlices > 1) &&
3571 (pIn->flags.metaRbUnaligned == FALSE) &&
3572 (pIn->flags.metaPipeUnaligned == FALSE))
3573 {
3574 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3575 allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3576 }
3577 }
3578 }
3579
3580 if (msaa)
3581 {
3582 allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3583 }
3584
// When a 4KB block is too small for this fragment count, only 64KB block modes remain
3585 if ((numFrags > 1) &&
3586 (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
3587 {
3588 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3589 allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3590 }
3591
// Mirrors ValidateSwModeParams: 256B block modes are not allowed for mipmapped surfaces
3592 if (numMipLevels > 1)
3593 {
3594 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3595 }
3596
// Displayable surfaces are further limited to a bpp-dependent mask selected by the isDce12 / isDcn1 settings
3597 if (displayRsrc)
3598 {
3599 if (m_settings.isDce12)
3600 {
3601 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3602 }
3603 else if (m_settings.isDcn1)
3604 {
3605 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3606 }
3607 else
3608 {
3609 ADDR_NOT_IMPLEMENTED();
3610 }
3611 }
3612
3613 if (allowedSwModeSet.value != 0)
3614 {
3615 #if DEBUG
3616 // Post sanity check, at least AddrLib should accept the output generated by its own
// Walks the set bit by bit; bit index i is interpreted as the AddrSwizzleMode value i
3617 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3618
3619 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3620 {
3621 if (validateSwModeSet & 1)
3622 {
3623 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3624 ADDR_ASSERT(ValidateSwModeParams(&localIn));
3625 }
3626
3627 validateSwModeSet >>= 1;
3628 }
3629 #endif
3630
// Report the full set of valid choices before it is narrowed down to a single mode
3631 pOut->validSwModeSet = allowedSwModeSet;
3632 pOut->canXor = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3633 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet);
3634 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3635
3636 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3637
// No client preference is reported back as "all swizzle types"
3638 if (pOut->clientPreferredSwSet.value == 0)
3639 {
3640 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3641 }
3642
3643 if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3644 {
3645 pOut->swizzleMode = ADDR_SW_LINEAR;
3646 }
3647 else
3648 {
3649 // Always ignore linear swizzle mode if there is other choice.
3650 allowedSwModeSet.swLinear = 0;
3651
3652 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet);
3653
3654 // Determine block size if there is 2 or more block type candidates
3655 if (IsPow2(allowedBlockSet.value) == FALSE)
3656 {
// Per block type (256B, 4KB, 64KB): representative swizzle mode, block dimensions, padded surface
// dimensions and padded surface size
3657 const AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {ADDR_SW_256B, ADDR_SW_4KB, ADDR_SW_64KB};
3658 Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
3659 Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
3660 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3661
// A later (larger) block type replaces the current choice when
//     padSize * ratioHi <= minSize * ratioLow
// i.e. its padded size may be up to ratioLow/ratioHi times the current one: 2x by default, 1.5x with
// opt4space, and 1x (no growth allowed) with minimizeAlign.
3662 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
3663 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
// minSizeAlign rounded up to a power of two and divided by the element size in bytes, at least 1.
// NOTE(review): divides by (bpp >> 3), which is zero for bpp < 8 - confirm such bpp cannot reach here.
3664 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3665 UINT_32 minSizeBlk = AddrBlockMicro;
3666 UINT_64 minSize = 0;
3667
3668 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3669 {
3670 if (allowedBlockSet.value & (1 << i))
3671 {
3672 ComputeBlockDimensionForSurf(&blkDim[i].w,
3673 &blkDim[i].h,
3674 &blkDim[i].d,
3675 bpp,
3676 numFrags,
3677 pOut->resourceType,
3678 swMode[i]);
3679
// Displayable surfaces use a block width aligned to 32 when estimating the padded size
3680 if (displayRsrc)
3681 {
3682 blkDim[i].w = PowTwoAlign(blkDim[i].w, 32);
3683 }
3684
3685 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
3686 padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
3687
// minSize == 0 means no candidate has been recorded yet
3688 if ((minSize == 0) ||
3689 ((padSize[i] * ratioHi) <= (minSize * ratioLow)))
3690 {
3691 minSize = padSize[i];
3692 minSizeBlk = i;
3693 }
3694 }
3695 }
3696
// Force the 256B block when it is allowed, the surface fits within a single 256B block in width and
// height, and the requested size alignment does not exceed the 256B block size
3697 if ((allowedBlockSet.micro == TRUE) &&
3698 (width <= blkDim[AddrBlockMicro].w) &&
3699 (height <= blkDim[AddrBlockMicro].h) &&
3700 (NextPow2(pIn->minSizeAlign) <= GetBlockSize(ADDR_SW_256B)))
3701 {
3702 minSizeBlk = AddrBlockMicro;
3703 }
3704
// Keep only the swizzle modes of the chosen block type
3705 if (minSizeBlk == AddrBlockMicro)
3706 {
3707 allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3708 }
3709 else if (minSizeBlk == AddrBlock4KB)
3710 {
3711 allowedSwModeSet.value &= Gfx9Blk4KBSwModeMask;
3712 }
3713 else
3714 {
3715 ADDR_ASSERT(minSizeBlk == AddrBlock64KB);
3716 allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3717 }
3718 }
3719
3720 // Block type should be determined.
3721 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet).value));
3722
3723 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3724
3725 // Determine swizzle type if there is 2 or more swizzle type candidates
3726 if (IsPow2(allowedSwSet.value) == FALSE)
3727 {
// Block compressed formats: prefer display (D), otherwise standard (S)
3728 if (ElemLib::IsBlockCompressed(pIn->format))
3729 {
3730 if (allowedSwSet.sw_D)
3731 {
3732 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3733 }
3734 else
3735 {
3736 ADDR_ASSERT(allowedSwSet.sw_S);
3737 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3738 }
3739 }
// Macro-pixel-packed formats: prefer S, then D, then rotate (R)
3740 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3741 {
3742 if (allowedSwSet.sw_S)
3743 {
3744 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3745 }
3746 else if (allowedSwSet.sw_D)
3747 {
3748 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3749 }
3750 else
3751 {
3752 ADDR_ASSERT(allowedSwSet.sw_R);
3753 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3754 }
3755 }
// 3D resources: D for color surfaces when available, otherwise Z, otherwise S
3756 else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3757 {
3758 if (pIn->flags.color && allowedSwSet.sw_D)
3759 {
3760 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3761 }
3762 else if (allowedSwSet.sw_Z)
3763 {
3764 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3765 }
3766 else
3767 {
3768 ADDR_ASSERT(allowedSwSet.sw_S);
3769 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3770 }
3771 }
// Everything else: R for rotated surfaces, D for displayable ones, then S, and Z as the last resort
3772 else
3773 {
3774 if (pIn->flags.rotated && allowedSwSet.sw_R)
3775 {
3776 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3777 }
3778 else if (displayRsrc && allowedSwSet.sw_D)
3779 {
3780 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3781 }
3782 else if (allowedSwSet.sw_S)
3783 {
3784 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3785 }
3786 else
3787 {
3788 ADDR_ASSERT(allowedSwSet.sw_Z);
3789 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3790 }
3791 }
3792 }
3793
3794 // Swizzle type should be determined.
3795 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3796
3797 // Determine swizzle mode now - always select the "largest" swizzle mode for a given block type +
3798 // swizzle type combination. For example, for AddrBlock64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3799 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3800 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3801 }
3802
3803 returnCode = ADDR_OK;
3804 }
3805 else
3806 {
// Every swizzle mode was filtered out; returnCode stays ADDR_INVALIDPARAMS
3807 // Invalid combination...
3808 ADDR_ASSERT_ALWAYS();
3809 }
3810 }
3811 else
3812 {
// The swizzle-mode-independent parameters failed validation; returnCode stays ADDR_INVALIDPARAMS
3813 // Invalid combination...
3814 ADDR_ASSERT_ALWAYS();
3815 }
3816
3817 return returnCode;
3818 }
3819
3820 /**
3821 ************************************************************************************************************************
3822 * Gfx9Lib::ComputeStereoInfo
3823 *
3824 * @brief
3825 * Compute height alignment and right eye pipeBankXor for stereo surface
3826 *
3827 * @return
3828 * Error code
3829 *
3830 ************************************************************************************************************************
3831 */
3832 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3833 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3834 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
3835 UINT_32* pHeightAlign
3836 ) const
3837 {
3838 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3839
3840 UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3841
3842 if (eqIndex < m_numEquations)
3843 {
3844 if (IsXor(pIn->swizzleMode))
3845 {
3846 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3847 const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2);
3848 const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
3849 const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
3850 const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3851 ASSERTED const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
3852
3853 ADDR_ASSERT(maxYCoordBlock256 ==
3854 GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1));
3855
3856 const UINT_32 maxYCoordInBaseEquation =
3857 (blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256;
3858
3859 ADDR_ASSERT(maxYCoordInBaseEquation ==
3860 GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3861
3862 const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3863
3864 ADDR_ASSERT(maxYCoordInPipeXor ==
3865 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3866
3867 const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3868 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3869
3870 ADDR_ASSERT(maxYCoordInBankXor ==
3871 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3872
3873 const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3874
3875 if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3876 {
3877 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3878
3879 if (pOut->pStereoInfo != NULL)
3880 {
3881 pOut->pStereoInfo->rightSwizzle = 0;
3882
3883 if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3884 {
3885 if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3886 {
3887 pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3888 }
3889
3890 if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3891 {
3892 pOut->pStereoInfo->rightSwizzle |=
3893 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3894 }
3895
3896 ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3897 GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3898 numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3899 }
3900 }
3901 }
3902 }
3903 }
3904 else
3905 {
3906 ADDR_ASSERT_ALWAYS();
3907 returnCode = ADDR_ERROR;
3908 }
3909
3910 return returnCode;
3911 }
3912
3913 /**
3914 ************************************************************************************************************************
3915 * Gfx9Lib::HwlComputeSurfaceInfoTiled
3916 *
3917 * @brief
3918 * Internal function to calculate alignment for tiled surface
3919 *
3920 * @return
3921 * ADDR_E_RETURNCODE
3922 ************************************************************************************************************************
3923 */
3924 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3925 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3926 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3927 ) const
3928 {
3929 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3930 &pOut->blockHeight,
3931 &pOut->blockSlices,
3932 pIn->bpp,
3933 pIn->numFrags,
3934 pIn->resourceType,
3935 pIn->swizzleMode);
3936
3937 if (returnCode == ADDR_OK)
3938 {
3939 UINT_32 pitchAlignInElement = pOut->blockWidth;
3940
3941 if ((IsTex2d(pIn->resourceType) == TRUE) &&
3942 (pIn->flags.display || pIn->flags.rotated) &&
3943 (pIn->numMipLevels <= 1) &&
3944 (pIn->numSamples <= 1) &&
3945 (pIn->numFrags <= 1))
3946 {
3947 // Display engine needs pitch align to be at least 32 pixels.
3948 pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3949 }
3950
3951 pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3952
3953 if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3954 {
3955 if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3956 {
3957 returnCode = ADDR_INVALIDPARAMS;
3958 }
3959 else if (pIn->pitchInElement < pOut->pitch)
3960 {
3961 returnCode = ADDR_INVALIDPARAMS;
3962 }
3963 else
3964 {
3965 pOut->pitch = pIn->pitchInElement;
3966 }
3967 }
3968
3969 UINT_32 heightAlign = 0;
3970
3971 if (pIn->flags.qbStereo)
3972 {
3973 returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3974 }
3975
3976 if (returnCode == ADDR_OK)
3977 {
3978 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3979
3980 if (heightAlign > 1)
3981 {
3982 pOut->height = PowTwoAlign(pOut->height, heightAlign);
3983 }
3984
3985 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3986
3987 pOut->epitchIsHeight = FALSE;
3988 pOut->mipChainInTail = FALSE;
3989 pOut->firstMipIdInTail = pIn->numMipLevels;
3990
3991 pOut->mipChainPitch = pOut->pitch;
3992 pOut->mipChainHeight = pOut->height;
3993 pOut->mipChainSlice = pOut->numSlices;
3994
3995 if (pIn->numMipLevels > 1)
3996 {
3997 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
3998 pIn->swizzleMode,
3999 pIn->bpp,
4000 pIn->width,
4001 pIn->height,
4002 pIn->numSlices,
4003 pOut->blockWidth,
4004 pOut->blockHeight,
4005 pOut->blockSlices,
4006 pIn->numMipLevels,
4007 pOut->pMipInfo);
4008
4009 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4010
4011 if (endingMipId == 0)
4012 {
4013 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4014 pIn->swizzleMode,
4015 pOut->blockWidth,
4016 pOut->blockHeight,
4017 pOut->blockSlices);
4018
4019 pOut->epitchIsHeight = TRUE;
4020 pOut->pitch = tailMaxDim.w;
4021 pOut->height = tailMaxDim.h;
4022 pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4023 tailMaxDim.d : pIn->numSlices;
4024 pOut->mipChainInTail = TRUE;
4025 }
4026 else
4027 {
4028 UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
4029 UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4030
4031 AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4032 pIn->swizzleMode,
4033 mip0WidthInBlk,
4034 mip0HeightInBlk,
4035 pOut->numSlices / pOut->blockSlices);
4036 if (majorMode == ADDR_MAJOR_Y)
4037 {
4038 UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4039
4040 if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4041 {
4042 mip1WidthInBlk++;
4043 }
4044
4045 pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4046
4047 pOut->epitchIsHeight = FALSE;
4048 }
4049 else
4050 {
4051 UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4052
4053 if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4054 {
4055 mip1HeightInBlk++;
4056 }
4057
4058 pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4059
4060 pOut->epitchIsHeight = TRUE;
4061 }
4062 }
4063
4064 if (pOut->pMipInfo != NULL)
4065 {
4066 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4067
4068 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4069 {
4070 Dim3d mipStartPos = {0};
4071 UINT_32 mipTailOffsetInBytes = 0;
4072
4073 mipStartPos = GetMipStartPos(pIn->resourceType,
4074 pIn->swizzleMode,
4075 pOut->pitch,
4076 pOut->height,
4077 pOut->numSlices,
4078 pOut->blockWidth,
4079 pOut->blockHeight,
4080 pOut->blockSlices,
4081 i,
4082 elementBytesLog2,
4083 &mipTailOffsetInBytes);
4084
4085 UINT_32 pitchInBlock =
4086 pOut->mipChainPitch / pOut->blockWidth;
4087 UINT_32 sliceInBlock =
4088 (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4089 UINT_64 blockIndex =
4090 mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4091 UINT_64 macroBlockOffset =
4092 blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4093
4094 pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4095 pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
4096 }
4097 }
4098 }
4099 else if (pOut->pMipInfo != NULL)
4100 {
4101 pOut->pMipInfo[0].pitch = pOut->pitch;
4102 pOut->pMipInfo[0].height = pOut->height;
4103 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4104 pOut->pMipInfo[0].offset = 0;
4105 }
4106
4107 pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4108 (pIn->bpp >> 3) * pIn->numFrags;
4109 pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
4110 pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4111
4112 if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4113 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4114 (pIn->flags.texture == TRUE) &&
4115 (pIn->flags.noMetadata == FALSE) &&
4116 (pIn->flags.metaPipeUnaligned == FALSE))
4117 {
4118 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4119 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4120 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4121 // them, which may cause invalid metadata to be fetched.
4122 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4123 }
4124
4125 if (pIn->flags.prt)
4126 {
4127 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4128 }
4129 }
4130 }
4131
4132 return returnCode;
4133 }
4134
4135 /**
4136 ************************************************************************************************************************
4137 * Gfx9Lib::HwlComputeSurfaceInfoLinear
4138 *
4139 * @brief
4140 * Internal function to calculate alignment for linear surface
4141 *
4142 * @return
4143 * ADDR_E_RETURNCODE
4144 ************************************************************************************************************************
4145 */
4146 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4147 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4148 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4149 ) const
4150 {
4151 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4152 UINT_32 pitch = 0;
4153 UINT_32 actualHeight = 0;
4154 UINT_32 elementBytes = pIn->bpp >> 3;
4155 const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256;
4156
4157 if (IsTex1d(pIn->resourceType))
4158 {
4159 if (pIn->height > 1)
4160 {
4161 returnCode = ADDR_INVALIDPARAMS;
4162 }
4163 else
4164 {
4165 const UINT_32 pitchAlignInElement = alignment / elementBytes;
4166
4167 pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4168 actualHeight = pIn->numMipLevels;
4169
4170 if (pIn->flags.prt == FALSE)
4171 {
4172 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4173 &pitch, &actualHeight);
4174 }
4175
4176 if (returnCode == ADDR_OK)
4177 {
4178 if (pOut->pMipInfo != NULL)
4179 {
4180 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4181 {
4182 pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4183 pOut->pMipInfo[i].pitch = pitch;
4184 pOut->pMipInfo[i].height = 1;
4185 pOut->pMipInfo[i].depth = 1;
4186 }
4187 }
4188 }
4189 }
4190 }
4191 else
4192 {
4193 returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4194 }
4195
4196 if ((pitch == 0) || (actualHeight == 0))
4197 {
4198 returnCode = ADDR_INVALIDPARAMS;
4199 }
4200
4201 if (returnCode == ADDR_OK)
4202 {
4203 pOut->pitch = pitch;
4204 pOut->height = pIn->height;
4205 pOut->numSlices = pIn->numSlices;
4206 pOut->mipChainPitch = pitch;
4207 pOut->mipChainHeight = actualHeight;
4208 pOut->mipChainSlice = pOut->numSlices;
4209 pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4210 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4211 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
4212 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4213 pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4214 pOut->blockHeight = 1;
4215 pOut->blockSlices = 1;
4216 }
4217
4218 // Post calculation validate
4219 ADDR_ASSERT(pOut->sliceSize > 0);
4220
4221 return returnCode;
4222 }
4223
4224 /**
4225 ************************************************************************************************************************
4226 * Gfx9Lib::GetMipChainInfo
4227 *
4228 * @brief
4229 * Internal function to get out information about mip chain
4230 *
4231 * @return
4232 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4233 ************************************************************************************************************************
4234 */
4235 UINT_32 Gfx9Lib::GetMipChainInfo(
4236 AddrResourceType resourceType,
4237 AddrSwizzleMode swizzleMode,
4238 UINT_32 bpp,
4239 UINT_32 mip0Width,
4240 UINT_32 mip0Height,
4241 UINT_32 mip0Depth,
4242 UINT_32 blockWidth,
4243 UINT_32 blockHeight,
4244 UINT_32 blockDepth,
4245 UINT_32 numMipLevel,
4246 ADDR2_MIP_INFO* pMipInfo) const
4247 {
4248 const Dim3d tailMaxDim =
4249 GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4250
4251 UINT_32 mipPitch = mip0Width;
4252 UINT_32 mipHeight = mip0Height;
4253 UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
4254 UINT_32 offset = 0;
4255 UINT_32 firstMipIdInTail = numMipLevel;
4256 BOOL_32 inTail = FALSE;
4257 BOOL_32 finalDim = FALSE;
4258 BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
4259 BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
4260
4261 for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4262 {
4263 if (inTail)
4264 {
4265 if (finalDim == FALSE)
4266 {
4267 UINT_32 mipSize;
4268
4269 if (is3dThick)
4270 {
4271 mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4272 }
4273 else
4274 {
4275 mipSize = mipPitch * mipHeight * (bpp >> 3);
4276 }
4277
4278 if (mipSize <= 256)
4279 {
4280 UINT_32 index = Log2(bpp >> 3);
4281
4282 if (is3dThick)
4283 {
4284 mipPitch = Block256_3dZ[index].w;
4285 mipHeight = Block256_3dZ[index].h;
4286 mipDepth = Block256_3dZ[index].d;
4287 }
4288 else
4289 {
4290 mipPitch = Block256_2d[index].w;
4291 mipHeight = Block256_2d[index].h;
4292 }
4293
4294 finalDim = TRUE;
4295 }
4296 }
4297 }
4298 else
4299 {
4300 inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4301 mipPitch, mipHeight, mipDepth);
4302
4303 if (inTail)
4304 {
4305 firstMipIdInTail = mipId;
4306 mipPitch = tailMaxDim.w;
4307 mipHeight = tailMaxDim.h;
4308
4309 if (is3dThick)
4310 {
4311 mipDepth = tailMaxDim.d;
4312 }
4313 }
4314 else
4315 {
4316 mipPitch = PowTwoAlign(mipPitch, blockWidth);
4317 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4318
4319 if (is3dThick)
4320 {
4321 mipDepth = PowTwoAlign(mipDepth, blockDepth);
4322 }
4323 }
4324 }
4325
4326 if (pMipInfo != NULL)
4327 {
4328 pMipInfo[mipId].pitch = mipPitch;
4329 pMipInfo[mipId].height = mipHeight;
4330 pMipInfo[mipId].depth = mipDepth;
4331 pMipInfo[mipId].offset = offset;
4332 }
4333
4334 offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4335
4336 if (finalDim)
4337 {
4338 if (is3dThin)
4339 {
4340 mipDepth = Max(mipDepth >> 1, 1u);
4341 }
4342 }
4343 else
4344 {
4345 mipPitch = Max(mipPitch >> 1, 1u);
4346 mipHeight = Max(mipHeight >> 1, 1u);
4347
4348 if (is3dThick || is3dThin)
4349 {
4350 mipDepth = Max(mipDepth >> 1, 1u);
4351 }
4352 }
4353 }
4354
4355 return firstMipIdInTail;
4356 }
4357
4358 /**
4359 ************************************************************************************************************************
4360 * Gfx9Lib::GetMetaMiptailInfo
4361 *
4362 * @brief
4363 * Get mip tail coordinate information.
4364 *
4365 * @return
4366 * N/A
4367 ************************************************************************************************************************
4368 */
4369 VOID Gfx9Lib::GetMetaMiptailInfo(
4370 ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord
4371 Dim3d mipCoord, ///< [in] mip tail base coord
4372 UINT_32 numMipInTail, ///< [in] number of mips in tail
4373 Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
4374 ) const
4375 {
4376 BOOL_32 isThick = (pMetaBlkDim->d > 1);
4377 UINT_32 mipWidth = pMetaBlkDim->w;
4378 UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4379 UINT_32 mipDepth = pMetaBlkDim->d;
4380 UINT_32 minInc;
4381
4382 if (isThick)
4383 {
4384 minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4385 }
4386 else if (pMetaBlkDim->h >= 1024)
4387 {
4388 minInc = 256;
4389 }
4390 else if (pMetaBlkDim->h == 512)
4391 {
4392 minInc = 128;
4393 }
4394 else
4395 {
4396 minInc = 64;
4397 }
4398
4399 UINT_32 blk32MipId = 0xFFFFFFFF;
4400
4401 for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4402 {
4403 pInfo[mip].inMiptail = TRUE;
4404 pInfo[mip].startX = mipCoord.w;
4405 pInfo[mip].startY = mipCoord.h;
4406 pInfo[mip].startZ = mipCoord.d;
4407 pInfo[mip].width = mipWidth;
4408 pInfo[mip].height = mipHeight;
4409 pInfo[mip].depth = mipDepth;
4410
4411 if (mipWidth <= 32)
4412 {
4413 if (blk32MipId == 0xFFFFFFFF)
4414 {
4415 blk32MipId = mip;
4416 }
4417
4418 mipCoord.w = pInfo[blk32MipId].startX;
4419 mipCoord.h = pInfo[blk32MipId].startY;
4420 mipCoord.d = pInfo[blk32MipId].startZ;
4421
4422 switch (mip - blk32MipId)
4423 {
4424 case 0:
4425 mipCoord.w += 32; // 16x16
4426 break;
4427 case 1:
4428 mipCoord.h += 32; // 8x8
4429 break;
4430 case 2:
4431 mipCoord.h += 32; // 4x4
4432 mipCoord.w += 16;
4433 break;
4434 case 3:
4435 mipCoord.h += 32; // 2x2
4436 mipCoord.w += 32;
4437 break;
4438 case 4:
4439 mipCoord.h += 32; // 1x1
4440 mipCoord.w += 48;
4441 break;
4442 // The following are for BC/ASTC formats
4443 case 5:
4444 mipCoord.h += 48; // 1/2 x 1/2
4445 break;
4446 case 6:
4447 mipCoord.h += 48; // 1/4 x 1/4
4448 mipCoord.w += 16;
4449 break;
4450 case 7:
4451 mipCoord.h += 48; // 1/8 x 1/8
4452 mipCoord.w += 32;
4453 break;
4454 case 8:
4455 mipCoord.h += 48; // 1/16 x 1/16
4456 mipCoord.w += 48;
4457 break;
4458 default:
4459 ADDR_ASSERT_ALWAYS();
4460 break;
4461 }
4462
4463 mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4464 mipHeight = mipWidth;
4465
4466 if (isThick)
4467 {
4468 mipDepth = mipWidth;
4469 }
4470 }
4471 else
4472 {
4473 if (mipWidth <= minInc)
4474 {
4475 // if we're below the minimal increment...
4476 if (isThick)
4477 {
4478 // For 3d, just go in z direction
4479 mipCoord.d += mipDepth;
4480 }
4481 else
4482 {
4483 // For 2d, first go across, then down
4484 if ((mipWidth * 2) == minInc)
4485 {
4486 // if we're 2 mips below, that's when we go back in x, and down in y
4487 mipCoord.w -= minInc;
4488 mipCoord.h += minInc;
4489 }
4490 else
4491 {
4492 // otherwise, just go across in x
4493 mipCoord.w += minInc;
4494 }
4495 }
4496 }
4497 else
4498 {
4499 // On even mip, go down, otherwise, go across
4500 if (mip & 1)
4501 {
4502 mipCoord.w += mipWidth;
4503 }
4504 else
4505 {
4506 mipCoord.h += mipHeight;
4507 }
4508 }
4509 // Divide the width by 2
4510 mipWidth >>= 1;
4511 // After the first mip in tail, the mip is always a square
4512 mipHeight = mipWidth;
4513 // ...or for 3d, a cube
4514 if (isThick)
4515 {
4516 mipDepth = mipWidth;
4517 }
4518 }
4519 }
4520 }
4521
4522 /**
4523 ************************************************************************************************************************
4524 * Gfx9Lib::GetMipStartPos
4525 *
4526 * @brief
4527 * Internal function to get out information about mip logical start position
4528 *
4529 * @return
4530 * logical start position in macro block width/height/depth of one mip level within one slice
4531 ************************************************************************************************************************
4532 */
4533 Dim3d Gfx9Lib::GetMipStartPos(
4534 AddrResourceType resourceType,
4535 AddrSwizzleMode swizzleMode,
4536 UINT_32 width,
4537 UINT_32 height,
4538 UINT_32 depth,
4539 UINT_32 blockWidth,
4540 UINT_32 blockHeight,
4541 UINT_32 blockDepth,
4542 UINT_32 mipId,
4543 UINT_32 log2ElementBytes,
4544 UINT_32* pMipTailBytesOffset) const
4545 {
4546 Dim3d mipStartPos = {0};
4547 const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4548
4549 // Report mip in tail if Mip0 is already in mip tail
4550 BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4551 UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode);
4552 UINT_32 mipIndexInTail = mipId;
4553
4554 if (inMipTail == FALSE)
4555 {
4556 // Mip 0 dimension, unit in block
4557 UINT_32 mipWidthInBlk = width / blockWidth;
4558 UINT_32 mipHeightInBlk = height / blockHeight;
4559 UINT_32 mipDepthInBlk = depth / blockDepth;
4560 AddrMajorMode majorMode = GetMajorMode(resourceType,
4561 swizzleMode,
4562 mipWidthInBlk,
4563 mipHeightInBlk,
4564 mipDepthInBlk);
4565
4566 UINT_32 endingMip = mipId + 1;
4567
4568 for (UINT_32 i = 1; i <= mipId; i++)
4569 {
4570 if ((i == 1) || (i == 3))
4571 {
4572 if (majorMode == ADDR_MAJOR_Y)
4573 {
4574 mipStartPos.w += mipWidthInBlk;
4575 }
4576 else
4577 {
4578 mipStartPos.h += mipHeightInBlk;
4579 }
4580 }
4581 else
4582 {
4583 if (majorMode == ADDR_MAJOR_X)
4584 {
4585 mipStartPos.w += mipWidthInBlk;
4586 }
4587 else if (majorMode == ADDR_MAJOR_Y)
4588 {
4589 mipStartPos.h += mipHeightInBlk;
4590 }
4591 else
4592 {
4593 mipStartPos.d += mipDepthInBlk;
4594 }
4595 }
4596
4597 BOOL_32 inTail = FALSE;
4598
4599 if (IsThick(resourceType, swizzleMode))
4600 {
4601 UINT_32 dim = log2blkSize % 3;
4602
4603 if (dim == 0)
4604 {
4605 inTail =
4606 (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4607 }
4608 else if (dim == 1)
4609 {
4610 inTail =
4611 (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4612 }
4613 else
4614 {
4615 inTail =
4616 (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4617 }
4618 }
4619 else
4620 {
4621 if (log2blkSize & 1)
4622 {
4623 inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4624 }
4625 else
4626 {
4627 inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4628 }
4629 }
4630
4631 if (inTail)
4632 {
4633 endingMip = i;
4634 break;
4635 }
4636
4637 mipWidthInBlk = RoundHalf(mipWidthInBlk);
4638 mipHeightInBlk = RoundHalf(mipHeightInBlk);
4639 mipDepthInBlk = RoundHalf(mipDepthInBlk);
4640 }
4641
4642 if (mipId >= endingMip)
4643 {
4644 inMipTail = TRUE;
4645 mipIndexInTail = mipId - endingMip;
4646 }
4647 }
4648
4649 if (inMipTail)
4650 {
4651 UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize;
4652 ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4653 *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4654 }
4655
4656 return mipStartPos;
4657 }
4658
4659 /**
4660 ************************************************************************************************************************
4661 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4662 *
4663 * @brief
4664 * Internal function to calculate address from coord for tiled swizzle surface
4665 *
4666 * @return
4667 * ADDR_E_RETURNCODE
4668 ************************************************************************************************************************
4669 */
4670 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4671 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4672 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4673 ) const
4674 {
4675 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4676 localIn.swizzleMode = pIn->swizzleMode;
4677 localIn.flags = pIn->flags;
4678 localIn.resourceType = pIn->resourceType;
4679 localIn.bpp = pIn->bpp;
4680 localIn.width = Max(pIn->unalignedWidth, 1u);
4681 localIn.height = Max(pIn->unalignedHeight, 1u);
4682 localIn.numSlices = Max(pIn->numSlices, 1u);
4683 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4684 localIn.numSamples = Max(pIn->numSamples, 1u);
4685 localIn.numFrags = Max(pIn->numFrags, 1u);
4686 if (localIn.numMipLevels <= 1)
4687 {
4688 localIn.pitchInElement = pIn->pitchInElement;
4689 }
4690
4691 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4692 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4693
4694 BOOL_32 valid = (returnCode == ADDR_OK) &&
4695 (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4696 IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4697 ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4698
4699 if (valid)
4700 {
4701 UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3);
4702 Dim3d mipStartPos = {0};
4703 UINT_32 mipTailBytesOffset = 0;
4704
4705 if (pIn->numMipLevels > 1)
4706 {
4707 // Mip-map chain cannot be MSAA surface
4708 ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4709
4710 mipStartPos = GetMipStartPos(pIn->resourceType,
4711 pIn->swizzleMode,
4712 localOut.pitch,
4713 localOut.height,
4714 localOut.numSlices,
4715 localOut.blockWidth,
4716 localOut.blockHeight,
4717 localOut.blockSlices,
4718 pIn->mipId,
4719 log2ElementBytes,
4720 &mipTailBytesOffset);
4721 }
4722
4723 UINT_32 interleaveOffset = 0;
4724 UINT_32 pipeBits = 0;
4725 UINT_32 pipeXor = 0;
4726 UINT_32 bankBits = 0;
4727 UINT_32 bankXor = 0;
4728
4729 if (IsThin(pIn->resourceType, pIn->swizzleMode))
4730 {
4731 UINT_32 blockOffset = 0;
4732 UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4733
4734 if (IsZOrderSwizzle(pIn->swizzleMode))
4735 {
4736 // Morton generation
4737 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4738 {
4739 UINT_32 totalLowBits = 6 - log2ElementBytes;
4740 UINT_32 mortBits = totalLowBits / 2;
4741 UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4742 // Are 9 bits enough?
4743 UINT_32 highBitsValue =
4744 MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4745 blockOffset = lowBitsValue | highBitsValue;
4746 ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4747 }
4748 else
4749 {
4750 blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4751 }
4752
4753 // Fill LSBs with sample bits
4754 if (pIn->numSamples > 1)
4755 {
4756 blockOffset *= pIn->numSamples;
4757 blockOffset |= pIn->sample;
4758 }
4759
4760 // Shift according to BytesPP
4761 blockOffset <<= log2ElementBytes;
4762 }
4763 else
4764 {
4765 // Micro block offset
4766 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4767 blockOffset = microBlockOffset;
4768
4769 // Micro block dimension
4770 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4771 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4772 // Morton generation, does 12 bit enough?
4773 blockOffset |=
4774 MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4775
4776 // Sample bits start location
4777 UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples);
4778 // Join sample bits information to the highest Macro block bits
4779 if (IsNonPrtXor(pIn->swizzleMode))
4780 {
4781 // Non-prt-Xor : xor highest Macro block bits with sample bits
4782 blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4783 }
4784 else
4785 {
4786 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4787 // after this op, the blockOffset only contains log2 Macro block size bits
4788 blockOffset %= (1 << sampleStart);
4789 blockOffset |= (pIn->sample << sampleStart);
4790 ADDR_ASSERT((blockOffset >> log2blkSize) == 0);
4791 }
4792 }
4793
4794 if (IsXor(pIn->swizzleMode))
4795 {
4796 // Mask off bits above Macro block bits to keep page synonyms working for prt
4797 if (IsPrt(pIn->swizzleMode))
4798 {
4799 blockOffset &= ((1 << log2blkSize) - 1);
4800 }
4801
4802 // Preserve offset inside pipe interleave
4803 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4804 blockOffset >>= m_pipeInterleaveLog2;
4805
4806 // Pipe/Se xor bits
4807 pipeBits = GetPipeXorBits(log2blkSize);
4808 // Pipe xor
4809 pipeXor = FoldXor2d(blockOffset, pipeBits);
4810 blockOffset >>= pipeBits;
4811
4812 // Bank xor bits
4813 bankBits = GetBankXorBits(log2blkSize);
4814 // Bank Xor
4815 bankXor = FoldXor2d(blockOffset, bankBits);
4816 blockOffset >>= bankBits;
4817
4818 // Put all the part back together
4819 blockOffset <<= bankBits;
4820 blockOffset |= bankXor;
4821 blockOffset <<= pipeBits;
4822 blockOffset |= pipeXor;
4823 blockOffset <<= m_pipeInterleaveLog2;
4824 blockOffset |= interleaveOffset;
4825 }
4826
4827 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4828 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4829
4830 blockOffset |= mipTailBytesOffset;
4831
4832 if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4833 {
4834 // Apply slice xor if not MSAA/PRT
4835 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4836 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4837 (m_pipeInterleaveLog2 + pipeBits));
4838 }
4839
4840 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4841 bankBits, pipeBits, &blockOffset);
4842
4843 blockOffset %= (1 << log2blkSize);
4844
4845 UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4846 UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4847 UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4848 UINT_64 macroBlockIndex =
4849 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4850 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4851 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4852
4853 pOut->addr = blockOffset | (macroBlockIndex << log2blkSize);
4854 }
4855 else
4856 {
4857 UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4858
4859 Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4860
4861 UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4862 (pIn->y / microBlockDim.h),
4863 (pIn->slice / microBlockDim.d),
4864 8);
4865
4866 blockOffset <<= 10;
4867 blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4868
4869 if (IsXor(pIn->swizzleMode))
4870 {
4871 // Mask off bits above Macro block bits to keep page synonyms working for prt
4872 if (IsPrt(pIn->swizzleMode))
4873 {
4874 blockOffset &= ((1 << log2blkSize) - 1);
4875 }
4876
4877 // Preserve offset inside pipe interleave
4878 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4879 blockOffset >>= m_pipeInterleaveLog2;
4880
4881 // Pipe/Se xor bits
4882 pipeBits = GetPipeXorBits(log2blkSize);
4883 // Pipe xor
4884 pipeXor = FoldXor3d(blockOffset, pipeBits);
4885 blockOffset >>= pipeBits;
4886
4887 // Bank xor bits
4888 bankBits = GetBankXorBits(log2blkSize);
4889 // Bank Xor
4890 bankXor = FoldXor3d(blockOffset, bankBits);
4891 blockOffset >>= bankBits;
4892
4893 // Put all the part back together
4894 blockOffset <<= bankBits;
4895 blockOffset |= bankXor;
4896 blockOffset <<= pipeBits;
4897 blockOffset |= pipeXor;
4898 blockOffset <<= m_pipeInterleaveLog2;
4899 blockOffset |= interleaveOffset;
4900 }
4901
4902 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4903 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4904 blockOffset |= mipTailBytesOffset;
4905
4906 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4907 bankBits, pipeBits, &blockOffset);
4908
4909 blockOffset %= (1 << log2blkSize);
4910
4911 UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
4912 UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4913 UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4914
4915 UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4916 UINT_32 sliceSizeInBlock =
4917 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4918 UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4919
4920 pOut->addr = blockOffset | (blockIndex << log2blkSize);
4921 }
4922 }
4923 else
4924 {
4925 returnCode = ADDR_INVALIDPARAMS;
4926 }
4927
4928 return returnCode;
4929 }
4930
4931 /**
4932 ************************************************************************************************************************
4933 * Gfx9Lib::ComputeSurfaceLinearPadding
4934 *
4935 * @brief
4936 * Internal function to calculate padding for linear swizzle 2D/3D surface
4937 *
4938 * @return
4939 * ADDR_E_RETURNCODE
4940 ************************************************************************************************************************
4941 */
4942 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
4943 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture
4944 UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element
4945 UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW
4946 ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information
4947 ) const
4948 {
4949 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4950
4951 UINT_32 elementBytes = pIn->bpp >> 3;
4952 UINT_32 pitchAlignInElement = 0;
4953
4954 if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
4955 {
4956 ADDR_ASSERT(pIn->numMipLevels <= 1);
4957 ADDR_ASSERT(pIn->numSlices <= 1);
4958 pitchAlignInElement = 1;
4959 }
4960 else
4961 {
4962 pitchAlignInElement = (256 / elementBytes);
4963 }
4964
4965 UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement);
4966 UINT_32 slice0PaddedHeight = pIn->height;
4967
4968 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4969 &mipChainWidth, &slice0PaddedHeight);
4970
4971 if (returnCode == ADDR_OK)
4972 {
4973 UINT_32 mipChainHeight = 0;
4974 UINT_32 mipHeight = pIn->height;
4975 UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4976
4977 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4978 {
4979 if (pMipInfo != NULL)
4980 {
4981 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
4982 pMipInfo[i].pitch = mipChainWidth;
4983 pMipInfo[i].height = mipHeight;
4984 pMipInfo[i].depth = mipDepth;
4985 }
4986
4987 mipChainHeight += mipHeight;
4988 mipHeight = RoundHalf(mipHeight);
4989 mipHeight = Max(mipHeight, 1u);
4990 }
4991
4992 *pMipmap0PaddedWidth = mipChainWidth;
4993 *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
4994 }
4995
4996 return returnCode;
4997 }
4998
4999 } // V2
5000 } // Addr