amd/addrlib: fix another C++ one definition rule violation
[mesa.git] / src / amd / addrlib / src / gfx10 / gfx10addrlib.cpp
1 /*
2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx10addrlib.cpp
30 * @brief Contain the implementation for the Gfx10Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx10addrlib.h"
35 #include "gfx10_gb_reg.h"
36
37 #include "amdgpu_asic_addr.h"
38
39 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
40 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
41
42 namespace Addr
43 {
44 /**
45 ************************************************************************************************************************
46 * Gfx10HwlInit
47 *
48 * @brief
49 * Creates an Gfx10Lib object.
50 *
51 * @return
52 * Returns an Gfx10Lib object pointer.
53 ************************************************************************************************************************
54 */
55 Addr::Lib* Gfx10HwlInit(const Client* pClient)
56 {
57 return V2::Gfx10Lib::CreateObj(pClient);
58 }
59
60 namespace V2
61 {
62
63 ////////////////////////////////////////////////////////////////////////////////////////////////////
64 // Static Const Member
65 ////////////////////////////////////////////////////////////////////////////////////////////////////
66
67 const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
68 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
69 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
70 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
71 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
72 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
73
74 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
75 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
76 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
77 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
78
79 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
80 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
81 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
82 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
83
84 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
85 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
86 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
87 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
88
89 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
90 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
91 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
92 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
93
94 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
95 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_X
96 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_X
97 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
98
99 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
100 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
101 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
102 {0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_64KB_R_X
103
104 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_Z_X
105 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
106 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
107 {0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_VAR_R_X
108 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
109 };
110
111 const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
112
113 const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
114 const Dim3d Gfx10Lib::Block4K_Log2_3d[] = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
115
116 /**
117 ************************************************************************************************************************
118 * Gfx10Lib::Gfx10Lib
119 *
120 * @brief
121 * Constructor
122 *
123 ************************************************************************************************************************
124 */
125 Gfx10Lib::Gfx10Lib(const Client* pClient)
126 :
127 Lib(pClient),
128 m_colorBaseIndex(0),
129 m_xmaskBaseIndex(0),
130 m_dccBaseIndex(0)
131 {
132 m_class = AI_ADDRLIB;
133 memset(&m_settings, 0, sizeof(m_settings));
134 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
135 }
136
137 /**
138 ************************************************************************************************************************
139 * Gfx10Lib::~Gfx10Lib
140 *
141 * @brief
142 * Destructor
143 ************************************************************************************************************************
144 */
145 Gfx10Lib::~Gfx10Lib()
146 {
147 }
148
149 /**
150 ************************************************************************************************************************
151 * Gfx10Lib::HwlComputeHtileInfo
152 *
153 * @brief
154 * Interface function stub of AddrComputeHtilenfo
155 *
156 * @return
157 * ADDR_E_RETURNCODE
158 ************************************************************************************************************************
159 */
160 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
161 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
162 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
163 ) const
164 {
165 ADDR_E_RETURNCODE ret = ADDR_OK;
166
167 if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
168 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) ||
169 (pIn->hTileFlags.pipeAligned != TRUE))
170 {
171 ret = ADDR_INVALIDPARAMS;
172 }
173 else
174 {
175 Dim3d metaBlk = {0};
176 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
177 ADDR_RSRC_TEX_2D,
178 pIn->swizzleMode,
179 0,
180 0,
181 TRUE,
182 &metaBlk);
183
184 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
185 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
186 pOut->baseAlign = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
187 pOut->metaBlkWidth = metaBlk.w;
188 pOut->metaBlkHeight = metaBlk.h;
189
190 if (pIn->numMipLevels > 1)
191 {
192 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
193
194 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
195
196 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
197 {
198 UINT_32 mipWidth, mipHeight;
199
200 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
201
202 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
203 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
204
205 const UINT_32 pitchInM = mipWidth / metaBlk.w;
206 const UINT_32 heightInM = mipHeight / metaBlk.h;
207 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
208
209 if (pOut->pMipInfo != NULL)
210 {
211 pOut->pMipInfo[i].inMiptail = FALSE;
212 pOut->pMipInfo[i].offset = offset;
213 pOut->pMipInfo[i].sliceSize = mipSliceSize;
214 }
215
216 offset += mipSliceSize;
217 }
218
219 pOut->sliceSize = offset;
220 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
221 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
222
223 if (pOut->pMipInfo != NULL)
224 {
225 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
226 {
227 pOut->pMipInfo[i].inMiptail = TRUE;
228 pOut->pMipInfo[i].offset = 0;
229 pOut->pMipInfo[i].sliceSize = 0;
230 }
231
232 if (pIn->firstMipIdInTail != pIn->numMipLevels)
233 {
234 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
235 }
236 }
237 }
238 else
239 {
240 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
241 const UINT_32 heightInM = pOut->height / metaBlk.h;
242
243 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
244 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
245 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
246
247 if (pOut->pMipInfo != NULL)
248 {
249 pOut->pMipInfo[0].inMiptail = FALSE;
250 pOut->pMipInfo[0].offset = 0;
251 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
252 }
253 }
254 }
255
256 return ret;
257 }
258
259 /**
260 ************************************************************************************************************************
261 * Gfx10Lib::HwlComputeCmaskInfo
262 *
263 * @brief
264 * Interface function stub of AddrComputeCmaskInfo
265 *
266 * @return
267 * ADDR_E_RETURNCODE
268 ************************************************************************************************************************
269 */
270 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
271 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
272 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
273 ) const
274 {
275 ADDR_E_RETURNCODE ret = ADDR_OK;
276
277 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
278 (pIn->cMaskFlags.pipeAligned != TRUE) ||
279 ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
280 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))))
281 {
282 ret = ADDR_INVALIDPARAMS;
283 }
284 else
285 {
286 Dim3d metaBlk = {0};
287 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,
288 ADDR_RSRC_TEX_2D,
289 pIn->swizzleMode,
290 0,
291 0,
292 TRUE,
293 &metaBlk);
294
295 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
296 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
297 pOut->baseAlign = metaBlkSize;
298 pOut->metaBlkWidth = metaBlk.w;
299 pOut->metaBlkHeight = metaBlk.h;
300
301 if (pIn->numMipLevels > 1)
302 {
303 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
304
305 UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;
306
307 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
308 {
309 UINT_32 mipWidth, mipHeight;
310
311 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
312
313 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
314 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
315
316 const UINT_32 pitchInM = mipWidth / metaBlk.w;
317 const UINT_32 heightInM = mipHeight / metaBlk.h;
318
319 if (pOut->pMipInfo != NULL)
320 {
321 pOut->pMipInfo[i].inMiptail = FALSE;
322 pOut->pMipInfo[i].offset = metaBlkPerSlice * metaBlkSize;
323 pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;
324 }
325
326 metaBlkPerSlice += pitchInM * heightInM;
327 }
328
329 pOut->metaBlkNumPerSlice = metaBlkPerSlice;
330
331 if (pOut->pMipInfo != NULL)
332 {
333 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
334 {
335 pOut->pMipInfo[i].inMiptail = TRUE;
336 pOut->pMipInfo[i].offset = 0;
337 pOut->pMipInfo[i].sliceSize = 0;
338 }
339
340 if (pIn->firstMipIdInTail != pIn->numMipLevels)
341 {
342 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
343 }
344 }
345 }
346 else
347 {
348 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
349 const UINT_32 heightInM = pOut->height / metaBlk.h;
350
351 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
352
353 if (pOut->pMipInfo != NULL)
354 {
355 pOut->pMipInfo[0].inMiptail = FALSE;
356 pOut->pMipInfo[0].offset = 0;
357 pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
358 }
359 }
360
361 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
362 pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;
363 }
364
365 return ret;
366 }
367
368 /**
369 ************************************************************************************************************************
370 * Gfx10Lib::HwlComputeDccInfo
371 *
372 * @brief
373 * Interface function to compute DCC key info
374 *
375 * @return
376 * ADDR_E_RETURNCODE
377 ************************************************************************************************************************
378 */
379 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
380 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
381 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
382 ) const
383 {
384 ADDR_E_RETURNCODE ret = ADDR_OK;
385
386 if (pIn->swizzleMode != ADDR_SW_64KB_Z_X && pIn->swizzleMode != ADDR_SW_64KB_R_X)
387 {
388 // Hardware does not support DCC for this swizzle mode.
389 ret = ADDR_INVALIDPARAMS;
390 }
391 else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
392 {
393 // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
394 ret = ADDR_INVALIDPARAMS;
395 }
396 else
397 {
398 // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
399 ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));
400
401 Dim3d metaBlk = {0};
402 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
403 const UINT_32 numFragLog2 = Log2(pIn->numFrags);
404 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
405 pIn->resourceType,
406 pIn->swizzleMode,
407 elemLog2,
408 numFragLog2,
409 pIn->dccKeyFlags.pipeAligned,
410 &metaBlk);
411 const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
412
413 pOut->compressBlkWidth = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
414 pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
415 pOut->compressBlkDepth = isThick ? Block256_3d[elemLog2].d : 1;
416
417 pOut->dccRamBaseAlign = metaBlkSize;
418 pOut->metaBlkWidth = metaBlk.w;
419 pOut->metaBlkHeight = metaBlk.h;
420 pOut->metaBlkDepth = metaBlk.d;
421
422 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
423 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
424 pOut->depth = PowTwoAlign(pIn->numSlices, metaBlk.d);
425
426 if (pIn->numMipLevels > 1)
427 {
428 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
429
430 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
431
432 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
433 {
434 UINT_32 mipWidth, mipHeight;
435
436 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
437
438 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
439 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
440
441 const UINT_32 pitchInM = mipWidth / metaBlk.w;
442 const UINT_32 heightInM = mipHeight / metaBlk.h;
443 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
444
445 if (pOut->pMipInfo != NULL)
446 {
447 pOut->pMipInfo[i].inMiptail = FALSE;
448 pOut->pMipInfo[i].offset = offset;
449 pOut->pMipInfo[i].sliceSize = mipSliceSize;
450 }
451
452 offset += mipSliceSize;
453 }
454
455 pOut->dccRamSliceSize = offset;
456 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
457 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
458
459 if (pOut->pMipInfo != NULL)
460 {
461 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
462 {
463 pOut->pMipInfo[i].inMiptail = TRUE;
464 pOut->pMipInfo[i].offset = 0;
465 pOut->pMipInfo[i].sliceSize = 0;
466 }
467
468 if (pIn->firstMipIdInTail != pIn->numMipLevels)
469 {
470 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
471 }
472 }
473 }
474 else
475 {
476 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
477 const UINT_32 heightInM = pOut->height / metaBlk.h;
478
479 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
480 pOut->dccRamSliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
481 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
482
483 if (pOut->pMipInfo != NULL)
484 {
485 pOut->pMipInfo[0].inMiptail = FALSE;
486 pOut->pMipInfo[0].offset = 0;
487 pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
488 }
489 }
490 }
491
492 return ret;
493 }
494
495 /**
496 ************************************************************************************************************************
497 * Gfx10Lib::HwlComputeCmaskAddrFromCoord
498 *
499 * @brief
500 * Interface function stub of AddrComputeCmaskAddrFromCoord
501 *
502 * @return
503 * ADDR_E_RETURNCODE
504 ************************************************************************************************************************
505 */
506 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
507 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
508 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
509 {
510 // Only support pipe aligned CMask
511 ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);
512
513 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {};
514 input.size = sizeof(input);
515 input.cMaskFlags = pIn->cMaskFlags;
516 input.colorFlags = pIn->colorFlags;
517 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
518 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
519 input.numSlices = Max(pIn->numSlices, 1u);
520 input.swizzleMode = pIn->swizzleMode;
521 input.resourceType = pIn->resourceType;
522
523 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {};
524 output.size = sizeof(output);
525
526 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
527
528 if (returnCode == ADDR_OK)
529 {
530 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
531 const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3);
532 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
533 const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2;
534 const UINT_8* patIdxTable = (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? CMASK_VAR_RBPLUS_PATIDX :
535 (m_settings.supportRbPlus ? CMASK_64K_RBPLUS_PATIDX : CMASK_64K_PATIDX);
536
537 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
538 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
539 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(CMASK_SW_PATTERN[patIdxTable[index]],
540 blkSizeLog2 + 1, // +1 for nibble offset
541 pIn->x,
542 pIn->y,
543 pIn->slice,
544 0);
545 const UINT_32 xb = pIn->x / output.metaBlkWidth;
546 const UINT_32 yb = pIn->y / output.metaBlkHeight;
547 const UINT_32 pb = output.pitch / output.metaBlkWidth;
548 const UINT_32 blkIndex = (yb * pb) + xb;
549 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
550
551 pOut->addr = (output.sliceSize * pIn->slice) +
552 (blkIndex * (1 << blkSizeLog2)) +
553 ((blkOffset >> 1) ^ pipeXor);
554 pOut->bitPosition = (blkOffset & 1) << 2;
555 }
556
557 return returnCode;
558 }
559
560 /**
561 ************************************************************************************************************************
562 * Gfx10Lib::HwlComputeHtileAddrFromCoord
563 *
564 * @brief
565 * Interface function stub of AddrComputeHtileAddrFromCoord
566 *
567 * @return
568 * ADDR_E_RETURNCODE
569 ************************************************************************************************************************
570 */
571 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
572 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
573 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
574 {
575 ADDR_E_RETURNCODE returnCode = ADDR_OK;
576
577 if (pIn->numMipLevels > 1)
578 {
579 returnCode = ADDR_NOTIMPLEMENTED;
580 }
581 else
582 {
583 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
584 input.size = sizeof(input);
585 input.hTileFlags = pIn->hTileFlags;
586 input.depthFlags = pIn->depthflags;
587 input.swizzleMode = pIn->swizzleMode;
588 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
589 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
590 input.numSlices = Max(pIn->numSlices, 1u);
591 input.numMipLevels = 1;
592
593 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
594 output.size = sizeof(output);
595
596 returnCode = ComputeHtileInfo(&input, &output);
597
598 if (returnCode == ADDR_OK)
599 {
600 const UINT_32 numSampleLog2 = Log2(pIn->numSamples);
601 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
602 const UINT_32 index = m_xmaskBaseIndex + numSampleLog2;
603 const UINT_8* patIdxTable = m_settings.supportRbPlus ? HTILE_RBPLUS_PATIDX : HTILE_PATIDX;
604
605 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
606 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
607 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(HTILE_SW_PATTERN[patIdxTable[index]],
608 blkSizeLog2 + 1, // +1 for nibble offset
609 pIn->x,
610 pIn->y,
611 pIn->slice,
612 0);
613 const UINT_32 xb = pIn->x / output.metaBlkWidth;
614 const UINT_32 yb = pIn->y / output.metaBlkHeight;
615 const UINT_32 pb = output.pitch / output.metaBlkWidth;
616 const UINT_32 blkIndex = (yb * pb) + xb;
617 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
618
619 pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
620 (blkIndex * (1 << blkSizeLog2)) +
621 ((blkOffset >> 1) ^ pipeXor);
622 }
623 }
624
625 return returnCode;
626 }
627
628 /**
629 ************************************************************************************************************************
630 * Gfx10Lib::HwlComputeHtileCoordFromAddr
631 *
632 * @brief
633 * Interface function stub of AddrComputeHtileCoordFromAddr
634 *
635 * @return
636 * ADDR_E_RETURNCODE
637 ************************************************************************************************************************
638 */
639 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
640 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
641 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
642 {
643 ADDR_NOT_IMPLEMENTED();
644
645 return ADDR_OK;
646 }
647
648 /**
649 ************************************************************************************************************************
650 * Gfx10Lib::HwlComputeDccAddrFromCoord
651 *
652 * @brief
653 * Interface function stub of AddrComputeDccAddrFromCoord
654 *
655 * @return
656 * ADDR_E_RETURNCODE
657 ************************************************************************************************************************
658 */
659 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccAddrFromCoord(
660 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
661 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
662 {
663 ADDR_E_RETURNCODE returnCode = ADDR_OK;
664
665 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
666 (pIn->swizzleMode != ADDR_SW_64KB_R_X) ||
667 (pIn->dccKeyFlags.linear == TRUE) ||
668 (pIn->numFrags > 1) ||
669 (pIn->numMipLevels > 1) ||
670 (pIn->mipId > 0))
671 {
672 returnCode = ADDR_NOTSUPPORTED;
673 }
674 else
675 {
676 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
677 const UINT_32 numPipeLog2 = m_pipesLog2;
678 const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
679 UINT_32 index = m_dccBaseIndex + elemLog2;
680 const UINT_8* patIdxTable;
681
682 if (m_settings.supportRbPlus)
683 {
684 patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX;
685
686 if (pIn->dccKeyFlags.pipeAligned)
687 {
688 index += MaxNumOfBpp;
689
690 if (m_numPkrLog2 < 2)
691 {
692 index += m_pipesLog2 * MaxNumOfBpp;
693 }
694 else
695 {
696 // 4 groups for "m_numPkrLog2 < 2" case
697 index += 4 * MaxNumOfBpp;
698
699 const UINT_32 dccPipePerPkr = 3;
700
701 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
702 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
703 }
704 }
705 }
706 else
707 {
708 patIdxTable = DCC_64K_R_X_PATIDX;
709
710 if (pIn->dccKeyFlags.pipeAligned)
711 {
712 index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
713 }
714 else
715 {
716 index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
717 }
718 }
719
720 const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
721 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
722 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
723 blkSizeLog2 + 1, // +1 for nibble offset
724 pIn->x,
725 pIn->y,
726 pIn->slice,
727 0);
728 const UINT_32 xb = pIn->x / pIn->metaBlkWidth;
729 const UINT_32 yb = pIn->y / pIn->metaBlkHeight;
730 const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth;
731 const UINT_32 blkIndex = (yb * pb) + xb;
732 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
733
734 pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
735 (blkIndex * (1 << blkSizeLog2)) +
736 ((blkOffset >> 1) ^ pipeXor);
737 }
738
739 return returnCode;
740 }
741
742 /**
743 ************************************************************************************************************************
744 * Gfx10Lib::HwlInitGlobalParams
745 *
746 * @brief
747 * Initializes global parameters
748 *
749 * @return
750 * TRUE if all settings are valid
751 *
752 ************************************************************************************************************************
753 */
754 BOOL_32 Gfx10Lib::HwlInitGlobalParams(
755 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
756 {
757 BOOL_32 valid = TRUE;
758 GB_ADDR_CONFIG_gfx10 gbAddrConfig;
759
760 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
761
762 // These values are copied from CModel code
763 switch (gbAddrConfig.bits.NUM_PIPES)
764 {
765 case ADDR_CONFIG_1_PIPE:
766 m_pipes = 1;
767 m_pipesLog2 = 0;
768 break;
769 case ADDR_CONFIG_2_PIPE:
770 m_pipes = 2;
771 m_pipesLog2 = 1;
772 break;
773 case ADDR_CONFIG_4_PIPE:
774 m_pipes = 4;
775 m_pipesLog2 = 2;
776 break;
777 case ADDR_CONFIG_8_PIPE:
778 m_pipes = 8;
779 m_pipesLog2 = 3;
780 break;
781 case ADDR_CONFIG_16_PIPE:
782 m_pipes = 16;
783 m_pipesLog2 = 4;
784 break;
785 case ADDR_CONFIG_32_PIPE:
786 m_pipes = 32;
787 m_pipesLog2 = 5;
788 break;
789 case ADDR_CONFIG_64_PIPE:
790 m_pipes = 64;
791 m_pipesLog2 = 6;
792 break;
793 default:
794 ADDR_ASSERT_ALWAYS();
795 valid = FALSE;
796 break;
797 }
798
799 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
800 {
801 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
802 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
803 m_pipeInterleaveLog2 = 8;
804 break;
805 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
806 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
807 m_pipeInterleaveLog2 = 9;
808 break;
809 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
810 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
811 m_pipeInterleaveLog2 = 10;
812 break;
813 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
814 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
815 m_pipeInterleaveLog2 = 11;
816 break;
817 default:
818 ADDR_ASSERT_ALWAYS();
819 valid = FALSE;
820 break;
821 }
822
823 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
824 // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
825 // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
826 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
827
828 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
829 {
830 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
831 m_maxCompFrag = 1;
832 m_maxCompFragLog2 = 0;
833 break;
834 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
835 m_maxCompFrag = 2;
836 m_maxCompFragLog2 = 1;
837 break;
838 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
839 m_maxCompFrag = 4;
840 m_maxCompFragLog2 = 2;
841 break;
842 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
843 m_maxCompFrag = 8;
844 m_maxCompFragLog2 = 3;
845 break;
846 default:
847 ADDR_ASSERT_ALWAYS();
848 valid = FALSE;
849 break;
850 }
851
852 {
853 // Skip unaligned case
854 m_xmaskBaseIndex += MaxNumOfAA;
855
856 m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfAA;
857 m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
858
859 if (m_settings.supportRbPlus)
860 {
861 m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
862 m_numSaLog2 = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
863
864 ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
865
866 ADDR_C_ASSERT(sizeof(HTILE_RBPLUS_PATIDX) / sizeof(HTILE_RBPLUS_PATIDX[0]) ==
867 sizeof(CMASK_64K_RBPLUS_PATIDX) / sizeof(CMASK_64K_RBPLUS_PATIDX[0]));
868
869 if (m_numPkrLog2 >= 2)
870 {
871 m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
872 m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
873 }
874 }
875 else
876 {
877 const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
878 static_cast<UINT_32>(ADDR_CONFIG_1_PIPE) +
879 1;
880
881 ADDR_C_ASSERT(sizeof(HTILE_PATIDX) / sizeof(HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);
882
883 ADDR_C_ASSERT(sizeof(HTILE_PATIDX) / sizeof(HTILE_PATIDX[0]) ==
884 sizeof(CMASK_64K_PATIDX) / sizeof(CMASK_64K_PATIDX[0]));
885 }
886 }
887
888 if (m_settings.supportRbPlus)
889 {
890 // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
891 // corresponding SW_64KB_* mode
892 m_blockVarSizeLog2 = m_pipesLog2 + 14;
893 }
894
895 if (valid)
896 {
897 InitEquationTable();
898 }
899
900 return valid;
901 }
902
903 /**
904 ************************************************************************************************************************
905 * Gfx10Lib::HwlConvertChipFamily
906 *
907 * @brief
908 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
909 * @return
910 * ChipFamily
911 ************************************************************************************************************************
912 */
913 ChipFamily Gfx10Lib::HwlConvertChipFamily(
914 UINT_32 chipFamily, ///< [in] chip family defined in atiih.h
915 UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h
916 {
917 ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
918
919 m_settings.dccUnsup3DSwDis = 1;
920
921 switch (chipFamily)
922 {
923 case FAMILY_NV:
924 m_settings.isDcn2 = 1;
925
926 if (ASICREV_IS_SIENNA_M(chipRevision))
927 {
928 m_settings.supportRbPlus = 1;
929 m_settings.dccUnsup3DSwDis = 0;
930 }
931 break;
932 default:
933 ADDR_ASSERT(!"Unknown chip family");
934 break;
935 }
936
937 m_settings.dsMipmapHtileFix = 1;
938
939 if (ASICREV_IS_NAVI10_P(chipRevision))
940 {
941 m_settings.dsMipmapHtileFix = 0;
942 }
943
944 m_configFlags.use32bppFor422Fmt = TRUE;
945
946 return family;
947 }
948
949 /**
950 ************************************************************************************************************************
951 * Gfx10Lib::GetBlk256SizeLog2
952 *
953 * @brief
954 * Get block 256 size
955 *
956 * @return
957 * N/A
958 ************************************************************************************************************************
959 */
960 void Gfx10Lib::GetBlk256SizeLog2(
961 AddrResourceType resourceType, ///< [in] Resource type
962 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
963 UINT_32 elemLog2, ///< [in] element size log2
964 UINT_32 numSamplesLog2, ///< [in] number of samples
965 Dim3d* pBlock ///< [out] block size
966 ) const
967 {
968 if (IsThin(resourceType, swizzleMode))
969 {
970 UINT_32 blockBits = 8 - elemLog2;
971
972 if (IsZOrderSwizzle(swizzleMode))
973 {
974 blockBits -= numSamplesLog2;
975 }
976
977 pBlock->w = (blockBits >> 1) + (blockBits & 1);
978 pBlock->h = (blockBits >> 1);
979 pBlock->d = 0;
980 }
981 else
982 {
983 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
984
985 UINT_32 blockBits = 8 - elemLog2;
986
987 pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
988 pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
989 pBlock->h = (blockBits / 3);
990 }
991 }
992
993 /**
994 ************************************************************************************************************************
995 * Gfx10Lib::GetCompressedBlockSizeLog2
996 *
997 * @brief
998 * Get compress block size
999 *
1000 * @return
1001 * N/A
1002 ************************************************************************************************************************
1003 */
1004 void Gfx10Lib::GetCompressedBlockSizeLog2(
1005 Gfx10DataType dataType, ///< [in] Data type
1006 AddrResourceType resourceType, ///< [in] Resource type
1007 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1008 UINT_32 elemLog2, ///< [in] element size log2
1009 UINT_32 numSamplesLog2, ///< [in] number of samples
1010 Dim3d* pBlock ///< [out] block size
1011 ) const
1012 {
1013 if (dataType == Gfx10DataColor)
1014 {
1015 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1016 }
1017 else
1018 {
1019 ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1020 pBlock->w = 3;
1021 pBlock->h = 3;
1022 pBlock->d = 0;
1023 }
1024 }
1025
1026 /**
1027 ************************************************************************************************************************
1028 * Gfx10Lib::GetMetaOverlapLog2
1029 *
1030 * @brief
1031 * Get meta block overlap
1032 *
1033 * @return
1034 * N/A
1035 ************************************************************************************************************************
1036 */
1037 INT_32 Gfx10Lib::GetMetaOverlapLog2(
1038 Gfx10DataType dataType, ///< [in] Data type
1039 AddrResourceType resourceType, ///< [in] Resource type
1040 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1041 UINT_32 elemLog2, ///< [in] element size log2
1042 UINT_32 numSamplesLog2 ///< [in] number of samples
1043 ) const
1044 {
1045 Dim3d compBlock;
1046 Dim3d microBlock;
1047
1048 GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1049 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, &microBlock);
1050
1051 const INT_32 compSizeLog2 = compBlock.w + compBlock.h + compBlock.d;
1052 const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1053 const INT_32 maxSizeLog2 = Max(compSizeLog2, blk256SizeLog2);
1054 const INT_32 numPipesLog2 = GetEffectiveNumPipes();
1055 INT_32 overlap = numPipesLog2 - maxSizeLog2;
1056
1057 if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1058 {
1059 overlap++;
1060 }
1061
1062 // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1063 if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1064 {
1065 overlap--;
1066 }
1067 overlap = Max(overlap, 0);
1068 return overlap;
1069 }
1070
1071 /**
1072 ************************************************************************************************************************
1073 * Gfx10Lib::Get3DMetaOverlapLog2
1074 *
1075 * @brief
1076 * Get 3d meta block overlap
1077 *
1078 * @return
1079 * N/A
1080 ************************************************************************************************************************
1081 */
1082 INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1083 AddrResourceType resourceType, ///< [in] Resource type
1084 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1085 UINT_32 elemLog2 ///< [in] element size log2
1086 ) const
1087 {
1088 Dim3d microBlock;
1089 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, &microBlock);
1090
1091 INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1092
1093 if (m_settings.supportRbPlus)
1094 {
1095 overlap++;
1096 }
1097
1098 if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1099 {
1100 overlap = 0;
1101 }
1102 return overlap;
1103 }
1104
1105 /**
1106 ************************************************************************************************************************
1107 * Gfx10Lib::GetPipeRotateAmount
1108 *
1109 * @brief
1110 * Get pipe rotate amount
1111 *
1112 * @return
1113 * Pipe rotate amount
1114 ************************************************************************************************************************
1115 */
1116
1117 INT_32 Gfx10Lib::GetPipeRotateAmount(
1118 AddrResourceType resourceType, ///< [in] Resource type
1119 AddrSwizzleMode swizzleMode ///< [in] Swizzle mode
1120 ) const
1121 {
1122 INT_32 amount = 0;
1123
1124 if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1125 {
1126 amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1127 1 : m_pipesLog2 - (m_numSaLog2 + 1);
1128 }
1129
1130 return amount;
1131 }
1132
1133 /**
1134 ************************************************************************************************************************
1135 * Gfx10Lib::GetMetaBlkSize
1136 *
1137 * @brief
1138 * Get metadata block size
1139 *
1140 * @return
1141 * Meta block size
1142 ************************************************************************************************************************
1143 */
1144 UINT_32 Gfx10Lib::GetMetaBlkSize(
1145 Gfx10DataType dataType, ///< [in] Data type
1146 AddrResourceType resourceType, ///< [in] Resource type
1147 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1148 UINT_32 elemLog2, ///< [in] element size log2
1149 UINT_32 numSamplesLog2, ///< [in] number of samples
1150 BOOL_32 pipeAlign, ///< [in] pipe align
1151 Dim3d* pBlock ///< [out] block size
1152 ) const
1153 {
1154 INT_32 metablkSizeLog2;
1155 const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType);
1156 const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType);
1157 const INT_32 compBlkSizeLog2 = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1158 const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1159 numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1160 const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode);
1161 INT_32 numPipesLog2 = m_pipesLog2;
1162
1163 if (IsThin(resourceType, swizzleMode))
1164 {
1165 if ((pipeAlign == FALSE) ||
1166 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1167 (IsDisplaySwizzle(resourceType, swizzleMode) == TRUE))
1168 {
1169 if (pipeAlign)
1170 {
1171 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1172 metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1173 }
1174 else
1175 {
1176 metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1177 }
1178 }
1179 else
1180 {
1181 if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1182 {
1183 numPipesLog2++;
1184 }
1185
1186 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1187
1188 if (numPipesLog2 >= 4)
1189 {
1190 INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1191
1192 // In 16Bpe 8xaa, we have an extra overlap bit
1193 if ((pipeRotateLog2 > 0) &&
1194 (elemLog2 == 4) &&
1195 (numSamplesLog2 == 3) &&
1196 (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1197 {
1198 overlapLog2++;
1199 }
1200
1201 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1202 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1203
1204 if (m_settings.supportRbPlus &&
1205 IsRtOptSwizzle(swizzleMode) &&
1206 (numPipesLog2 == 6) &&
1207 (numSamplesLog2 == 3) &&
1208 (m_maxCompFragLog2 == 3) &&
1209 (metablkSizeLog2 < 15))
1210 {
1211 metablkSizeLog2 = 15;
1212 }
1213 }
1214 else
1215 {
1216 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1217 }
1218
1219 if (dataType == Gfx10DataDepthStencil)
1220 {
1221 // For htile surfaces, pad meta block size to 2K * num_pipes
1222 metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1223 }
1224
1225 const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1226
1227 if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1228 {
1229 const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1230
1231 metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1232 }
1233 }
1234
1235 const INT_32 metablkBitsLog2 =
1236 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1237 pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1238 pBlock->h = 1 << (metablkBitsLog2 >> 1);
1239 pBlock->d = 1;
1240 }
1241 else
1242 {
1243 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1244
1245 if (pipeAlign)
1246 {
1247 if (m_settings.supportRbPlus &&
1248 (m_pipesLog2 == m_numSaLog2 + 1) &&
1249 (m_pipesLog2 > 1) &&
1250 IsRbAligned(resourceType, swizzleMode))
1251 {
1252 numPipesLog2++;
1253 }
1254
1255 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1256
1257 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1258 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1259 metablkSizeLog2 = Max(metablkSizeLog2, 12);
1260 }
1261 else
1262 {
1263 metablkSizeLog2 = 12;
1264 }
1265
1266 const INT_32 metablkBitsLog2 =
1267 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1268 pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1269 pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1270 pBlock->d = 1 << (metablkBitsLog2 / 3);
1271 }
1272
1273 return (1 << static_cast<UINT_32>(metablkSizeLog2));
1274 }
1275
1276 /**
1277 ************************************************************************************************************************
1278 * Gfx10Lib::ConvertSwizzlePatternToEquation
1279 *
1280 * @brief
1281 * Convert swizzle pattern to equation.
1282 *
1283 * @return
1284 * N/A
1285 ************************************************************************************************************************
1286 */
1287 VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
1288 UINT_32 elemLog2, ///< [in] element bytes log2
1289 AddrResourceType rsrcType, ///< [in] resource type
1290 AddrSwizzleMode swMode, ///< [in] swizzle mode
1291 const ADDR_SW_PATINFO* pPatInfo, ///< [in] swizzle pattern info
1292 ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern
1293 const
1294 {
// Translate the swizzle pattern described by pPatInfo into an address equation: for every address bit of the
// swizzle block, pEquation->addr[], xor1[] and xor2[] record which coordinate bits are combined to form that
// bit. Throughout this function channel 0/1/2 is written for terms taken from the .x/.y/.z pattern fields, and
// x indices are offset by elemLog2.
// NOTE(review): the XOR paths below assert that xor1/xor2 entries are still zero before writing them and never
// clear them, so they appear to rely on the caller passing a zero-initialized *pEquation (InitEquationTable
// does) - confirm for any other caller.
1295 ADDR_BIT_SETTING fullSwizzlePattern[20];
1296 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1297
1298 const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern;
1299 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
1300
1301 pEquation->numBits = blockSizeLog2;
1302 pEquation->stackedDepthSlices = FALSE;
1303
// Address bits below the element size are mapped to channel 0 with index == bit position.
1304 for (UINT_32 i = 0; i < elemLog2; i++)
1305 {
1306 pEquation->addr[i].channel = 0;
1307 pEquation->addr[i].valid = 1;
1308 pEquation->addr[i].index = i;
1309 }
1310
1311 if (IsXor(swMode) == FALSE)
1312 {
// Non-XOR swizzle modes: every remaining bit is expected (asserted) to be driven by a single coordinate bit,
// which becomes the addr[] term; xor1/xor2 are explicitly cleared.
1313 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1314 {
1315 ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1316
1317 if (pSwizzle[i].x != 0)
1318 {
1319 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1320
1321 pEquation->addr[i].channel = 0;
1322 pEquation->addr[i].valid = 1;
1323 pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2;
1324 }
1325 else if (pSwizzle[i].y != 0)
1326 {
1327 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1328
1329 pEquation->addr[i].channel = 1;
1330 pEquation->addr[i].valid = 1;
1331 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1332 }
1333 else
1334 {
1335 ADDR_ASSERT(pSwizzle[i].z != 0);
1336 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1337
1338 pEquation->addr[i].channel = 2;
1339 pEquation->addr[i].valid = 1;
1340 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1341 }
1342
1343 pEquation->xor1[i].value = 0;
1344 pEquation->xor2[i].value = 0;
1345 }
1346 }
1347 else if (IsThin(rsrcType, swMode))
1348 {
// Thin XOR modes: only x and y terms can become addr[] terms; a z term, if present, is recorded in xor2.
1349 Dim3d dim;
1350 ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1351
1352 const UINT_32 blkXLog2 = Log2(dim.w);
1353 const UINT_32 blkYLog2 = Log2(dim.h);
1354 const UINT_32 blkXMask = dim.w - 1;
1355 const UINT_32 blkYMask = dim.h - 1;
1356
// swizzle[] is a scratch copy holding the terms of each address bit that are still unassigned.
// xMask/yMask collect the coordinate bits already used as an addr[] term; bMask collects the address bits
// whose scratch entry is empty (fully resolved). Bits below elemLog2 start out resolved.
1357 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1358 UINT_32 xMask = 0;
1359 UINT_32 yMask = 0;
1360 UINT_32 bMask = (1 << elemLog2) - 1;
1361
// Pass 1: bits with a single term are assigned to addr[] directly. For multi-term bits, x/y terms that lie
// outside the block dimensions (xHi/yHi) are moved to xor1/xor2 and stripped from the scratch copy.
1362 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1363 {
1364 if (IsPow2(pSwizzle[i].value))
1365 {
1366 if (pSwizzle[i].x != 0)
1367 {
1368 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1369 xMask |= pSwizzle[i].x;
1370
1371 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1372
1373 ADDR_ASSERT(xLog2 < blkXLog2);
1374
1375 pEquation->addr[i].channel = 0;
1376 pEquation->addr[i].valid = 1;
1377 pEquation->addr[i].index = xLog2 + elemLog2;
1378 }
1379 else
1380 {
1381 ADDR_ASSERT(pSwizzle[i].y != 0);
1382 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1383 yMask |= pSwizzle[i].y;
1384
1385 pEquation->addr[i].channel = 1;
1386 pEquation->addr[i].valid = 1;
1387 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1388
1389 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1390 }
1391
1392 swizzle[i].value = 0;
1393 bMask |= 1 << i;
1394 }
1395 else
1396 {
1397 if (pSwizzle[i].z != 0)
1398 {
1399 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1400
1401 pEquation->xor2[i].channel = 2;
1402 pEquation->xor2[i].valid = 1;
1403 pEquation->xor2[i].index = Log2(pSwizzle[i].z);
1404 }
1405
// Only x and y are carried over to the scratch copy; z (handled above) and s are dropped.
1406 swizzle[i].x = pSwizzle[i].x;
1407 swizzle[i].y = pSwizzle[i].y;
1408 swizzle[i].z = swizzle[i].s = 0;
1409
1410 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1411
1412 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1413
1414 if (xHi != 0)
1415 {
1416 ADDR_ASSERT(IsPow2(xHi));
1417 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1418
1419 pEquation->xor1[i].channel = 0;
1420 pEquation->xor1[i].valid = 1;
1421 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1422
1423 swizzle[i].x &= blkXMask;
1424 }
1425
1426 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1427
1428 if (yHi != 0)
1429 {
1430 ADDR_ASSERT(IsPow2(yHi));
1431
// The out-of-block y term takes xor1 unless an out-of-block x term already claimed it above.
1432 if (xHi == 0)
1433 {
1434 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1435 pEquation->xor1[i].channel = 1;
1436 pEquation->xor1[i].valid = 1;
1437 pEquation->xor1[i].index = Log2(yHi);
1438 }
1439 else
1440 {
1441 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1442 pEquation->xor2[i].channel = 1;
1443 pEquation->xor2[i].valid = 1;
1444 pEquation->xor2[i].index = Log2(yHi);
1445 }
1446
1447 swizzle[i].y &= blkYMask;
1448 }
1449
1450 if (swizzle[i].value == 0)
1451 {
1452 bMask |= 1 << i;
1453 }
1454 }
1455 }
1456
1457 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1458 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1459
1460 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1461
// Pass 2: sweep the unresolved bits until all of them are resolved. A bit left with a single term takes it as
// its addr[] term; otherwise every term whose coordinate bit is already claimed in xMask/yMask is moved to
// xor1 (or xor2 when xor1 is taken) and removed from the scratch copy.
// NOTE(review): the loop only terminates once bMask covers the whole block, so it assumes each sweep makes
// progress for the patterns passed in.
1462 while (bMask != blockMask)
1463 {
1464 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1465 {
1466 if ((bMask & (1 << i)) == 0)
1467 {
1468 if (IsPow2(swizzle[i].value))
1469 {
1470 if (swizzle[i].x != 0)
1471 {
1472 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1473 xMask |= swizzle[i].x;
1474
1475 const UINT_32 xLog2 = Log2(swizzle[i].x);
1476
1477 ADDR_ASSERT(xLog2 < blkXLog2);
1478
1479 pEquation->addr[i].channel = 0;
1480 pEquation->addr[i].valid = 1;
1481 pEquation->addr[i].index = xLog2 + elemLog2;
1482 }
1483 else
1484 {
1485 ADDR_ASSERT(swizzle[i].y != 0);
1486 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1487 yMask |= swizzle[i].y;
1488
1489 pEquation->addr[i].channel = 1;
1490 pEquation->addr[i].valid = 1;
1491 pEquation->addr[i].index = Log2(swizzle[i].y);
1492
1493 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1494 }
1495
1496 swizzle[i].value = 0;
1497 bMask |= 1 << i;
1498 }
1499 else
1500 {
1501 const UINT_32 x = swizzle[i].x & xMask;
1502 const UINT_32 y = swizzle[i].y & yMask;
1503
1504 if (x != 0)
1505 {
1506 ADDR_ASSERT(IsPow2(x));
1507
1508 if (pEquation->xor1[i].value == 0)
1509 {
1510 pEquation->xor1[i].channel = 0;
1511 pEquation->xor1[i].valid = 1;
1512 pEquation->xor1[i].index = Log2(x) + elemLog2;
1513 }
1514 else
1515 {
1516 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1517 pEquation->xor2[i].channel = 0;
1518 pEquation->xor2[i].valid = 1;
1519 pEquation->xor2[i].index = Log2(x) + elemLog2;
1520 }
1521 }
1522
1523 if (y != 0)
1524 {
1525 ADDR_ASSERT(IsPow2(y));
1526
1527 if (pEquation->xor1[i].value == 0)
1528 {
1529 pEquation->xor1[i].channel = 1;
1530 pEquation->xor1[i].valid = 1;
1531 pEquation->xor1[i].index = Log2(y);
1532 }
1533 else
1534 {
1535 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1536 pEquation->xor2[i].channel = 1;
1537 pEquation->xor2[i].valid = 1;
1538 pEquation->xor2[i].index = Log2(y);
1539 }
1540 }
1541
1542 swizzle[i].x &= ~x;
1543 swizzle[i].y &= ~y;
1544 }
1545 }
1546 }
1547 }
1548
// Every x/y bit inside the block must have been used as an addr[] term exactly once.
1549 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1550 }
1551 else
1552 {
// Remaining case (XOR mode that is not thin): the same two-pass scheme as the branch above, extended with
// the z coordinate. Block dimensions are read from the Block4K_Log2_3d table when blockSizeLog2 == 12 and
// from Block64K_Log2_3d otherwise.
1553 const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
1554 const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
1555 const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
1556 const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1557 const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1558 const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1559
1560 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1561 UINT_32 xMask = 0;
1562 UINT_32 yMask = 0;
1563 UINT_32 zMask = 0;
1564 UINT_32 bMask = (1 << elemLog2) - 1;
1565
// Pass 1: assign single-term bits to addr[]; for multi-term bits move out-of-block terms to xor1/xor2.
1566 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1567 {
1568 if (IsPow2(pSwizzle[i].value))
1569 {
1570 if (pSwizzle[i].x != 0)
1571 {
1572 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1573 xMask |= pSwizzle[i].x;
1574
1575 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1576
1577 ADDR_ASSERT(xLog2 < blkXLog2);
1578
1579 pEquation->addr[i].channel = 0;
1580 pEquation->addr[i].valid = 1;
1581 pEquation->addr[i].index = xLog2 + elemLog2;
1582 }
1583 else if (pSwizzle[i].y != 0)
1584 {
1585 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1586 yMask |= pSwizzle[i].y;
1587
1588 pEquation->addr[i].channel = 1;
1589 pEquation->addr[i].valid = 1;
1590 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1591
1592 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1593 }
1594 else
1595 {
1596 ADDR_ASSERT(pSwizzle[i].z != 0);
1597 ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1598 zMask |= pSwizzle[i].z;
1599
1600 pEquation->addr[i].channel = 2;
1601 pEquation->addr[i].valid = 1;
1602 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1603
1604 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1605 }
1606
1607 swizzle[i].value = 0;
1608 bMask |= 1 << i;
1609 }
1610 else
1611 {
1612 swizzle[i].x = pSwizzle[i].x;
1613 swizzle[i].y = pSwizzle[i].y;
1614 swizzle[i].z = pSwizzle[i].z;
1615 swizzle[i].s = 0;
1616
1617 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1618
1619 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1620 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1621 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1622
// Only two xor slots exist, so at most two of the three out-of-block terms may be present.
1623 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1624
1625 if (xHi != 0)
1626 {
1627 ADDR_ASSERT(IsPow2(xHi));
1628 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1629
1630 pEquation->xor1[i].channel = 0;
1631 pEquation->xor1[i].valid = 1;
1632 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1633
1634 swizzle[i].x &= blkXMask;
1635 }
1636
1637 if (yHi != 0)
1638 {
1639 ADDR_ASSERT(IsPow2(yHi));
1640
1641 if (pEquation->xor1[i].value == 0)
1642 {
1643 pEquation->xor1[i].channel = 1;
1644 pEquation->xor1[i].valid = 1;
1645 pEquation->xor1[i].index = Log2(yHi);
1646 }
1647 else
1648 {
1649 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1650 pEquation->xor2[i].channel = 1;
1651 pEquation->xor2[i].valid = 1;
1652 pEquation->xor2[i].index = Log2(yHi);
1653 }
1654
1655 swizzle[i].y &= blkYMask;
1656 }
1657
1658 if (zHi != 0)
1659 {
1660 ADDR_ASSERT(IsPow2(zHi));
1661
1662 if (pEquation->xor1[i].value == 0)
1663 {
1664 pEquation->xor1[i].channel = 2;
1665 pEquation->xor1[i].valid = 1;
1666 pEquation->xor1[i].index = Log2(zHi);
1667 }
1668 else
1669 {
1670 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1671 pEquation->xor2[i].channel = 2;
1672 pEquation->xor2[i].valid = 1;
1673 pEquation->xor2[i].index = Log2(zHi);
1674 }
1675
1676 swizzle[i].z &= blkZMask;
1677 }
1678
1679 if (swizzle[i].value == 0)
1680 {
1681 bMask |= 1 << i;
1682 }
1683 }
1684 }
1685
1686 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1687 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1688
1689 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1690
// Pass 2: sweep the unresolved bits until all are resolved (see the NOTE on termination in the thin branch's
// equivalent loop: progress on every sweep is assumed, not checked).
1691 while (bMask != blockMask)
1692 {
1693 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1694 {
1695 if ((bMask & (1 << i)) == 0)
1696 {
1697 if (IsPow2(swizzle[i].value))
1698 {
1699 if (swizzle[i].x != 0)
1700 {
1701 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1702 xMask |= swizzle[i].x;
1703
1704 const UINT_32 xLog2 = Log2(swizzle[i].x);
1705
1706 ADDR_ASSERT(xLog2 < blkXLog2);
1707
1708 pEquation->addr[i].channel = 0;
1709 pEquation->addr[i].valid = 1;
1710 pEquation->addr[i].index = xLog2 + elemLog2;
1711 }
1712 else if (swizzle[i].y != 0)
1713 {
1714 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1715 yMask |= swizzle[i].y;
1716
1717 pEquation->addr[i].channel = 1;
1718 pEquation->addr[i].valid = 1;
1719 pEquation->addr[i].index = Log2(swizzle[i].y);
1720
1721 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1722 }
1723 else
1724 {
1725 ADDR_ASSERT(swizzle[i].z != 0);
1726 ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1727 zMask |= swizzle[i].z;
1728
1729 pEquation->addr[i].channel = 2;
1730 pEquation->addr[i].valid = 1;
1731 pEquation->addr[i].index = Log2(swizzle[i].z);
1732
1733 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1734 }
1735
1736 swizzle[i].value = 0;
1737 bMask |= 1 << i;
1738 }
1739 else
1740 {
// Terms whose coordinate bit is already an addr[] term elsewhere become xor terms of this bit.
1741 const UINT_32 x = swizzle[i].x & xMask;
1742 const UINT_32 y = swizzle[i].y & yMask;
1743 const UINT_32 z = swizzle[i].z & zMask;
1744
1745 if (x != 0)
1746 {
1747 ADDR_ASSERT(IsPow2(x));
1748
1749 if (pEquation->xor1[i].value == 0)
1750 {
1751 pEquation->xor1[i].channel = 0;
1752 pEquation->xor1[i].valid = 1;
1753 pEquation->xor1[i].index = Log2(x) + elemLog2;
1754 }
1755 else
1756 {
1757 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1758 pEquation->xor2[i].channel = 0;
1759 pEquation->xor2[i].valid = 1;
1760 pEquation->xor2[i].index = Log2(x) + elemLog2;
1761 }
1762 }
1763
1764 if (y != 0)
1765 {
1766 ADDR_ASSERT(IsPow2(y));
1767
1768 if (pEquation->xor1[i].value == 0)
1769 {
1770 pEquation->xor1[i].channel = 1;
1771 pEquation->xor1[i].valid = 1;
1772 pEquation->xor1[i].index = Log2(y);
1773 }
1774 else
1775 {
1776 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1777 pEquation->xor2[i].channel = 1;
1778 pEquation->xor2[i].valid = 1;
1779 pEquation->xor2[i].index = Log2(y);
1780 }
1781 }
1782
1783 if (z != 0)
1784 {
1785 ADDR_ASSERT(IsPow2(z));
1786
1787 if (pEquation->xor1[i].value == 0)
1788 {
1789 pEquation->xor1[i].channel = 2;
1790 pEquation->xor1[i].valid = 1;
1791 pEquation->xor1[i].index = Log2(z);
1792 }
1793 else
1794 {
1795 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1796 pEquation->xor2[i].channel = 2;
1797 pEquation->xor2[i].valid = 1;
1798 pEquation->xor2[i].index = Log2(z);
1799 }
1800 }
1801
1802 swizzle[i].x &= ~x;
1803 swizzle[i].y &= ~y;
1804 swizzle[i].z &= ~z;
1805 }
1806 }
1807 }
1808 }
1809
// Every x/y/z bit inside the block must have been used as an addr[] term exactly once.
1810 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1811 }
1812 }
1813
1814 /**
1815 ************************************************************************************************************************
1816 * Gfx10Lib::InitEquationTable
1817 *
1818 * @brief
1819 * Initialize Equation table.
1820 *
1821 * @return
1822 * N/A
1823 ************************************************************************************************************************
1824 */
1825 VOID Gfx10Lib::InitEquationTable()
1826 {
1827 memset(m_equationTable, 0, sizeof(m_equationTable));
1828
1829 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1830 {
1831 const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1832
1833 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1834 {
1835 const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1836
1837 for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1838 {
1839 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
1840 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
1841
1842 if (pPatInfo != NULL)
1843 {
1844 ADDR_ASSERT(IsValidSwMode(swMode));
1845
1846 if (pPatInfo->maxItemCount <= 3)
1847 {
1848 ADDR_EQUATION equation = {};
1849
1850 ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
1851
1852 equationIndex = m_numEquations;
1853 ADDR_ASSERT(equationIndex < EquationTableSize);
1854
1855 m_equationTable[equationIndex] = equation;
1856
1857 m_numEquations++;
1858 }
1859 else
1860 {
1861 // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
1862 ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
1863 ADDR_ASSERT(rsrcTypeIdx == 1);
1864 ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
1865 ADDR_ASSERT(m_settings.supportRbPlus == 1);
1866 }
1867 }
1868
1869 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
1870 }
1871 }
1872 }
1873 }
1874
1875 /**
1876 ************************************************************************************************************************
1877 * Gfx10Lib::HwlGetEquationIndex
1878 *
1879 * @brief
1880 * Interface function stub of GetEquationIndex
1881 *
1882 * @return
1883 * ADDR_E_RETURNCODE
1884 ************************************************************************************************************************
1885 */
1886 UINT_32 Gfx10Lib::HwlGetEquationIndex(
1887 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
1888 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
1889 ) const
1890 {
1891 UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
1892
1893 if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
1894 (pIn->resourceType == ADDR_RSRC_TEX_3D))
1895 {
1896 const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
1897 const UINT_32 swModeIdx = static_cast<UINT_32>(pIn->swizzleMode);
1898 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
1899
1900 equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
1901 }
1902
1903 if (pOut->pMipInfo != NULL)
1904 {
1905 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
1906 {
1907 pOut->pMipInfo[i].equationIndex = equationIdx;
1908 }
1909 }
1910
1911 return equationIdx;
1912 }
1913
1914 /**
1915 ************************************************************************************************************************
1916 * Gfx10Lib::IsValidDisplaySwizzleMode
1917 *
1918 * @brief
1919 * Check if a swizzle mode is supported by display engine
1920 *
1921 * @return
1922 * TRUE is swizzle mode is supported by display engine
1923 ************************************************************************************************************************
1924 */
1925 BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
1926 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
1927 ) const
1928 {
1929 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
1930
1931 BOOL_32 support = FALSE;
1932
1933 if (m_settings.isDcn2)
1934 {
1935 switch (pIn->swizzleMode)
1936 {
1937 case ADDR_SW_4KB_D:
1938 case ADDR_SW_4KB_D_X:
1939 case ADDR_SW_64KB_D:
1940 case ADDR_SW_64KB_D_T:
1941 case ADDR_SW_64KB_D_X:
1942 support = (pIn->bpp == 64);
1943 break;
1944
1945 case ADDR_SW_LINEAR:
1946 case ADDR_SW_4KB_S:
1947 case ADDR_SW_4KB_S_X:
1948 case ADDR_SW_64KB_S:
1949 case ADDR_SW_64KB_S_T:
1950 case ADDR_SW_64KB_S_X:
1951 case ADDR_SW_64KB_R_X:
1952 support = (pIn->bpp <= 64);
1953 break;
1954
1955 default:
1956 break;
1957 }
1958 }
1959 else
1960 {
1961 ADDR_NOT_IMPLEMENTED();
1962 }
1963
1964 return support;
1965 }
1966
1967 /**
1968 ************************************************************************************************************************
1969 * Gfx10Lib::GetMaxNumMipsInTail
1970 *
1971 * @brief
1972 * Return max number of mips in tails
1973 *
1974 * @return
1975 * Max number of mips in tails
1976 ************************************************************************************************************************
1977 */
1978 UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
1979 UINT_32 blockSizeLog2, ///< block size log2
1980 BOOL_32 isThin ///< is thin or thick
1981 ) const
1982 {
1983 UINT_32 effectiveLog2 = blockSizeLog2;
1984
1985 if (isThin == FALSE)
1986 {
1987 effectiveLog2 -= (blockSizeLog2 - 8) / 3;
1988 }
1989
1990 return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
1991 }
1992
1993 /**
1994 ************************************************************************************************************************
1995 * Gfx10Lib::HwlComputePipeBankXor
1996 *
1997 * @brief
1998 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
1999 *
2000 * @return
2001 * PipeBankXor value
2002 ************************************************************************************************************************
2003 */
2004 ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
2005 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2006 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2007 ) const
2008 {
2009 if (IsNonPrtXor(pIn->swizzleMode))
2010 {
2011 const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2012 const UINT_32 pipeBits = GetPipeXorBits(blockBits);
2013 const UINT_32 bankBits = GetBankXorBits(blockBits);
2014
2015 UINT_32 pipeXor = 0;
2016 UINT_32 bankXor = 0;
2017
2018 if (bankBits != 0)
2019 {
2020 if (blockBits == 16)
2021 {
2022 const UINT_32 XorPatternLen = 8;
2023 static const UINT_32 XorBank1b[XorPatternLen] = {0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80};
2024 static const UINT_32 XorBank2b[XorPatternLen] = {0x00, 0x80, 0x40, 0xC0, 0x80, 0x00, 0xC0, 0x40};
2025 static const UINT_32 XorBank3b[XorPatternLen] = {0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0};
2026
2027 const UINT_32 index = pIn->surfIndex % XorPatternLen;
2028
2029 if (bankBits == 1)
2030 {
2031 bankXor = XorBank1b[index];
2032 }
2033 else if (bankBits == 2)
2034 {
2035 bankXor = XorBank2b[index];
2036 }
2037 else
2038 {
2039 bankXor = XorBank3b[index];
2040
2041 if (bankBits == 4)
2042 {
2043 bankXor >>= (2 - pipeBits);
2044 }
2045 }
2046 }
2047 }
2048
2049 pOut->pipeBankXor = bankXor | pipeXor;
2050 }
2051 else
2052 {
2053 pOut->pipeBankXor = 0;
2054 }
2055
2056 return ADDR_OK;
2057 }
2058
2059 /**
2060 ************************************************************************************************************************
2061 * Gfx10Lib::HwlComputeSlicePipeBankXor
2062 *
2063 * @brief
2064 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2065 *
2066 * @return
2067 * PipeBankXor value
2068 ************************************************************************************************************************
2069 */
2070 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
2071 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2072 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2073 ) const
2074 {
2075 if (IsNonPrtXor(pIn->swizzleMode))
2076 {
2077 const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2078 const UINT_32 pipeBits = GetPipeXorBits(blockBits);
2079 const UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
2080
2081 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;
2082 }
2083 else
2084 {
2085 pOut->pipeBankXor = 0;
2086 }
2087
2088 return ADDR_OK;
2089 }
2090
2091 /**
2092 ************************************************************************************************************************
2093 * Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2094 *
2095 * @brief
2096 * Compute sub resource offset to support swizzle pattern
2097 *
2098 * @return
2099 * Offset
2100 ************************************************************************************************************************
2101 */
2102 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2103 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure
2104 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure
2105 ) const
2106 {
2107 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2108
2109 pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
2110
2111 return ADDR_OK;
2112 }
2113
2114 /**
2115 ************************************************************************************************************************
2116 * Gfx10Lib::ValidateNonSwModeParams
2117 *
2118 * @brief
2119 * Validate compute surface info params except swizzle mode
2120 *
2121 * @return
2122 * TRUE if parameters are valid, FALSE otherwise
2123 ************************************************************************************************************************
2124 */
2125 BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
2126 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2127 {
2128 BOOL_32 valid = TRUE;
2129
2130 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2131 {
2132 ADDR_ASSERT_ALWAYS();
2133 valid = FALSE;
2134 }
2135
2136 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
2137 {
2138 ADDR_ASSERT_ALWAYS();
2139 valid = FALSE;
2140 }
2141
2142 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2143 const AddrResourceType rsrcType = pIn->resourceType;
2144 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
2145 const BOOL_32 msaa = (pIn->numFrags > 1);
2146 const BOOL_32 display = flags.display;
2147 const BOOL_32 tex3d = IsTex3d(rsrcType);
2148 const BOOL_32 tex2d = IsTex2d(rsrcType);
2149 const BOOL_32 tex1d = IsTex1d(rsrcType);
2150 const BOOL_32 stereo = flags.qbStereo;
2151
2152 // Resource type check
2153 if (tex1d)
2154 {
2155 if (msaa || display || stereo)
2156 {
2157 ADDR_ASSERT_ALWAYS();
2158 valid = FALSE;
2159 }
2160 }
2161 else if (tex2d)
2162 {
2163 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2164 {
2165 ADDR_ASSERT_ALWAYS();
2166 valid = FALSE;
2167 }
2168 }
2169 else if (tex3d)
2170 {
2171 if (msaa || display || stereo)
2172 {
2173 ADDR_ASSERT_ALWAYS();
2174 valid = FALSE;
2175 }
2176 }
2177 else
2178 {
2179 ADDR_ASSERT_ALWAYS();
2180 valid = FALSE;
2181 }
2182
2183 return valid;
2184 }
2185
2186 /**
2187 ************************************************************************************************************************
2188 * Gfx10Lib::ValidateSwModeParams
2189 *
2190 * @brief
2191 * Validate compute surface info related to swizzle mode
2192 *
2193 * @return
2194 * TRUE if parameters are valid, FALSE otherwise
2195 ************************************************************************************************************************
2196 */
2197 BOOL_32 Gfx10Lib::ValidateSwModeParams(
2198 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2199 {
2200 BOOL_32 valid = TRUE;
2201
2202 if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
2203 {
2204 ADDR_ASSERT_ALWAYS();
2205 valid = FALSE;
2206 }
2207
2208 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2209 const AddrResourceType rsrcType = pIn->resourceType;
2210 const AddrSwizzleMode swizzle = pIn->swizzleMode;
2211 const BOOL_32 msaa = (pIn->numFrags > 1);
2212 const BOOL_32 zbuffer = flags.depth || flags.stencil;
2213 const BOOL_32 color = flags.color;
2214 const BOOL_32 display = flags.display;
2215 const BOOL_32 tex3d = IsTex3d(rsrcType);
2216 const BOOL_32 tex2d = IsTex2d(rsrcType);
2217 const BOOL_32 tex1d = IsTex1d(rsrcType);
2218 const BOOL_32 thin3d = flags.view3dAs2dArray;
2219 const BOOL_32 linear = IsLinear(swizzle);
2220 const BOOL_32 blk256B = IsBlock256b(swizzle);
2221 const BOOL_32 blkVar = IsBlockVariable(swizzle);
2222 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
2223 const BOOL_32 prt = flags.prt;
2224 const BOOL_32 fmask = flags.fmask;
2225
2226 // Misc check
2227 if ((pIn->numFrags > 1) &&
2228 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2229 {
2230 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2231 ADDR_ASSERT_ALWAYS();
2232 valid = FALSE;
2233 }
2234
2235 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2236 {
2237 ADDR_ASSERT_ALWAYS();
2238 valid = FALSE;
2239 }
2240
2241 if ((pIn->bpp == 96) && (linear == FALSE))
2242 {
2243 ADDR_ASSERT_ALWAYS();
2244 valid = FALSE;
2245 }
2246
2247 const UINT_32 swizzleMask = 1 << swizzle;
2248
2249 // Resource type check
2250 if (tex1d)
2251 {
2252 if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
2253 {
2254 ADDR_ASSERT_ALWAYS();
2255 valid = FALSE;
2256 }
2257 }
2258 else if (tex2d)
2259 {
2260 if (((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0) ||
2261 (prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||
2262 (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))
2263 {
2264 ADDR_ASSERT_ALWAYS();
2265 valid = FALSE;
2266 }
2267 }
2268 else if (tex3d)
2269 {
2270 if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
2271 (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
2272 (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0)))
2273 {
2274 ADDR_ASSERT_ALWAYS();
2275 valid = FALSE;
2276 }
2277 }
2278
2279 // Swizzle type check
2280 if (linear)
2281 {
2282 if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2283 {
2284 ADDR_ASSERT_ALWAYS();
2285 valid = FALSE;
2286 }
2287 }
2288 else if (IsZOrderSwizzle(swizzle))
2289 {
2290 if ((pIn->bpp > 64) ||
2291 (msaa && (color || (pIn->bpp > 32))) ||
2292 ElemLib::IsBlockCompressed(pIn->format) ||
2293 ElemLib::IsMacroPixelPacked(pIn->format))
2294 {
2295 ADDR_ASSERT_ALWAYS();
2296 valid = FALSE;
2297 }
2298 }
2299 else if (IsStandardSwizzle(rsrcType, swizzle))
2300 {
2301 if (zbuffer || msaa)
2302 {
2303 ADDR_ASSERT_ALWAYS();
2304 valid = FALSE;
2305 }
2306 }
2307 else if (IsDisplaySwizzle(rsrcType, swizzle))
2308 {
2309 if (zbuffer || msaa)
2310 {
2311 ADDR_ASSERT_ALWAYS();
2312 valid = FALSE;
2313 }
2314 }
2315 else if (IsRtOptSwizzle(swizzle))
2316 {
2317 if (zbuffer)
2318 {
2319 ADDR_ASSERT_ALWAYS();
2320 valid = FALSE;
2321 }
2322 }
2323 else
2324 {
2325 ADDR_ASSERT_ALWAYS();
2326 valid = FALSE;
2327 }
2328
2329 // Block type check
2330 if (blk256B)
2331 {
2332 if (zbuffer || tex3d || msaa)
2333 {
2334 ADDR_ASSERT_ALWAYS();
2335 valid = FALSE;
2336 }
2337 }
2338 else if (blkVar)
2339 {
2340 if (m_blockVarSizeLog2 == 0)
2341 {
2342 ADDR_ASSERT_ALWAYS();
2343 valid = FALSE;
2344 }
2345 }
2346
2347 return valid;
2348 }
2349
2350 /**
2351 ************************************************************************************************************************
2352 * Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2353 *
2354 * @brief
2355 * Compute surface info sanity check
2356 *
2357 * @return
2358 * Offset
2359 ************************************************************************************************************************
2360 */
2361 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2362 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
2363 ) const
2364 {
2365 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2366 }
2367
2368 /**
2369 ************************************************************************************************************************
2370 * Gfx10Lib::HwlGetPreferredSurfaceSetting
2371 *
2372 * @brief
2373 * Internal function to get suggested surface information for cliet to use
2374 *
2375 * @return
2376 * ADDR_E_RETURNCODE
2377 ************************************************************************************************************************
2378 */
2379 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2380 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure
2381 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure
2382 ) const
2383 {
2384 ADDR_E_RETURNCODE returnCode = ADDR_OK;
2385
2386 if (pIn->flags.fmask)
2387 {
2388 const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;
2389 const BOOL_32 forbidVarBlockType = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));
2390
2391 if (forbid64KbBlockType && forbidVarBlockType)
2392 {
2393 // Invalid combination...
2394 ADDR_ASSERT_ALWAYS();
2395 returnCode = ADDR_INVALIDPARAMS;
2396 }
2397 else
2398 {
2399 pOut->resourceType = ADDR_RSRC_TEX_2D;
2400 pOut->validBlockSet.value = 0;
2401 pOut->validBlockSet.macroThin64KB = forbid64KbBlockType ? 0 : 1;
2402 pOut->validBlockSet.var = forbidVarBlockType ? 0 : 1;
2403 pOut->validSwModeSet.value = 0;
2404 pOut->validSwModeSet.sw64KB_Z_X = forbid64KbBlockType ? 0 : 1;
2405 pOut->validSwModeSet.swVar_Z_X = forbidVarBlockType ? 0 : 1;
2406 pOut->canXor = TRUE;
2407 pOut->validSwTypeSet.value = AddrSwSetZ;
2408 pOut->clientPreferredSwSet = pOut->validSwTypeSet;
2409
2410 BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);
2411
2412 if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
2413 {
2414 const UINT_8 maxFmaskSwizzleModeType = 2;
2415 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2416 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2417 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
2418 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2419 const UINT_32 width = Max(pIn->width, 1u);
2420 const UINT_32 height = Max(pIn->height, 1u);
2421 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);
2422
2423 AddrSwizzleMode swMode[maxFmaskSwizzleModeType] = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};
2424 Dim3d blkDim[maxFmaskSwizzleModeType] = {{0}, {0}};
2425 Dim3d padDim[maxFmaskSwizzleModeType] = {{0}, {0}};
2426 UINT_64 padSize[maxFmaskSwizzleModeType] = {0};
2427
2428 for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)
2429 {
2430 ComputeBlockDimensionForSurf(&blkDim[i].w,
2431 &blkDim[i].h,
2432 &blkDim[i].d,
2433 fmaskBpp,
2434 1,
2435 pOut->resourceType,
2436 swMode[i]);
2437
2438 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2439 padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2440 }
2441
2442 if (GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0]))
2443 {
2444 if ((padSize[1] * ratioHi) <= (padSize[0] * ratioLow))
2445 {
2446 use64KbBlockType = FALSE;
2447 }
2448 }
2449 else
2450 {
2451 if ((padSize[1] * ratioLow) < (padSize[0] * ratioHi))
2452 {
2453 use64KbBlockType = FALSE;
2454 }
2455 }
2456 }
2457 else if (forbidVarBlockType)
2458 {
2459 use64KbBlockType = TRUE;
2460 }
2461
2462 if (use64KbBlockType)
2463 {
2464 pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2465 }
2466 else
2467 {
2468 pOut->swizzleMode = ADDR_SW_VAR_Z_X;
2469 }
2470 }
2471 }
2472 else
2473 {
2474 UINT_32 bpp = pIn->bpp;
2475 UINT_32 width = Max(pIn->width, 1u);
2476 UINT_32 height = Max(pIn->height, 1u);
2477
2478 // Set format to INVALID will skip this conversion
2479 if (pIn->format != ADDR_FMT_INVALID)
2480 {
2481 ElemMode elemMode = ADDR_UNCOMPRESSED;
2482 UINT_32 expandX, expandY;
2483
2484 // Get compression/expansion factors and element mode which indicates compression/expansion
2485 bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2486 &elemMode,
2487 &expandX,
2488 &expandY);
2489
2490 UINT_32 basePitch = 0;
2491 GetElemLib()->AdjustSurfaceInfo(elemMode,
2492 expandX,
2493 expandY,
2494 &bpp,
2495 &basePitch,
2496 &width,
2497 &height);
2498 }
2499
2500 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2501 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2502 const UINT_32 numSamples = Max(pIn->numSamples, 1u);
2503 const UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2504 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
2505
2506 // Pre sanity check on non swizzle mode parameters
2507 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2508 localIn.flags = pIn->flags;
2509 localIn.resourceType = pIn->resourceType;
2510 localIn.format = pIn->format;
2511 localIn.bpp = bpp;
2512 localIn.width = width;
2513 localIn.height = height;
2514 localIn.numSlices = numSlices;
2515 localIn.numMipLevels = numMipLevels;
2516 localIn.numSamples = numSamples;
2517 localIn.numFrags = numFrags;
2518
2519 if (ValidateNonSwModeParams(&localIn))
2520 {
2521 // Forbid swizzle mode(s) by client setting
2522 ADDR2_SWMODE_SET allowedSwModeSet = {};
2523 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2524 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx10Blk256BSwModeMask;
2525 allowedSwModeSet.value |=
2526 pIn->forbiddenBlock.macroThin4KB ? 0 :
2527 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
2528 allowedSwModeSet.value |=
2529 pIn->forbiddenBlock.macroThick4KB ? 0 :
2530 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
2531 allowedSwModeSet.value |=
2532 pIn->forbiddenBlock.macroThin64KB ? 0 :
2533 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
2534 allowedSwModeSet.value |=
2535 pIn->forbiddenBlock.macroThick64KB ? 0 :
2536 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
2537 allowedSwModeSet.value |=
2538 pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
2539
2540 if (pIn->preferredSwSet.value != 0)
2541 {
2542 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2543 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2544 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2545 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2546 }
2547
2548 if (pIn->noXor)
2549 {
2550 allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2551 }
2552
2553 if (pIn->maxAlign > 0)
2554 {
2555 if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
2556 {
2557 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2558 }
2559
2560 if (pIn->maxAlign < Size64K)
2561 {
2562 allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2563 }
2564
2565 if (pIn->maxAlign < Size4K)
2566 {
2567 allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2568 }
2569
2570 if (pIn->maxAlign < Size256)
2571 {
2572 allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2573 }
2574 }
2575
2576 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2577 switch (pIn->resourceType)
2578 {
2579 case ADDR_RSRC_TEX_1D:
2580 allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2581 break;
2582
2583 case ADDR_RSRC_TEX_2D:
2584 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2585 break;
2586
2587 case ADDR_RSRC_TEX_3D:
2588 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2589
2590 if (pIn->flags.view3dAs2dArray)
2591 {
2592 allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask;
2593 }
2594 break;
2595
2596 default:
2597 ADDR_ASSERT_ALWAYS();
2598 allowedSwModeSet.value = 0;
2599 break;
2600 }
2601
2602 if (ElemLib::IsBlockCompressed(pIn->format) ||
2603 ElemLib::IsMacroPixelPacked(pIn->format) ||
2604 (bpp > 64) ||
2605 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2606 {
2607 allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2608 }
2609
2610 if (pIn->format == ADDR_FMT_32_32_32)
2611 {
2612 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2613 }
2614
2615 if (msaa)
2616 {
2617 allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
2618 }
2619
2620 if (pIn->flags.depth || pIn->flags.stencil)
2621 {
2622 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2623 }
2624
2625 if (pIn->flags.display)
2626 {
2627 if (m_settings.isDcn2)
2628 {
2629 allowedSwModeSet.value &= (bpp == 64) ? Dcn2Bpp64SwModeMask : Dcn2NonBpp64SwModeMask;
2630 }
2631 else
2632 {
2633 ADDR_NOT_IMPLEMENTED();
2634 }
2635 }
2636
2637 if (allowedSwModeSet.value != 0)
2638 {
2639 #if DEBUG
2640 // Post sanity check, at least AddrLib should accept the output generated by its own
2641 UINT_32 validateSwModeSet = allowedSwModeSet.value;
2642
2643 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
2644 {
2645 if (validateSwModeSet & 1)
2646 {
2647 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
2648 ADDR_ASSERT(ValidateSwModeParams(&localIn));
2649 }
2650
2651 validateSwModeSet >>= 1;
2652 }
2653 #endif
2654
2655 pOut->resourceType = pIn->resourceType;
2656 pOut->validSwModeSet = allowedSwModeSet;
2657 pOut->canXor = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
2658 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2659 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
2660
2661 pOut->clientPreferredSwSet = pIn->preferredSwSet;
2662
2663 if (pOut->clientPreferredSwSet.value == 0)
2664 {
2665 pOut->clientPreferredSwSet.value = AddrSwSetAll;
2666 }
2667
2668 // Apply optional restrictions
2669 if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)
2670 {
2671 if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)
2672 {
2673 // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
2674 // the GL2 in VAR mode, so it should be avoided.
2675 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2676 }
2677 else
2678 {
2679 // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
2680 // But we have to suffer from low performance because there is no other choice...
2681 ADDR_ASSERT_ALWAYS();
2682 }
2683 }
2684
2685 if (pIn->flags.needEquation)
2686 {
2687 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
2688 }
2689
2690 if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
2691 {
2692 pOut->swizzleMode = ADDR_SW_LINEAR;
2693 }
2694 else
2695 {
2696 // Always ignore linear swizzle mode if there is other choice.
2697 allowedSwModeSet.swLinear = 0;
2698
2699 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2700
2701 // Determine block size if there is 2 or more block type candidates
2702 if (IsPow2(allowedBlockSet.value) == FALSE)
2703 {
2704 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = { ADDR_SW_LINEAR };
2705
2706 if (m_blockVarSizeLog2 != 0)
2707 {
2708 swMode[AddrBlockVar] = ADDR_SW_VAR_R_X;
2709 }
2710
2711 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
2712 {
2713 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
2714 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_R_X;
2715 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
2716 }
2717 else
2718 {
2719 swMode[AddrBlockMicro] = ADDR_SW_256B_S;
2720 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_S;
2721 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
2722 }
2723
2724 Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
2725 Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
2726 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
2727
2728 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2729 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2730 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
2731 UINT_32 minSizeBlk = AddrBlockMicro;
2732 UINT_64 minSize = 0;
2733
2734 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
2735 {
2736 if (allowedBlockSet.value & (1 << i))
2737 {
2738 ComputeBlockDimensionForSurf(&blkDim[i].w,
2739 &blkDim[i].h,
2740 &blkDim[i].d,
2741 bpp,
2742 numFrags,
2743 pOut->resourceType,
2744 swMode[i]);
2745
2746 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2747 padSize[i] = PowTwoAlign(padSize[i] * numFrags, sizeAlignInElement);
2748
2749 if (minSize == 0)
2750 {
2751 minSize = padSize[i];
2752 minSizeBlk = i;
2753 }
2754 else
2755 {
2756 // Due to the fact that VAR block size = 16KB * m_pipes, it is possible that VAR
2757 // block size < 64KB. And ratio[Hi/Low] logic implicitly requires iterating from
2758 // smaller block type to bigger block type. So we have to correct comparing logic
2759 // according to the size of existing "minimun block" and size of coming/comparing
2760 // block. The new logic can also be useful to any future change about AddrBlockType.
2761 if (GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk]))
2762 {
2763 if ((padSize[i] * ratioHi) <= (minSize * ratioLow))
2764 {
2765 minSize = padSize[i];
2766 minSizeBlk = i;
2767 }
2768 }
2769 else
2770 {
2771 if ((padSize[i] * ratioLow) < (minSize * ratioHi))
2772 {
2773 minSize = padSize[i];
2774 minSizeBlk = i;
2775 }
2776 }
2777 }
2778 }
2779 }
2780
2781 if ((allowedBlockSet.micro == TRUE) &&
2782 (width <= blkDim[AddrBlockMicro].w) &&
2783 (height <= blkDim[AddrBlockMicro].h))
2784 {
2785 minSizeBlk = AddrBlockMicro;
2786 }
2787
2788 if (minSizeBlk == AddrBlockMicro)
2789 {
2790 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2791 allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
2792 }
2793 else if (minSizeBlk == AddrBlockThick4KB)
2794 {
2795 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2796 allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;
2797 }
2798 else if (minSizeBlk == AddrBlockThin4KB)
2799 {
2800 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2801 allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
2802 }
2803 else if (minSizeBlk == AddrBlockThick64KB)
2804 {
2805 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2806 allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;
2807 }
2808 else if (minSizeBlk == AddrBlockThin64KB)
2809 {
2810 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
2811 Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;
2812 }
2813 else
2814 {
2815 ADDR_ASSERT(minSizeBlk == AddrBlockVar);
2816 allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;
2817 }
2818 }
2819
2820 // Block type should be determined.
2821 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
2822
2823 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
2824
2825 // Determine swizzle type if there is 2 or more swizzle type candidates
2826 if (IsPow2(allowedSwSet.value) == FALSE)
2827 {
2828 if (ElemLib::IsBlockCompressed(pIn->format))
2829 {
2830 if (allowedSwSet.sw_D)
2831 {
2832 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2833 }
2834 else if (allowedSwSet.sw_S)
2835 {
2836 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2837 }
2838 else
2839 {
2840 ADDR_ASSERT(allowedSwSet.sw_R);
2841 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2842 }
2843 }
2844 else if (ElemLib::IsMacroPixelPacked(pIn->format))
2845 {
2846 if (allowedSwSet.sw_S)
2847 {
2848 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2849 }
2850 else if (allowedSwSet.sw_D)
2851 {
2852 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2853 }
2854 else
2855 {
2856 ADDR_ASSERT(allowedSwSet.sw_R);
2857 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2858 }
2859 }
2860 else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
2861 {
2862 if (pIn->flags.color &&
2863 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&
2864 allowedSwSet.sw_D)
2865 {
2866 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2867 }
2868 else if (allowedSwSet.sw_S)
2869 {
2870 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2871 }
2872 else if (allowedSwSet.sw_R)
2873 {
2874 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2875 }
2876 else
2877 {
2878 ADDR_ASSERT(allowedSwSet.sw_Z);
2879 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2880 }
2881 }
2882 else
2883 {
2884 if (allowedSwSet.sw_R)
2885 {
2886 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2887 }
2888 else if (allowedSwSet.sw_D)
2889 {
2890 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2891 }
2892 else if (allowedSwSet.sw_S)
2893 {
2894 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2895 }
2896 else
2897 {
2898 ADDR_ASSERT(allowedSwSet.sw_Z);
2899 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2900 }
2901 }
2902 }
2903
2904 // Swizzle type should be determined.
2905 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
2906
2907 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
2908 // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
2909 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
2910 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
2911 }
2912 }
2913 else
2914 {
2915 // Invalid combination...
2916 ADDR_ASSERT_ALWAYS();
2917 returnCode = ADDR_INVALIDPARAMS;
2918 }
2919 }
2920 else
2921 {
2922 // Invalid combination...
2923 ADDR_ASSERT_ALWAYS();
2924 returnCode = ADDR_INVALIDPARAMS;
2925 }
2926 }
2927
2928 return returnCode;
2929 }
2930
2931 /**
2932 ************************************************************************************************************************
2933 * Gfx10Lib::ComputeStereoInfo
2934 *
2935 * @brief
2936 * Compute height alignment and right eye pipeBankXor for stereo surface
2937 *
2938 * @return
2939 * Error code
2940 *
2941 ************************************************************************************************************************
2942 */
2943 ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
2944 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< Compute surface info
2945 UINT_32 blkHeight, ///< Block height
2946 UINT_32* pAlignY, ///< Stereo requested additional alignment in Y
2947 UINT_32* pRightXor ///< Right eye xor
2948 ) const
2949 {
2950 ADDR_E_RETURNCODE ret = ADDR_OK;
2951
2952 *pAlignY = 1;
2953 *pRightXor = 0;
2954
2955 if (IsNonPrtXor(pIn->swizzleMode))
2956 {
2957 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
2958 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
2959 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
2960 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
2961 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
2962
2963 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
2964 {
2965 UINT_32 yMax = 0;
2966 UINT_32 yPos = 0;
2967
2968 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
2969 {
2970 if (m_equationTable[eqIndex].xor1[i].value == 0)
2971 {
2972 break;