amd/addrlib: fix another C++ one definition rule violation
[mesa.git] / src / amd / addrlib / src / gfx10 / gfx10addrlib.cpp
1 /*
2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx10addrlib.cpp
30 * @brief Contain the implementation for the Gfx10Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx10addrlib.h"
35 #include "gfx10_gb_reg.h"
36
37 #include "amdgpu_asic_addr.h"
38
39 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
40 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
41
42 namespace Addr
43 {
44 /**
45 ************************************************************************************************************************
46 * Gfx10HwlInit
47 *
48 * @brief
49 * Creates an Gfx10Lib object.
50 *
51 * @return
52 * Returns an Gfx10Lib object pointer.
53 ************************************************************************************************************************
54 */
55 Addr::Lib* Gfx10HwlInit(const Client* pClient)
56 {
57 return V2::Gfx10Lib::CreateObj(pClient);
58 }
59
60 namespace V2
61 {
62
63 ////////////////////////////////////////////////////////////////////////////////////////////////////
64 // Static Const Member
65 ////////////////////////////////////////////////////////////////////////////////////////////////////
66
// Per-swizzle-mode flag table for GFX10: one SwizzleModeFlags row per swizzle mode, ADDR_SW_MAX_TYPE rows in total.
// The trailing comment on each row names the swizzle mode the row describes; rows marked "Reserved" are all zero.
// The header comment on the opening brace lists the 13 columns in initializer order (presumably matching the
// member order of SwizzleModeFlags - confirm against its declaration).
// The Gfx10Lib constructor copies this whole table into m_swizzleModeTable.
const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
{//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
    {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
    {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
    {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved

    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
    {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
    {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved

    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
    {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
    {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved

    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved

    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
    {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
    {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved

    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
    {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_X
    {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_X
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved

    {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
    {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
    {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
    {0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_64KB_R_X

    {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_Z_X
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
    {0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_VAR_R_X
    {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
};
110
// Dimensions of a 256-byte block for thick (3D) resources, five entries indexed by elemLog2.
// HwlComputeDccInfo reads the .w/.h/.d members of the selected entry as the compress block size.
// NOTE(review): initializer order is presumably {w, h, d} - confirm against the Dim3d declaration.
const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};

// Five-entry tables for 64KB and 4KB 3D blocks; going by the "_Log2" suffix the values are presumably log2 of the
// block dimensions, indexed the same way as Block256_3d - confirm against the users of these tables.
const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
const Dim3d Gfx10Lib::Block4K_Log2_3d[] = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
115
116 /**
117 ************************************************************************************************************************
118 * Gfx10Lib::Gfx10Lib
119 *
120 * @brief
121 * Constructor
122 *
123 ************************************************************************************************************************
124 */
125 Gfx10Lib::Gfx10Lib(const Client* pClient)
126 :
127 Lib(pClient),
128 m_colorBaseIndex(0),
129 m_xmaskBaseIndex(0),
130 m_dccBaseIndex(0)
131 {
132 m_class = AI_ADDRLIB;
133 memset(&m_settings, 0, sizeof(m_settings));
134 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
135 }
136
137 /**
138 ************************************************************************************************************************
139 * Gfx10Lib::~Gfx10Lib
140 *
141 * @brief
142 * Destructor
143 ************************************************************************************************************************
144 */
145 Gfx10Lib::~Gfx10Lib()
146 {
147 }
148
149 /**
150 ************************************************************************************************************************
151 * Gfx10Lib::HwlComputeHtileInfo
152 *
153 * @brief
154 * Interface function stub of AddrComputeHtilenfo
155 *
156 * @return
157 * ADDR_E_RETURNCODE
158 ************************************************************************************************************************
159 */
160 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
161 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
162 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
163 ) const
164 {
165 ADDR_E_RETURNCODE ret = ADDR_OK;
166
167 if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
168 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) ||
169 (pIn->hTileFlags.pipeAligned != TRUE))
170 {
171 ret = ADDR_INVALIDPARAMS;
172 }
173 else
174 {
175 Dim3d metaBlk = {0};
176 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
177 ADDR_RSRC_TEX_2D,
178 pIn->swizzleMode,
179 0,
180 0,
181 TRUE,
182 &metaBlk);
183
184 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
185 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
186 pOut->baseAlign = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
187 pOut->metaBlkWidth = metaBlk.w;
188 pOut->metaBlkHeight = metaBlk.h;
189
190 if (pIn->numMipLevels > 1)
191 {
192 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
193
194 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
195
196 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
197 {
198 UINT_32 mipWidth, mipHeight;
199
200 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
201
202 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
203 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
204
205 const UINT_32 pitchInM = mipWidth / metaBlk.w;
206 const UINT_32 heightInM = mipHeight / metaBlk.h;
207 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
208
209 if (pOut->pMipInfo != NULL)
210 {
211 pOut->pMipInfo[i].inMiptail = FALSE;
212 pOut->pMipInfo[i].offset = offset;
213 pOut->pMipInfo[i].sliceSize = mipSliceSize;
214 }
215
216 offset += mipSliceSize;
217 }
218
219 pOut->sliceSize = offset;
220 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
221 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
222
223 if (pOut->pMipInfo != NULL)
224 {
225 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
226 {
227 pOut->pMipInfo[i].inMiptail = TRUE;
228 pOut->pMipInfo[i].offset = 0;
229 pOut->pMipInfo[i].sliceSize = 0;
230 }
231
232 if (pIn->firstMipIdInTail != pIn->numMipLevels)
233 {
234 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
235 }
236 }
237 }
238 else
239 {
240 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
241 const UINT_32 heightInM = pOut->height / metaBlk.h;
242
243 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
244 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
245 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
246
247 if (pOut->pMipInfo != NULL)
248 {
249 pOut->pMipInfo[0].inMiptail = FALSE;
250 pOut->pMipInfo[0].offset = 0;
251 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
252 }
253 }
254 }
255
256 return ret;
257 }
258
259 /**
260 ************************************************************************************************************************
261 * Gfx10Lib::HwlComputeCmaskInfo
262 *
263 * @brief
264 * Interface function stub of AddrComputeCmaskInfo
265 *
266 * @return
267 * ADDR_E_RETURNCODE
268 ************************************************************************************************************************
269 */
270 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
271 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
272 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
273 ) const
274 {
275 ADDR_E_RETURNCODE ret = ADDR_OK;
276
277 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
278 (pIn->cMaskFlags.pipeAligned != TRUE) ||
279 ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
280 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))))
281 {
282 ret = ADDR_INVALIDPARAMS;
283 }
284 else
285 {
286 Dim3d metaBlk = {0};
287 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,
288 ADDR_RSRC_TEX_2D,
289 pIn->swizzleMode,
290 0,
291 0,
292 TRUE,
293 &metaBlk);
294
295 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
296 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
297 pOut->baseAlign = metaBlkSize;
298 pOut->metaBlkWidth = metaBlk.w;
299 pOut->metaBlkHeight = metaBlk.h;
300
301 if (pIn->numMipLevels > 1)
302 {
303 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
304
305 UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;
306
307 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
308 {
309 UINT_32 mipWidth, mipHeight;
310
311 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
312
313 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
314 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
315
316 const UINT_32 pitchInM = mipWidth / metaBlk.w;
317 const UINT_32 heightInM = mipHeight / metaBlk.h;
318
319 if (pOut->pMipInfo != NULL)
320 {
321 pOut->pMipInfo[i].inMiptail = FALSE;
322 pOut->pMipInfo[i].offset = metaBlkPerSlice * metaBlkSize;
323 pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;
324 }
325
326 metaBlkPerSlice += pitchInM * heightInM;
327 }
328
329 pOut->metaBlkNumPerSlice = metaBlkPerSlice;
330
331 if (pOut->pMipInfo != NULL)
332 {
333 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
334 {
335 pOut->pMipInfo[i].inMiptail = TRUE;
336 pOut->pMipInfo[i].offset = 0;
337 pOut->pMipInfo[i].sliceSize = 0;
338 }
339
340 if (pIn->firstMipIdInTail != pIn->numMipLevels)
341 {
342 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
343 }
344 }
345 }
346 else
347 {
348 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
349 const UINT_32 heightInM = pOut->height / metaBlk.h;
350
351 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
352
353 if (pOut->pMipInfo != NULL)
354 {
355 pOut->pMipInfo[0].inMiptail = FALSE;
356 pOut->pMipInfo[0].offset = 0;
357 pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
358 }
359 }
360
361 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
362 pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;
363 }
364
365 return ret;
366 }
367
368 /**
369 ************************************************************************************************************************
370 * Gfx10Lib::HwlComputeDccInfo
371 *
372 * @brief
373 * Interface function to compute DCC key info
374 *
375 * @return
376 * ADDR_E_RETURNCODE
377 ************************************************************************************************************************
378 */
379 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
380 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
381 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
382 ) const
383 {
384 ADDR_E_RETURNCODE ret = ADDR_OK;
385
386 if (pIn->swizzleMode != ADDR_SW_64KB_Z_X && pIn->swizzleMode != ADDR_SW_64KB_R_X)
387 {
388 // Hardware does not support DCC for this swizzle mode.
389 ret = ADDR_INVALIDPARAMS;
390 }
391 else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
392 {
393 // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
394 ret = ADDR_INVALIDPARAMS;
395 }
396 else
397 {
398 // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
399 ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));
400
401 Dim3d metaBlk = {0};
402 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
403 const UINT_32 numFragLog2 = Log2(pIn->numFrags);
404 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
405 pIn->resourceType,
406 pIn->swizzleMode,
407 elemLog2,
408 numFragLog2,
409 pIn->dccKeyFlags.pipeAligned,
410 &metaBlk);
411 const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
412
413 pOut->compressBlkWidth = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
414 pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
415 pOut->compressBlkDepth = isThick ? Block256_3d[elemLog2].d : 1;
416
417 pOut->dccRamBaseAlign = metaBlkSize;
418 pOut->metaBlkWidth = metaBlk.w;
419 pOut->metaBlkHeight = metaBlk.h;
420 pOut->metaBlkDepth = metaBlk.d;
421
422 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
423 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
424 pOut->depth = PowTwoAlign(pIn->numSlices, metaBlk.d);
425
426 if (pIn->numMipLevels > 1)
427 {
428 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
429
430 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
431
432 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
433 {
434 UINT_32 mipWidth, mipHeight;
435
436 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
437
438 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
439 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
440
441 const UINT_32 pitchInM = mipWidth / metaBlk.w;
442 const UINT_32 heightInM = mipHeight / metaBlk.h;
443 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
444
445 if (pOut->pMipInfo != NULL)
446 {
447 pOut->pMipInfo[i].inMiptail = FALSE;
448 pOut->pMipInfo[i].offset = offset;
449 pOut->pMipInfo[i].sliceSize = mipSliceSize;
450 }
451
452 offset += mipSliceSize;
453 }
454
455 pOut->dccRamSliceSize = offset;
456 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
457 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
458
459 if (pOut->pMipInfo != NULL)
460 {
461 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
462 {
463 pOut->pMipInfo[i].inMiptail = TRUE;
464 pOut->pMipInfo[i].offset = 0;
465 pOut->pMipInfo[i].sliceSize = 0;
466 }
467
468 if (pIn->firstMipIdInTail != pIn->numMipLevels)
469 {
470 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
471 }
472 }
473 }
474 else
475 {
476 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
477 const UINT_32 heightInM = pOut->height / metaBlk.h;
478
479 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
480 pOut->dccRamSliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
481 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
482
483 if (pOut->pMipInfo != NULL)
484 {
485 pOut->pMipInfo[0].inMiptail = FALSE;
486 pOut->pMipInfo[0].offset = 0;
487 pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
488 }
489 }
490 }
491
492 return ret;
493 }
494
495 /**
496 ************************************************************************************************************************
497 * Gfx10Lib::HwlComputeCmaskAddrFromCoord
498 *
499 * @brief
500 * Interface function stub of AddrComputeCmaskAddrFromCoord
501 *
502 * @return
503 * ADDR_E_RETURNCODE
504 ************************************************************************************************************************
505 */
506 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
507 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
508 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
509 {
510 // Only support pipe aligned CMask
511 ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);
512
513 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {};
514 input.size = sizeof(input);
515 input.cMaskFlags = pIn->cMaskFlags;
516 input.colorFlags = pIn->colorFlags;
517 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
518 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
519 input.numSlices = Max(pIn->numSlices, 1u);
520 input.swizzleMode = pIn->swizzleMode;
521 input.resourceType = pIn->resourceType;
522
523 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {};
524 output.size = sizeof(output);
525
526 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
527
528 if (returnCode == ADDR_OK)
529 {
530 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
531 const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3);
532 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
533 const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2;
534 const UINT_8* patIdxTable = (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? CMASK_VAR_RBPLUS_PATIDX :
535 (m_settings.supportRbPlus ? CMASK_64K_RBPLUS_PATIDX : CMASK_64K_PATIDX);
536
537 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
538 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
539 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(CMASK_SW_PATTERN[patIdxTable[index]],
540 blkSizeLog2 + 1, // +1 for nibble offset
541 pIn->x,
542 pIn->y,
543 pIn->slice,
544 0);
545 const UINT_32 xb = pIn->x / output.metaBlkWidth;
546 const UINT_32 yb = pIn->y / output.metaBlkHeight;
547 const UINT_32 pb = output.pitch / output.metaBlkWidth;
548 const UINT_32 blkIndex = (yb * pb) + xb;
549 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
550
551 pOut->addr = (output.sliceSize * pIn->slice) +
552 (blkIndex * (1 << blkSizeLog2)) +
553 ((blkOffset >> 1) ^ pipeXor);
554 pOut->bitPosition = (blkOffset & 1) << 2;
555 }
556
557 return returnCode;
558 }
559
560 /**
561 ************************************************************************************************************************
562 * Gfx10Lib::HwlComputeHtileAddrFromCoord
563 *
564 * @brief
565 * Interface function stub of AddrComputeHtileAddrFromCoord
566 *
567 * @return
568 * ADDR_E_RETURNCODE
569 ************************************************************************************************************************
570 */
571 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
572 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
573 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
574 {
575 ADDR_E_RETURNCODE returnCode = ADDR_OK;
576
577 if (pIn->numMipLevels > 1)
578 {
579 returnCode = ADDR_NOTIMPLEMENTED;
580 }
581 else
582 {
583 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
584 input.size = sizeof(input);
585 input.hTileFlags = pIn->hTileFlags;
586 input.depthFlags = pIn->depthflags;
587 input.swizzleMode = pIn->swizzleMode;
588 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
589 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
590 input.numSlices = Max(pIn->numSlices, 1u);
591 input.numMipLevels = 1;
592
593 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
594 output.size = sizeof(output);
595
596 returnCode = ComputeHtileInfo(&input, &output);
597
598 if (returnCode == ADDR_OK)
599 {
600 const UINT_32 numSampleLog2 = Log2(pIn->numSamples);
601 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
602 const UINT_32 index = m_xmaskBaseIndex + numSampleLog2;
603 const UINT_8* patIdxTable = m_settings.supportRbPlus ? HTILE_RBPLUS_PATIDX : HTILE_PATIDX;
604
605 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
606 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
607 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(HTILE_SW_PATTERN[patIdxTable[index]],
608 blkSizeLog2 + 1, // +1 for nibble offset
609 pIn->x,
610 pIn->y,
611 pIn->slice,
612 0);
613 const UINT_32 xb = pIn->x / output.metaBlkWidth;
614 const UINT_32 yb = pIn->y / output.metaBlkHeight;
615 const UINT_32 pb = output.pitch / output.metaBlkWidth;
616 const UINT_32 blkIndex = (yb * pb) + xb;
617 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
618
619 pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
620 (blkIndex * (1 << blkSizeLog2)) +
621 ((blkOffset >> 1) ^ pipeXor);
622 }
623 }
624
625 return returnCode;
626 }
627
628 /**
629 ************************************************************************************************************************
630 * Gfx10Lib::HwlComputeHtileCoordFromAddr
631 *
632 * @brief
633 * Interface function stub of AddrComputeHtileCoordFromAddr
634 *
635 * @return
636 * ADDR_E_RETURNCODE
637 ************************************************************************************************************************
638 */
639 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
640 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
641 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
642 {
643 ADDR_NOT_IMPLEMENTED();
644
645 return ADDR_OK;
646 }
647
648 /**
649 ************************************************************************************************************************
650 * Gfx10Lib::HwlComputeDccAddrFromCoord
651 *
652 * @brief
653 * Interface function stub of AddrComputeDccAddrFromCoord
654 *
655 * @return
656 * ADDR_E_RETURNCODE
657 ************************************************************************************************************************
658 */
659 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccAddrFromCoord(
660 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
661 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
662 {
663 ADDR_E_RETURNCODE returnCode = ADDR_OK;
664
665 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
666 (pIn->swizzleMode != ADDR_SW_64KB_R_X) ||
667 (pIn->dccKeyFlags.linear == TRUE) ||
668 (pIn->numFrags > 1) ||
669 (pIn->numMipLevels > 1) ||
670 (pIn->mipId > 0))
671 {
672 returnCode = ADDR_NOTSUPPORTED;
673 }
674 else
675 {
676 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
677 const UINT_32 numPipeLog2 = m_pipesLog2;
678 const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
679 UINT_32 index = m_dccBaseIndex + elemLog2;
680 const UINT_8* patIdxTable;
681
682 if (m_settings.supportRbPlus)
683 {
684 patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX;
685
686 if (pIn->dccKeyFlags.pipeAligned)
687 {
688 index += MaxNumOfBpp;
689
690 if (m_numPkrLog2 < 2)
691 {
692 index += m_pipesLog2 * MaxNumOfBpp;
693 }
694 else
695 {
696 // 4 groups for "m_numPkrLog2 < 2" case
697 index += 4 * MaxNumOfBpp;
698
699 const UINT_32 dccPipePerPkr = 3;
700
701 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
702 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
703 }
704 }
705 }
706 else
707 {
708 patIdxTable = DCC_64K_R_X_PATIDX;
709
710 if (pIn->dccKeyFlags.pipeAligned)
711 {
712 index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
713 }
714 else
715 {
716 index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
717 }
718 }
719
720 const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
721 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
722 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
723 blkSizeLog2 + 1, // +1 for nibble offset
724 pIn->x,
725 pIn->y,
726 pIn->slice,
727 0);
728 const UINT_32 xb = pIn->x / pIn->metaBlkWidth;
729 const UINT_32 yb = pIn->y / pIn->metaBlkHeight;
730 const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth;
731 const UINT_32 blkIndex = (yb * pb) + xb;
732 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
733
734 pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
735 (blkIndex * (1 << blkSizeLog2)) +
736 ((blkOffset >> 1) ^ pipeXor);
737 }
738
739 return returnCode;
740 }
741
742 /**
743 ************************************************************************************************************************
744 * Gfx10Lib::HwlInitGlobalParams
745 *
746 * @brief
747 * Initializes global parameters
748 *
749 * @return
750 * TRUE if all settings are valid
751 *
752 ************************************************************************************************************************
753 */
754 BOOL_32 Gfx10Lib::HwlInitGlobalParams(
755 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
756 {
757 BOOL_32 valid = TRUE;
758 GB_ADDR_CONFIG_gfx10 gbAddrConfig;
759
760 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
761
762 // These values are copied from CModel code
763 switch (gbAddrConfig.bits.NUM_PIPES)
764 {
765 case ADDR_CONFIG_1_PIPE:
766 m_pipes = 1;
767 m_pipesLog2 = 0;
768 break;
769 case ADDR_CONFIG_2_PIPE:
770 m_pipes = 2;
771 m_pipesLog2 = 1;
772 break;
773 case ADDR_CONFIG_4_PIPE:
774 m_pipes = 4;
775 m_pipesLog2 = 2;
776 break;
777 case ADDR_CONFIG_8_PIPE:
778 m_pipes = 8;
779 m_pipesLog2 = 3;
780 break;
781 case ADDR_CONFIG_16_PIPE:
782 m_pipes = 16;
783 m_pipesLog2 = 4;
784 break;
785 case ADDR_CONFIG_32_PIPE:
786 m_pipes = 32;
787 m_pipesLog2 = 5;
788 break;
789 case ADDR_CONFIG_64_PIPE:
790 m_pipes = 64;
791 m_pipesLog2 = 6;
792 break;
793 default:
794 ADDR_ASSERT_ALWAYS();
795 valid = FALSE;
796 break;
797 }
798
799 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
800 {
801 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
802 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
803 m_pipeInterleaveLog2 = 8;
804 break;
805 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
806 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
807 m_pipeInterleaveLog2 = 9;
808 break;
809 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
810 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
811 m_pipeInterleaveLog2 = 10;
812 break;
813 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
814 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
815 m_pipeInterleaveLog2 = 11;
816 break;
817 default:
818 ADDR_ASSERT_ALWAYS();
819 valid = FALSE;
820 break;
821 }
822
823 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
824 // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
825 // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
826 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
827
828 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
829 {
830 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
831 m_maxCompFrag = 1;
832 m_maxCompFragLog2 = 0;
833 break;
834 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
835 m_maxCompFrag = 2;
836 m_maxCompFragLog2 = 1;
837 break;
838 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
839 m_maxCompFrag = 4;
840 m_maxCompFragLog2 = 2;
841 break;
842 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
843 m_maxCompFrag = 8;
844 m_maxCompFragLog2 = 3;
845 break;
846 default:
847 ADDR_ASSERT_ALWAYS();
848 valid = FALSE;
849 break;
850 }
851
852 {
853 // Skip unaligned case
854 m_xmaskBaseIndex += MaxNumOfAA;
855
856 m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfAA;
857 m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
858
859 if (m_settings.supportRbPlus)
860 {
861 m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
862 m_numSaLog2 = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
863
864 ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
865
866 ADDR_C_ASSERT(sizeof(HTILE_RBPLUS_PATIDX) / sizeof(HTILE_RBPLUS_PATIDX[0]) ==
867 sizeof(CMASK_64K_RBPLUS_PATIDX) / sizeof(CMASK_64K_RBPLUS_PATIDX[0]));
868
869 if (m_numPkrLog2 >= 2)
870 {
871 m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
872 m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
873 }
874 }
875 else
876 {
877 const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
878 static_cast<UINT_32>(ADDR_CONFIG_1_PIPE) +
879 1;
880
881 ADDR_C_ASSERT(sizeof(HTILE_PATIDX) / sizeof(HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);
882
883 ADDR_C_ASSERT(sizeof(HTILE_PATIDX) / sizeof(HTILE_PATIDX[0]) ==
884 sizeof(CMASK_64K_PATIDX) / sizeof(CMASK_64K_PATIDX[0]));
885 }
886 }
887
888 if (m_settings.supportRbPlus)
889 {
890 // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
891 // corresponding SW_64KB_* mode
892 m_blockVarSizeLog2 = m_pipesLog2 + 14;
893 }
894
895 if (valid)
896 {
897 InitEquationTable();
898 }
899
900 return valid;
901 }
902
903 /**
904 ************************************************************************************************************************
905 * Gfx10Lib::HwlConvertChipFamily
906 *
907 * @brief
908 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
909 * @return
910 * ChipFamily
911 ************************************************************************************************************************
912 */
913 ChipFamily Gfx10Lib::HwlConvertChipFamily(
914 UINT_32 chipFamily, ///< [in] chip family defined in atiih.h
915 UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h
916 {
917 ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
918
919 m_settings.dccUnsup3DSwDis = 1;
920
921 switch (chipFamily)
922 {
923 case FAMILY_NV:
924 m_settings.isDcn2 = 1;
925
926 if (ASICREV_IS_SIENNA_M(chipRevision))
927 {
928 m_settings.supportRbPlus = 1;
929 m_settings.dccUnsup3DSwDis = 0;
930 }
931 break;
932 default:
933 ADDR_ASSERT(!"Unknown chip family");
934 break;
935 }
936
937 m_settings.dsMipmapHtileFix = 1;
938
939 if (ASICREV_IS_NAVI10_P(chipRevision))
940 {
941 m_settings.dsMipmapHtileFix = 0;
942 }
943
944 m_configFlags.use32bppFor422Fmt = TRUE;
945
946 return family;
947 }
948
949 /**
950 ************************************************************************************************************************
951 * Gfx10Lib::GetBlk256SizeLog2
952 *
953 * @brief
954 * Get block 256 size
955 *
956 * @return
957 * N/A
958 ************************************************************************************************************************
959 */
960 void Gfx10Lib::GetBlk256SizeLog2(
961 AddrResourceType resourceType, ///< [in] Resource type
962 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
963 UINT_32 elemLog2, ///< [in] element size log2
964 UINT_32 numSamplesLog2, ///< [in] number of samples
965 Dim3d* pBlock ///< [out] block size
966 ) const
967 {
968 if (IsThin(resourceType, swizzleMode))
969 {
970 UINT_32 blockBits = 8 - elemLog2;
971
972 if (IsZOrderSwizzle(swizzleMode))
973 {
974 blockBits -= numSamplesLog2;
975 }
976
977 pBlock->w = (blockBits >> 1) + (blockBits & 1);
978 pBlock->h = (blockBits >> 1);
979 pBlock->d = 0;
980 }
981 else
982 {
983 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
984
985 UINT_32 blockBits = 8 - elemLog2;
986
987 pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
988 pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
989 pBlock->h = (blockBits / 3);
990 }
991 }
992
993 /**
994 ************************************************************************************************************************
995 * Gfx10Lib::GetCompressedBlockSizeLog2
996 *
997 * @brief
998 * Get compress block size
999 *
1000 * @return
1001 * N/A
1002 ************************************************************************************************************************
1003 */
1004 void Gfx10Lib::GetCompressedBlockSizeLog2(
1005 Gfx10DataType dataType, ///< [in] Data type
1006 AddrResourceType resourceType, ///< [in] Resource type
1007 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1008 UINT_32 elemLog2, ///< [in] element size log2
1009 UINT_32 numSamplesLog2, ///< [in] number of samples
1010 Dim3d* pBlock ///< [out] block size
1011 ) const
1012 {
1013 if (dataType == Gfx10DataColor)
1014 {
1015 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1016 }
1017 else
1018 {
1019 ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1020 pBlock->w = 3;
1021 pBlock->h = 3;
1022 pBlock->d = 0;
1023 }
1024 }
1025
1026 /**
1027 ************************************************************************************************************************
1028 * Gfx10Lib::GetMetaOverlapLog2
1029 *
1030 * @brief
1031 * Get meta block overlap
1032 *
1033 * @return
1034 * N/A
1035 ************************************************************************************************************************
1036 */
1037 INT_32 Gfx10Lib::GetMetaOverlapLog2(
1038 Gfx10DataType dataType, ///< [in] Data type
1039 AddrResourceType resourceType, ///< [in] Resource type
1040 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1041 UINT_32 elemLog2, ///< [in] element size log2
1042 UINT_32 numSamplesLog2 ///< [in] number of samples
1043 ) const
1044 {
1045 Dim3d compBlock;
1046 Dim3d microBlock;
1047
1048 GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1049 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, &microBlock);
1050
1051 const INT_32 compSizeLog2 = compBlock.w + compBlock.h + compBlock.d;
1052 const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1053 const INT_32 maxSizeLog2 = Max(compSizeLog2, blk256SizeLog2);
1054 const INT_32 numPipesLog2 = GetEffectiveNumPipes();
1055 INT_32 overlap = numPipesLog2 - maxSizeLog2;
1056
1057 if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1058 {
1059 overlap++;
1060 }
1061
1062 // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1063 if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1064 {
1065 overlap--;
1066 }
1067 overlap = Max(overlap, 0);
1068 return overlap;
1069 }
1070
1071 /**
1072 ************************************************************************************************************************
1073 * Gfx10Lib::Get3DMetaOverlapLog2
1074 *
1075 * @brief
1076 * Get 3d meta block overlap
1077 *
1078 * @return
1079 * N/A
1080 ************************************************************************************************************************
1081 */
1082 INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1083 AddrResourceType resourceType, ///< [in] Resource type
1084 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1085 UINT_32 elemLog2 ///< [in] element size log2
1086 ) const
1087 {
1088 Dim3d microBlock;
1089 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, &microBlock);
1090
1091 INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1092
1093 if (m_settings.supportRbPlus)
1094 {
1095 overlap++;
1096 }
1097
1098 if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1099 {
1100 overlap = 0;
1101 }
1102 return overlap;
1103 }
1104
1105 /**
1106 ************************************************************************************************************************
1107 * Gfx10Lib::GetPipeRotateAmount
1108 *
1109 * @brief
1110 * Get pipe rotate amount
1111 *
1112 * @return
1113 * Pipe rotate amount
1114 ************************************************************************************************************************
1115 */
1116
1117 INT_32 Gfx10Lib::GetPipeRotateAmount(
1118 AddrResourceType resourceType, ///< [in] Resource type
1119 AddrSwizzleMode swizzleMode ///< [in] Swizzle mode
1120 ) const
1121 {
1122 INT_32 amount = 0;
1123
1124 if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1125 {
1126 amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1127 1 : m_pipesLog2 - (m_numSaLog2 + 1);
1128 }
1129
1130 return amount;
1131 }
1132
1133 /**
1134 ************************************************************************************************************************
1135 * Gfx10Lib::GetMetaBlkSize
1136 *
1137 * @brief
1138 * Get metadata block size
1139 *
1140 * @return
1141 * Meta block size
1142 ************************************************************************************************************************
1143 */
1144 UINT_32 Gfx10Lib::GetMetaBlkSize(
1145 Gfx10DataType dataType, ///< [in] Data type
1146 AddrResourceType resourceType, ///< [in] Resource type
1147 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1148 UINT_32 elemLog2, ///< [in] element size log2
1149 UINT_32 numSamplesLog2, ///< [in] number of samples
1150 BOOL_32 pipeAlign, ///< [in] pipe align
1151 Dim3d* pBlock ///< [out] block size
1152 ) const
1153 {
1154 INT_32 metablkSizeLog2;
1155 const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType);
1156 const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType);
1157 const INT_32 compBlkSizeLog2 = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1158 const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1159 numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1160 const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode);
1161 INT_32 numPipesLog2 = m_pipesLog2;
1162
1163 if (IsThin(resourceType, swizzleMode))
1164 {
1165 if ((pipeAlign == FALSE) ||
1166 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1167 (IsDisplaySwizzle(resourceType, swizzleMode) == TRUE))
1168 {
1169 if (pipeAlign)
1170 {
1171 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1172 metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1173 }
1174 else
1175 {
1176 metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1177 }
1178 }
1179 else
1180 {
1181 if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1182 {
1183 numPipesLog2++;
1184 }
1185
1186 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1187
1188 if (numPipesLog2 >= 4)
1189 {
1190 INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1191
1192 // In 16Bpe 8xaa, we have an extra overlap bit
1193 if ((pipeRotateLog2 > 0) &&
1194 (elemLog2 == 4) &&
1195 (numSamplesLog2 == 3) &&
1196 (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1197 {
1198 overlapLog2++;
1199 }
1200
1201 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1202 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1203
1204 if (m_settings.supportRbPlus &&
1205 IsRtOptSwizzle(swizzleMode) &&
1206 (numPipesLog2 == 6) &&
1207 (numSamplesLog2 == 3) &&
1208 (m_maxCompFragLog2 == 3) &&
1209 (metablkSizeLog2 < 15))
1210 {
1211 metablkSizeLog2 = 15;
1212 }
1213 }
1214 else
1215 {
1216 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1217 }
1218
1219 if (dataType == Gfx10DataDepthStencil)
1220 {
1221 // For htile surfaces, pad meta block size to 2K * num_pipes
1222 metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1223 }
1224
1225 const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1226
1227 if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1228 {
1229 const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1230
1231 metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1232 }
1233 }
1234
1235 const INT_32 metablkBitsLog2 =
1236 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1237 pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1238 pBlock->h = 1 << (metablkBitsLog2 >> 1);
1239 pBlock->d = 1;
1240 }
1241 else
1242 {
1243 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1244
1245 if (pipeAlign)
1246 {
1247 if (m_settings.supportRbPlus &&
1248 (m_pipesLog2 == m_numSaLog2 + 1) &&
1249 (m_pipesLog2 > 1) &&
1250 IsRbAligned(resourceType, swizzleMode))
1251 {
1252 numPipesLog2++;
1253 }
1254
1255 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1256
1257 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1258 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1259 metablkSizeLog2 = Max(metablkSizeLog2, 12);
1260 }
1261 else
1262 {
1263 metablkSizeLog2 = 12;
1264 }
1265
1266 const INT_32 metablkBitsLog2 =
1267 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1268 pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1269 pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1270 pBlock->d = 1 << (metablkBitsLog2 / 3);
1271 }
1272
1273 return (1 << static_cast<UINT_32>(metablkSizeLog2));
1274 }
1275
1276 /**
1277 ************************************************************************************************************************
1278 * Gfx10Lib::ConvertSwizzlePatternToEquation
1279 *
1280 * @brief
1281 * Convert swizzle pattern to equation.
*
* Expands pPatInfo into one ADDR_BIT_SETTING per address bit, then fills pEquation->addr[] (and, for XOR
* swizzle modes, pEquation->xor1[]/xor2[]) for every address bit below the block size.
* Channel ids written by this function: 0 = x, 1 = y, 2 = z. Every x index is stored with elemLog2 added.
* Three cases are handled: non-XOR modes, thin XOR modes (x/y, z only as an extra xor term) and the
* remaining (thick) XOR modes (x/y/z).
1282 *
1283 * @return
1284 * N/A
1285 ************************************************************************************************************************
1286 */
1287 VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
1288 UINT_32 elemLog2, ///< [in] element bytes log2
1289 AddrResourceType rsrcType, ///< [in] resource type
1290 AddrSwizzleMode swMode, ///< [in] swizzle mode
1291 const ADDR_SW_PATINFO* pPatInfo, ///< [in] swizzle pattern infor
1292 ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern
1293 const
1294 {
// Per-address-bit pattern; presumably filled completely by GetSwizzlePatternFromPatternInfo (TODO confirm)
1295 ADDR_BIT_SETTING fullSwizzlePattern[20];
1296 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1297
1298 const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern;
1299 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
1300
1301 pEquation->numBits = blockSizeLog2;
1302 pEquation->stackedDepthSlices = FALSE;
1303
// Address bits below the element size map one-to-one onto x channel bit i
1304 for (UINT_32 i = 0; i < elemLog2; i++)
1305 {
1306 pEquation->addr[i].channel = 0;
1307 pEquation->addr[i].valid = 1;
1308 pEquation->addr[i].index = i;
1309 }
1310
// Case 1: non-XOR swizzle. Each pattern entry is asserted to be a single coordinate bit, which becomes the
// addr term; the xor terms of that address bit are cleared.
1311 if (IsXor(swMode) == FALSE)
1312 {
1313 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1314 {
1315 ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1316
1317 if (pSwizzle[i].x != 0)
1318 {
1319 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1320
1321 pEquation->addr[i].channel = 0;
1322 pEquation->addr[i].valid = 1;
1323 pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2;
1324 }
1325 else if (pSwizzle[i].y != 0)
1326 {
1327 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1328
1329 pEquation->addr[i].channel = 1;
1330 pEquation->addr[i].valid = 1;
1331 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1332 }
1333 else
1334 {
1335 ADDR_ASSERT(pSwizzle[i].z != 0);
1336 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1337
1338 pEquation->addr[i].channel = 2;
1339 pEquation->addr[i].valid = 1;
1340 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1341 }
1342
1343 pEquation->xor1[i].value = 0;
1344 pEquation->xor2[i].value = 0;
1345 }
1346 }
// Case 2: thin XOR swizzle. Pattern entries may combine several x/y (and z) bits.
1347 else if (IsThin(rsrcType, swMode))
1348 {
1349 Dim3d dim;
1350 ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1351
1352 const UINT_32 blkXLog2 = Log2(dim.w);
1353 const UINT_32 blkYLog2 = Log2(dim.h);
1354 const UINT_32 blkXMask = dim.w - 1;
1355 const UINT_32 blkYMask = dim.h - 1;
1356
// swizzle[] : per address bit, the coordinate bits that still have to be assigned to a term
// xMask/yMask : coordinate bits that already own an addr term
// bMask : address bits that are fully resolved (bits below elemLog2 start out resolved)
1357 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1358 UINT_32 xMask = 0;
1359 UINT_32 yMask = 0;
1360 UINT_32 bMask = (1 << elemLog2) - 1;
1361
// Pass 1: single-bit entries become addr terms directly. For multi-bit entries, z goes to xor2, x/y bits
// outside the block dimensions (xHi/yHi) go to xor terms, and the in-block remainder is kept in swizzle[i].
1362 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1363 {
1364 if (IsPow2(pSwizzle[i].value))
1365 {
1366 if (pSwizzle[i].x != 0)
1367 {
1368 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1369 xMask |= pSwizzle[i].x;
1370
1371 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1372
1373 ADDR_ASSERT(xLog2 < blkXLog2);
1374
1375 pEquation->addr[i].channel = 0;
1376 pEquation->addr[i].valid = 1;
1377 pEquation->addr[i].index = xLog2 + elemLog2;
1378 }
1379 else
1380 {
1381 ADDR_ASSERT(pSwizzle[i].y != 0);
1382 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1383 yMask |= pSwizzle[i].y;
1384
1385 pEquation->addr[i].channel = 1;
1386 pEquation->addr[i].valid = 1;
1387 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1388
1389 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1390 }
1391
1392 swizzle[i].value = 0;
1393 bMask |= 1 << i;
1394 }
1395 else
1396 {
1397 if (pSwizzle[i].z != 0)
1398 {
1399 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1400
1401 pEquation->xor2[i].channel = 2;
1402 pEquation->xor2[i].valid = 1;
1403 pEquation->xor2[i].index = Log2(pSwizzle[i].z);
1404 }
1405
// Only x and y are carried into the remainder; z and s are dropped from it here
1406 swizzle[i].x = pSwizzle[i].x;
1407 swizzle[i].y = pSwizzle[i].y;
1408 swizzle[i].z = swizzle[i].s = 0;
1409
1410 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1411
1412 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1413
1414 if (xHi != 0)
1415 {
1416 ADDR_ASSERT(IsPow2(xHi));
1417 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1418
1419 pEquation->xor1[i].channel = 0;
1420 pEquation->xor1[i].valid = 1;
1421 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1422
1423 swizzle[i].x &= blkXMask;
1424 }
1425
1426 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1427
1428 if (yHi != 0)
1429 {
1430 ADDR_ASSERT(IsPow2(yHi));
1431
// yHi takes xor1 unless xHi already claimed it above, in which case it takes xor2
1432 if (xHi == 0)
1433 {
1434 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1435 pEquation->xor1[i].channel = 1;
1436 pEquation->xor1[i].valid = 1;
1437 pEquation->xor1[i].index = Log2(yHi);
1438 }
1439 else
1440 {
1441 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1442 pEquation->xor2[i].channel = 1;
1443 pEquation->xor2[i].valid = 1;
1444 pEquation->xor2[i].index = Log2(yHi);
1445 }
1446
1447 swizzle[i].y &= blkYMask;
1448 }
1449
// Nothing left inside the block: this address bit is done
1450 if (swizzle[i].value == 0)
1451 {
1452 bMask |= 1 << i;
1453 }
1454 }
1455 }
1456
1457 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1458 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1459
1460 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1461
// Pass 2: sweep the unresolved address bits until all are resolved. A remainder that has shrunk to a single
// bit becomes the addr term; otherwise the coordinate bits that already own an addr term are moved into a free
// xor slot and removed from the remainder.
// NOTE(review): termination relies on every sweep making progress for the supplied patterns - confirm.
1462 while (bMask != blockMask)
1463 {
1464 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1465 {
1466 if ((bMask & (1 << i)) == 0)
1467 {
1468 if (IsPow2(swizzle[i].value))
1469 {
1470 if (swizzle[i].x != 0)
1471 {
1472 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1473 xMask |= swizzle[i].x;
1474
1475 const UINT_32 xLog2 = Log2(swizzle[i].x);
1476
1477 ADDR_ASSERT(xLog2 < blkXLog2);
1478
1479 pEquation->addr[i].channel = 0;
1480 pEquation->addr[i].valid = 1;
1481 pEquation->addr[i].index = xLog2 + elemLog2;
1482 }
1483 else
1484 {
1485 ADDR_ASSERT(swizzle[i].y != 0);
1486 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1487 yMask |= swizzle[i].y;
1488
1489 pEquation->addr[i].channel = 1;
1490 pEquation->addr[i].valid = 1;
1491 pEquation->addr[i].index = Log2(swizzle[i].y);
1492
1493 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1494 }
1495
1496 swizzle[i].value = 0;
1497 bMask |= 1 << i;
1498 }
1499 else
1500 {
// Coordinate bits of this entry that are already owned by some addr term
1501 const UINT_32 x = swizzle[i].x & xMask;
1502 const UINT_32 y = swizzle[i].y & yMask;
1503
1504 if (x != 0)
1505 {
1506 ADDR_ASSERT(IsPow2(x));
1507
1508 if (pEquation->xor1[i].value == 0)
1509 {
1510 pEquation->xor1[i].channel = 0;
1511 pEquation->xor1[i].valid = 1;
1512 pEquation->xor1[i].index = Log2(x) + elemLog2;
1513 }
1514 else
1515 {
1516 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1517 pEquation->xor2[i].channel = 0;
1518 pEquation->xor2[i].valid = 1;
1519 pEquation->xor2[i].index = Log2(x) + elemLog2;
1520 }
1521 }
1522
1523 if (y != 0)
1524 {
1525 ADDR_ASSERT(IsPow2(y));
1526
1527 if (pEquation->xor1[i].value == 0)
1528 {
1529 pEquation->xor1[i].channel = 1;
1530 pEquation->xor1[i].valid = 1;
1531 pEquation->xor1[i].index = Log2(y);
1532 }
1533 else
1534 {
1535 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1536 pEquation->xor2[i].channel = 1;
1537 pEquation->xor2[i].valid = 1;
1538 pEquation->xor2[i].index = Log2(y);
1539 }
1540 }
1541
1542 swizzle[i].x &= ~x;
1543 swizzle[i].y &= ~y;
1544 }
1545 }
1546 }
1547 }
1548
// Every in-block x and y bit must have ended up as exactly one addr term
1549 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1550 }
// Case 3: remaining (thick) XOR swizzle. Same two-pass scheme as case 2, with z treated like x and y.
1551 else
1552 {
// Block dimensions come from the 4KB table when blockSizeLog2 is 12, otherwise from the 64KB table
1553 const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
1554 const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
1555 const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
1556 const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1557 const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1558 const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1559
1560 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1561 UINT_32 xMask = 0;
1562 UINT_32 yMask = 0;
1563 UINT_32 zMask = 0;
1564 UINT_32 bMask = (1 << elemLog2) - 1;
1565
// Pass 1: single-bit entries become addr terms; out-of-block bits of multi-bit entries become xor terms
1566 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1567 {
1568 if (IsPow2(pSwizzle[i].value))
1569 {
1570 if (pSwizzle[i].x != 0)
1571 {
1572 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1573 xMask |= pSwizzle[i].x;
1574
1575 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1576
1577 ADDR_ASSERT(xLog2 < blkXLog2);
1578
1579 pEquation->addr[i].channel = 0;
1580 pEquation->addr[i].valid = 1;
1581 pEquation->addr[i].index = xLog2 + elemLog2;
1582 }
1583 else if (pSwizzle[i].y != 0)
1584 {
1585 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1586 yMask |= pSwizzle[i].y;
1587
1588 pEquation->addr[i].channel = 1;
1589 pEquation->addr[i].valid = 1;
1590 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1591
1592 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1593 }
1594 else
1595 {
1596 ADDR_ASSERT(pSwizzle[i].z != 0);
1597 ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1598 zMask |= pSwizzle[i].z;
1599
1600 pEquation->addr[i].channel = 2;
1601 pEquation->addr[i].valid = 1;
1602 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1603
1604 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1605 }
1606
1607 swizzle[i].value = 0;
1608 bMask |= 1 << i;
1609 }
1610 else
1611 {
1612 swizzle[i].x = pSwizzle[i].x;
1613 swizzle[i].y = pSwizzle[i].y;
1614 swizzle[i].z = pSwizzle[i].z;
1615 swizzle[i].s = 0;
1616
1617 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1618
1619 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1620 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1621 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1622
// Only two xor slots exist per address bit, so at most two channels may carry out-of-block bits
1623 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1624
1625 if (xHi != 0)
1626 {
1627 ADDR_ASSERT(IsPow2(xHi));
1628 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1629
1630 pEquation->xor1[i].channel = 0;
1631 pEquation->xor1[i].valid = 1;
1632 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1633
1634 swizzle[i].x &= blkXMask;
1635 }
1636
1637 if (yHi != 0)
1638 {
1639 ADDR_ASSERT(IsPow2(yHi));
1640
1641 if (pEquation->xor1[i].value == 0)
1642 {
1643 pEquation->xor1[i].channel = 1;
1644 pEquation->xor1[i].valid = 1;
1645 pEquation->xor1[i].index = Log2(yHi);
1646 }
1647 else
1648 {
1649 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1650 pEquation->xor2[i].channel = 1;
1651 pEquation->xor2[i].valid = 1;
1652 pEquation->xor2[i].index = Log2(yHi);
1653 }
1654
1655 swizzle[i].y &= blkYMask;
1656 }
1657
1658 if (zHi != 0)
1659 {
1660 ADDR_ASSERT(IsPow2(zHi));
1661
1662 if (pEquation->xor1[i].value == 0)
1663 {
1664 pEquation->xor1[i].channel = 2;
1665 pEquation->xor1[i].valid = 1;
1666 pEquation->xor1[i].index = Log2(zHi);
1667 }
1668 else
1669 {
1670 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1671 pEquation->xor2[i].channel = 2;
1672 pEquation->xor2[i].valid = 1;
1673 pEquation->xor2[i].index = Log2(zHi);
1674 }
1675
1676 swizzle[i].z &= blkZMask;
1677 }
1678
1679 if (swizzle[i].value == 0)
1680 {
1681 bMask |= 1 << i;
1682 }
1683 }
1684 }
1685
1686 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1687 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1688
1689 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1690
// Pass 2: keep sweeping the unresolved address bits until every bit of the block is resolved
// NOTE(review): termination relies on every sweep making progress for the supplied patterns - confirm.
1691 while (bMask != blockMask)
1692 {
1693 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1694 {
1695 if ((bMask & (1 << i)) == 0)
1696 {
1697 if (IsPow2(swizzle[i].value))
1698 {
1699 if (swizzle[i].x != 0)
1700 {
1701 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1702 xMask |= swizzle[i].x;
1703
1704 const UINT_32 xLog2 = Log2(swizzle[i].x);
1705
1706 ADDR_ASSERT(xLog2 < blkXLog2);
1707
1708 pEquation->addr[i].channel = 0;
1709 pEquation->addr[i].valid = 1;
1710 pEquation->addr[i].index = xLog2 + elemLog2;
1711 }
1712 else if (swizzle[i].y != 0)
1713 {
1714 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1715 yMask |= swizzle[i].y;
1716
1717 pEquation->addr[i].channel = 1;
1718 pEquation->addr[i].valid = 1;
1719 pEquation->addr[i].index = Log2(swizzle[i].y);
1720
1721 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1722 }
1723 else
1724 {
1725 ADDR_ASSERT(swizzle[i].z != 0);
1726 ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1727 zMask |= swizzle[i].z;
1728
1729 pEquation->addr[i].channel = 2;
1730 pEquation->addr[i].valid = 1;
1731 pEquation->addr[i].index = Log2(swizzle[i].z);
1732
1733 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1734 }
1735
1736 swizzle[i].value = 0;
1737 bMask |= 1 << i;
1738 }
1739 else
1740 {
// Coordinate bits of this entry that are already owned by some addr term
1741 const UINT_32 x = swizzle[i].x & xMask;
1742 const UINT_32 y = swizzle[i].y & yMask;
1743 const UINT_32 z = swizzle[i].z & zMask;
1744
1745 if (x != 0)
1746 {
1747 ADDR_ASSERT(IsPow2(x));
1748
1749 if (pEquation->xor1[i].value == 0)
1750 {
1751 pEquation->xor1[i].channel = 0;
1752 pEquation->xor1[i].valid = 1;
1753 pEquation->xor1[i].index = Log2(x) + elemLog2;
1754 }
1755 else
1756 {
1757 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1758 pEquation->xor2[i].channel = 0;
1759 pEquation->xor2[i].valid = 1;
1760 pEquation->xor2[i].index = Log2(x) + elemLog2;
1761 }
1762 }
1763
1764 if (y != 0)
1765 {
1766 ADDR_ASSERT(IsPow2(y));
1767
1768 if (pEquation->xor1[i].value == 0)
1769 {
1770 pEquation->xor1[i].channel = 1;
1771 pEquation->xor1[i].valid = 1;
1772 pEquation->xor1[i].index = Log2(y);
1773 }
1774 else
1775 {
1776 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1777 pEquation->xor2[i].channel = 1;
1778 pEquation->xor2[i].valid = 1;
1779 pEquation->xor2[i].index = Log2(y);
1780 }
1781 }
1782
1783 if (z != 0)
1784 {
1785 ADDR_ASSERT(IsPow2(z));
1786
1787 if (pEquation->xor1[i].value == 0)
1788 {
1789 pEquation->xor1[i].channel = 2;
1790 pEquation->xor1[i].valid = 1;
1791 pEquation->xor1[i].index = Log2(z);
1792 }
1793 else
1794 {
1795 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1796 pEquation->xor2[i].channel = 2;
1797 pEquation->xor2[i].valid = 1;
1798 pEquation->xor2[i].index = Log2(z);
1799 }
1800 }
1801
1802 swizzle[i].x &= ~x;
1803 swizzle[i].y &= ~y;
1804 swizzle[i].z &= ~z;
1805 }
1806 }
1807 }
1808 }
1809
// Every in-block x, y and z bit must have ended up as exactly one addr term
1810 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1811 }
1812 }
1813
1814 /**
1815 ************************************************************************************************************************
1816 * Gfx10Lib::InitEquationTable
1817 *
1818 * @brief
1819 * Initialize Equation table.
1820 *
1821 * @return
1822 * N/A
1823 ************************************************************************************************************************
1824 */
1825 VOID Gfx10Lib::InitEquationTable()
1826 {
1827 memset(m_equationTable, 0, sizeof(m_equationTable));
1828
1829 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1830 {
1831 const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1832
1833 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1834 {
1835 const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1836
1837 for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1838 {
1839 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
1840 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
1841
1842 if (pPatInfo != NULL)
1843 {
1844 ADDR_ASSERT(IsValidSwMode(swMode));
1845
1846 if (pPatInfo->maxItemCount <= 3)
1847 {
1848 ADDR_EQUATION equation = {};
1849
1850 ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
1851
1852 equationIndex = m_numEquations;
1853 ADDR_ASSERT(equationIndex < EquationTableSize);
1854
1855 m_equationTable[equationIndex] = equation;
1856
1857 m_numEquations++;
1858 }
1859 else
1860 {
1861 // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
1862 ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
1863 ADDR_ASSERT(rsrcTypeIdx == 1);
1864 ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
1865 ADDR_ASSERT(m_settings.supportRbPlus == 1);
1866 }
1867 }
1868
1869 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
1870 }
1871 }
1872 }
1873 }
1874
1875 /**
1876 ************************************************************************************************************************
1877 * Gfx10Lib::HwlGetEquationIndex
1878 *
1879 * @brief
1880 * Interface function stub of GetEquationIndex
1881 *
1882 * @return
1883 * ADDR_E_RETURNCODE
1884 ************************************************************************************************************************
1885 */
1886 UINT_32 Gfx10Lib::HwlGetEquationIndex(
1887 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
1888 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
1889 ) const
1890 {
1891 UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
1892
1893 if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
1894 (pIn->resourceType == ADDR_RSRC_TEX_3D))
1895 {
1896 const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
1897 const UINT_32 swModeIdx = static_cast<UINT_32>(pIn->swizzleMode);
1898 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
1899
1900 equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
1901 }
1902
1903 if (pOut->pMipInfo != NULL)
1904 {
1905 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
1906 {
1907 pOut->pMipInfo[i].equationIndex = equationIdx;
1908 }
1909 }
1910
1911 return equationIdx;
1912 }
1913
1914 /**
1915 ************************************************************************************************************************
1916 * Gfx10Lib::IsValidDisplaySwizzleMode
1917 *
1918 * @brief
1919 * Check if a swizzle mode is supported by display engine
1920 *
1921 * @return
1922 * TRUE is swizzle mode is supported by display engine
1923 ************************************************************************************************************************
1924 */
1925 BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
1926 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
1927 ) const
1928 {
1929 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
1930
1931 BOOL_32 support = FALSE;
1932
1933 if (m_settings.isDcn2)
1934 {
1935 switch (pIn->swizzleMode)
1936 {
1937 case ADDR_SW_4KB_D:
1938 case ADDR_SW_4KB_D_X:
1939 case ADDR_SW_64KB_D:
1940 case ADDR_SW_64KB_D_T:
1941 case ADDR_SW_64KB_D_X:
1942 support = (pIn->bpp == 64);
1943 break;
1944
1945 case ADDR_SW_LINEAR:
1946 case ADDR_SW_4KB_S:
1947 case ADDR_SW_4KB_S_X:
1948 case ADDR_SW_64KB_S:
1949 case ADDR_SW_64KB_S_T:
1950 case ADDR_SW_64KB_S_X:
1951 case ADDR_SW_64KB_R_X:
1952 support = (pIn->bpp <= 64);
1953 break;
1954
1955 default:
1956 break;
1957 }
1958 }
1959 else
1960 {
1961 ADDR_NOT_IMPLEMENTED();
1962 }
1963
1964 return support;
1965 }
1966
1967 /**
1968 ************************************************************************************************************************
1969 * Gfx10Lib::GetMaxNumMipsInTail
1970 *
1971 * @brief
1972 * Return max number of mips in tails
1973 *
1974 * @return
1975 * Max number of mips in tails
1976 ************************************************************************************************************************
1977 */
1978 UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
1979 UINT_32 blockSizeLog2, ///< block size log2
1980 BOOL_32 isThin ///< is thin or thick
1981 ) const
1982 {
1983 UINT_32 effectiveLog2 = blockSizeLog2;
1984
1985 if (isThin == FALSE)
1986 {
1987 effectiveLog2 -= (blockSizeLog2 - 8) / 3;
1988 }
1989
1990 return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
1991 }
1992
1993 /**
1994 ************************************************************************************************************************
1995 * Gfx10Lib::HwlComputePipeBankXor
1996 *
1997 * @brief
1998 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
1999 *
2000 * @return
2001 * PipeBankXor value
2002 ************************************************************************************************************************
2003 */
2004 ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
2005 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2006 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2007 ) const
2008 {
2009 if (IsNonPrtXor(pIn->swizzleMode))
2010 {
2011 const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2012 const UINT_32 pipeBits = GetPipeXorBits(blockBits);
2013 const UINT_32 bankBits = GetBankXorBits(blockBits);
2014
2015 UINT_32 pipeXor = 0;
2016 UINT_32 bankXor = 0;
2017
2018 if (bankBits != 0)
2019 {
2020 if (blockBits == 16)
2021 {
2022 const UINT_32 XorPatternLen = 8;
2023 static const UINT_32 XorBank1b[XorPatternLen] = {0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80};
2024 static const UINT_32 XorBank2b[XorPatternLen] = {0x00, 0x80, 0x40, 0xC0, 0x80, 0x00, 0xC0, 0x40};
2025 static const UINT_32 XorBank3b[XorPatternLen] = {0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0};
2026
2027 const UINT_32 index = pIn->surfIndex % XorPatternLen;
2028
2029 if (bankBits == 1)
2030 {
2031 bankXor = XorBank1b[index];
2032 }
2033 else if (bankBits == 2)
2034 {
2035 bankXor = XorBank2b[index];
2036 }
2037 else
2038 {
2039 bankXor = XorBank3b[index];
2040
2041 if (bankBits == 4)
2042 {
2043 bankXor >>= (2 - pipeBits);
2044 }
2045 }
2046 }
2047 }
2048
2049 pOut->pipeBankXor = bankXor | pipeXor;
2050 }
2051 else
2052 {
2053 pOut->pipeBankXor = 0;
2054 }
2055
2056 return ADDR_OK;
2057 }
2058
2059 /**
2060 ************************************************************************************************************************
2061 * Gfx10Lib::HwlComputeSlicePipeBankXor
2062 *
2063 * @brief
2064 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2065 *
2066 * @return
2067 * PipeBankXor value
2068 ************************************************************************************************************************
2069 */
2070 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
2071 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2072 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2073 ) const
2074 {
2075 if (IsNonPrtXor(pIn->swizzleMode))
2076 {
2077 const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2078 const UINT_32 pipeBits = GetPipeXorBits(blockBits);
2079 const UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
2080
2081 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;
2082 }
2083 else
2084 {
2085 pOut->pipeBankXor = 0;
2086 }
2087
2088 return ADDR_OK;
2089 }
2090
2091 /**
2092 ************************************************************************************************************************
2093 * Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2094 *
2095 * @brief
2096 * Compute sub resource offset to support swizzle pattern
2097 *
2098 * @return
2099 * Offset
2100 ************************************************************************************************************************
2101 */
2102 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2103 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure
2104 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure
2105 ) const
2106 {
2107 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2108
2109 pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
2110
2111 return ADDR_OK;
2112 }
2113
2114 /**
2115 ************************************************************************************************************************
2116 * Gfx10Lib::ValidateNonSwModeParams
2117 *
2118 * @brief
2119 * Validate compute surface info params except swizzle mode
2120 *
2121 * @return
2122 * TRUE if parameters are valid, FALSE otherwise
2123 ************************************************************************************************************************
2124 */
2125 BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
2126 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2127 {
2128 BOOL_32 valid = TRUE;
2129
2130 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2131 {
2132 ADDR_ASSERT_ALWAYS();
2133 valid = FALSE;
2134 }
2135
2136 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
2137 {
2138 ADDR_ASSERT_ALWAYS();
2139 valid = FALSE;
2140 }
2141
2142 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2143 const AddrResourceType rsrcType = pIn->resourceType;
2144 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
2145 const BOOL_32 msaa = (pIn->numFrags > 1);
2146 const BOOL_32 display = flags.display;
2147 const BOOL_32 tex3d = IsTex3d(rsrcType);
2148 const BOOL_32 tex2d = IsTex2d(rsrcType);
2149 const BOOL_32 tex1d = IsTex1d(rsrcType);
2150 const BOOL_32 stereo = flags.qbStereo;
2151
2152 // Resource type check
2153 if (tex1d)
2154 {
2155 if (msaa || display || stereo)
2156 {
2157 ADDR_ASSERT_ALWAYS();
2158 valid = FALSE;
2159 }
2160 }
2161 else if (tex2d)
2162 {
2163 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2164 {
2165 ADDR_ASSERT_ALWAYS();
2166 valid = FALSE;
2167 }
2168 }
2169 else if (tex3d)
2170 {
2171 if (msaa || display || stereo)
2172 {
2173 ADDR_ASSERT_ALWAYS();
2174 valid = FALSE;
2175 }
2176 }
2177 else
2178 {
2179 ADDR_ASSERT_ALWAYS();
2180 valid = FALSE;
2181 }
2182
2183 return valid;
2184 }
2185
2186 /**
2187 ************************************************************************************************************************
2188 * Gfx10Lib::ValidateSwModeParams
2189 *
2190 * @brief
2191 * Validate compute surface info related to swizzle mode
2192 *
2193 * @return
2194 * TRUE if parameters are valid, FALSE otherwise
2195 ************************************************************************************************************************
2196 */
2197 BOOL_32 Gfx10Lib::ValidateSwModeParams(
2198 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2199 {
2200 BOOL_32 valid = TRUE;
2201
2202 if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
2203 {
2204 ADDR_ASSERT_ALWAYS();
2205 valid = FALSE;
2206 }
2207
2208 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2209 const AddrResourceType rsrcType = pIn->resourceType;
2210 const AddrSwizzleMode swizzle = pIn->swizzleMode;
2211 const BOOL_32 msaa = (pIn->numFrags > 1);
2212 const BOOL_32 zbuffer = flags.depth || flags.stencil;
2213 const BOOL_32 color = flags.color;
2214 const BOOL_32 display = flags.display;
2215 const BOOL_32 tex3d = IsTex3d(rsrcType);
2216 const BOOL_32 tex2d = IsTex2d(rsrcType);
2217 const BOOL_32 tex1d = IsTex1d(rsrcType);
2218 const BOOL_32 thin3d = flags.view3dAs2dArray;
2219 const BOOL_32 linear = IsLinear(swizzle);
2220 const BOOL_32 blk256B = IsBlock256b(swizzle);
2221 const BOOL_32 blkVar = IsBlockVariable(swizzle);
2222 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
2223 const BOOL_32 prt = flags.prt;
2224 const BOOL_32 fmask = flags.fmask;
2225
2226 // Misc check
2227 if ((pIn->numFrags > 1) &&
2228 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2229 {
2230 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2231 ADDR_ASSERT_ALWAYS();
2232 valid = FALSE;
2233 }
2234
2235 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2236 {
2237 ADDR_ASSERT_ALWAYS();
2238 valid = FALSE;
2239 }
2240
2241 if ((pIn->bpp == 96) && (linear == FALSE))
2242 {
2243 ADDR_ASSERT_ALWAYS();
2244 valid = FALSE;
2245 }
2246
2247 const UINT_32 swizzleMask = 1 << swizzle;
2248
2249 // Resource type check
2250 if (tex1d)
2251 {
2252 if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
2253 {
2254 ADDR_ASSERT_ALWAYS();
2255 valid = FALSE;
2256 }
2257 }
2258 else if (tex2d)
2259 {
2260 if (((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0) ||
2261 (prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||
2262 (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))
2263 {
2264 ADDR_ASSERT_ALWAYS();
2265 valid = FALSE;
2266 }
2267 }
2268 else if (tex3d)
2269 {
2270 if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
2271 (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
2272 (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0)))
2273 {
2274 ADDR_ASSERT_ALWAYS();
2275 valid = FALSE;
2276 }
2277 }
2278
2279 // Swizzle type check
2280 if (linear)
2281 {
2282 if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2283 {
2284 ADDR_ASSERT_ALWAYS();
2285 valid = FALSE;
2286 }
2287 }
2288 else if (IsZOrderSwizzle(swizzle))
2289 {
2290 if ((pIn->bpp > 64) ||
2291 (msaa && (color || (pIn->bpp > 32))) ||
2292 ElemLib::IsBlockCompressed(pIn->format) ||
2293 ElemLib::IsMacroPixelPacked(pIn->format))
2294 {
2295 ADDR_ASSERT_ALWAYS();
2296 valid = FALSE;
2297 }
2298 }
2299 else if (IsStandardSwizzle(rsrcType, swizzle))
2300 {
2301 if (zbuffer || msaa)
2302 {
2303 ADDR_ASSERT_ALWAYS();
2304 valid = FALSE;
2305 }
2306 }
2307 else if (IsDisplaySwizzle(rsrcType, swizzle))
2308 {
2309 if (zbuffer || msaa)
2310 {
2311 ADDR_ASSERT_ALWAYS();
2312 valid = FALSE;
2313 }
2314 }
2315 else if (IsRtOptSwizzle(swizzle))
2316 {
2317 if (zbuffer)
2318 {
2319 ADDR_ASSERT_ALWAYS();
2320 valid = FALSE;
2321 }
2322 }
2323 else
2324 {
2325 ADDR_ASSERT_ALWAYS();
2326 valid = FALSE;
2327 }
2328
2329 // Block type check
2330 if (blk256B)
2331 {
2332 if (zbuffer || tex3d || msaa)
2333 {
2334 ADDR_ASSERT_ALWAYS();
2335 valid = FALSE;
2336 }
2337 }
2338 else if (blkVar)
2339 {
2340 if (m_blockVarSizeLog2 == 0)
2341 {
2342 ADDR_ASSERT_ALWAYS();
2343 valid = FALSE;
2344 }
2345 }
2346
2347 return valid;
2348 }
2349
2350 /**
2351 ************************************************************************************************************************
2352 * Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2353 *
2354 * @brief
2355 * Compute surface info sanity check
2356 *
2357 * @return
2358 * Offset
2359 ************************************************************************************************************************
2360 */
2361 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2362 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
2363 ) const
2364 {
2365 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2366 }
2367
2368 /**
2369 ************************************************************************************************************************
2370 * Gfx10Lib::HwlGetPreferredSurfaceSetting
2371 *
2372 * @brief
2373 * Internal function to get suggested surface information for cliet to use
2374 *
2375 * @return
2376 * ADDR_E_RETURNCODE
2377 ************************************************************************************************************************
2378 */
2379 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2380 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure
2381 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure
2382 ) const
2383 {
2384 ADDR_E_RETURNCODE returnCode = ADDR_OK;
2385
2386 if (pIn->flags.fmask)
2387 {
2388 const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;
2389 const BOOL_32 forbidVarBlockType = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));
2390
2391 if (forbid64KbBlockType && forbidVarBlockType)
2392 {
2393 // Invalid combination...
2394 ADDR_ASSERT_ALWAYS();
2395 returnCode = ADDR_INVALIDPARAMS;
2396 }
2397 else
2398 {
2399 pOut->resourceType = ADDR_RSRC_TEX_2D;
2400 pOut->validBlockSet.value = 0;
2401 pOut->validBlockSet.macroThin64KB = forbid64KbBlockType ? 0 : 1;
2402 pOut->validBlockSet.var = forbidVarBlockType ? 0 : 1;
2403 pOut->validSwModeSet.value = 0;
2404 pOut->validSwModeSet.sw64KB_Z_X = forbid64KbBlockType ? 0 : 1;
2405 pOut->validSwModeSet.swVar_Z_X = forbidVarBlockType ? 0 : 1;
2406 pOut->canXor = TRUE;
2407 pOut->validSwTypeSet.value = AddrSwSetZ;
2408 pOut->clientPreferredSwSet = pOut->validSwTypeSet;
2409
2410 BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);
2411
2412 if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
2413 {
2414 const UINT_8 maxFmaskSwizzleModeType = 2;
2415 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2416 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2417 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
2418 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2419 const UINT_32 width = Max(pIn->width, 1u);
2420 const UINT_32 height = Max(pIn->height, 1u);
2421 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);
2422
2423 AddrSwizzleMode swMode[maxFmaskSwizzleModeType] = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};
2424 Dim3d blkDim[maxFmaskSwizzleModeType] = {{0}, {0}};
2425 Dim3d padDim[maxFmaskSwizzleModeType] = {{0}, {0}};
2426 UINT_64 padSize[maxFmaskSwizzleModeType] = {0};
2427
2428 for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)
2429 {
2430 ComputeBlockDimensionForSurf(&blkDim[i].w,
2431 &blkDim[i].h,
2432 &blkDim[i].d,
2433 fmaskBpp,
2434 1,
2435 pOut->resourceType,
2436 swMode[i]);
2437
2438 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2439 padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2440 }
2441
2442 if (GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0]))
2443 {
2444 if ((padSize[1] * ratioHi) <= (padSize[0] * ratioLow))
2445 {
2446 use64KbBlockType = FALSE;
2447 }
2448 }
2449 else
2450 {
2451 if ((padSize[1] * ratioLow) < (padSize[0] * ratioHi))
2452 {
2453 use64KbBlockType = FALSE;
2454 }
2455 }
2456 }
2457 else if (forbidVarBlockType)
2458 {
2459 use64KbBlockType = TRUE;
2460 }
2461
2462 if (use64KbBlockType)
2463 {
2464 pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2465 }
2466 else
2467 {
2468 pOut->swizzleMode = ADDR_SW_VAR_Z_X;
2469 }
2470 }
2471 }
2472 else
2473 {
2474 UINT_32 bpp = pIn->bpp;
2475 UINT_32 width = Max(pIn->width, 1u);
2476 UINT_32 height = Max(pIn->height, 1u);
2477
2478 // Set format to INVALID will skip this conversion
2479 if (pIn->format != ADDR_FMT_INVALID)
2480 {
2481 ElemMode elemMode = ADDR_UNCOMPRESSED;
2482 UINT_32 expandX, expandY;
2483
2484 // Get compression/expansion factors and element mode which indicates compression/expansion
2485 bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2486 &elemMode,
2487 &expandX,
2488 &expandY);
2489
2490 UINT_32 basePitch = 0;
2491 GetElemLib()->AdjustSurfaceInfo(elemMode,
2492 expandX,
2493 expandY,
2494 &bpp,
2495 &basePitch,
2496 &width,
2497 &height);
2498 }
2499
2500 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2501 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2502 const UINT_32 numSamples = Max(pIn->numSamples, 1u);
2503 const UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2504 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
2505
2506 // Pre sanity check on non swizzle mode parameters
2507 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2508 localIn.flags = pIn->flags;
2509 localIn.resourceType = pIn->resourceType;
2510 localIn.format = pIn->format;
2511 localIn.bpp = bpp;
2512 localIn.width = width;
2513 localIn.height = height;
2514 localIn.numSlices = numSlices;
2515 localIn.numMipLevels = numMipLevels;
2516 localIn.numSamples = numSamples;
2517 localIn.numFrags = numFrags;
2518
2519 if (ValidateNonSwModeParams(&localIn))
2520 {
2521 // Forbid swizzle mode(s) by client setting
2522 ADDR2_SWMODE_SET allowedSwModeSet = {};
2523 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2524 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx10Blk256BSwModeMask;
2525 allowedSwModeSet.value |=
2526 pIn->forbiddenBlock.macroThin4KB ? 0 :
2527 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
2528 allowedSwModeSet.value |=
2529 pIn->forbiddenBlock.macroThick4KB ? 0 :
2530 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
2531 allowedSwModeSet.value |=
2532 pIn->forbiddenBlock.macroThin64KB ? 0 :
2533 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
2534 allowedSwModeSet.value |=
2535 pIn->forbiddenBlock.macroThick64KB ? 0 :
2536 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
2537 allowedSwModeSet.value |=
2538 pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
2539
2540 if (pIn->preferredSwSet.value != 0)
2541 {
2542 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2543 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2544 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2545 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2546 }
2547
2548 if (pIn->noXor)
2549 {
2550 allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2551 }
2552
2553 if (pIn->maxAlign > 0)
2554 {
2555 if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
2556 {
2557 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2558 }
2559
2560 if (pIn->maxAlign < Size64K)
2561 {
2562 allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2563 }
2564
2565 if (pIn->maxAlign < Size4K)
2566 {
2567 allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2568 }
2569
2570 if (pIn->maxAlign < Size256)
2571 {
2572 allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2573 }
2574 }
2575
2576 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2577 switch (pIn->resourceType)
2578 {
2579 case ADDR_RSRC_TEX_1D:
2580 allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2581 break;
2582
2583 case ADDR_RSRC_TEX_2D:
2584 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2585 break;
2586
2587 case ADDR_RSRC_TEX_3D:
2588 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2589
2590 if (pIn->flags.view3dAs2dArray)
2591 {
2592 allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask;
2593 }
2594 break;
2595
2596 default:
2597 ADDR_ASSERT_ALWAYS();
2598 allowedSwModeSet.value = 0;
2599 break;
2600 }
2601
2602 if (ElemLib::IsBlockCompressed(pIn->format) ||
2603 ElemLib::IsMacroPixelPacked(pIn->format) ||
2604 (bpp > 64) ||
2605 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2606 {
2607 allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2608 }
2609
2610 if (pIn->format == ADDR_FMT_32_32_32)
2611 {
2612 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2613 }
2614
2615 if (msaa)
2616 {
2617 allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
2618 }
2619
2620 if (pIn->flags.depth || pIn->flags.stencil)
2621 {
2622 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2623 }
2624
2625 if (pIn->flags.display)
2626 {
2627 if (m_settings.isDcn2)
2628 {
2629 allowedSwModeSet.value &= (bpp == 64) ? Dcn2Bpp64SwModeMask : Dcn2NonBpp64SwModeMask;
2630 }
2631 else
2632 {
2633 ADDR_NOT_IMPLEMENTED();
2634 }
2635 }
2636
2637 if (allowedSwModeSet.value != 0)
2638 {
2639 #if DEBUG
2640 // Post sanity check, at least AddrLib should accept the output generated by its own
2641 UINT_32 validateSwModeSet = allowedSwModeSet.value;
2642
2643 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
2644 {
2645 if (validateSwModeSet & 1)
2646 {
2647 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
2648 ADDR_ASSERT(ValidateSwModeParams(&localIn));
2649 }
2650
2651 validateSwModeSet >>= 1;
2652 }
2653 #endif
2654
2655 pOut->resourceType = pIn->resourceType;
2656 pOut->validSwModeSet = allowedSwModeSet;
2657 pOut->canXor = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
2658 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2659 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
2660
2661 pOut->clientPreferredSwSet = pIn->preferredSwSet;
2662
2663 if (pOut->clientPreferredSwSet.value == 0)
2664 {
2665 pOut->clientPreferredSwSet.value = AddrSwSetAll;
2666 }
2667
2668 // Apply optional restrictions
2669 if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)
2670 {
2671 if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)
2672 {
2673 // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
2674 // the GL2 in VAR mode, so it should be avoided.
2675 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2676 }
2677 else
2678 {
2679 // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
2680 // But we have to suffer from low performance because there is no other choice...
2681 ADDR_ASSERT_ALWAYS();
2682 }
2683 }
2684
2685 if (pIn->flags.needEquation)
2686 {
2687 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
2688 }
2689
2690 if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
2691 {
2692 pOut->swizzleMode = ADDR_SW_LINEAR;
2693 }
2694 else
2695 {
2696 // Always ignore linear swizzle mode if there is other choice.
2697 allowedSwModeSet.swLinear = 0;
2698
2699 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2700
2701 // Determine block size if there is 2 or more block type candidates
2702 if (IsPow2(allowedBlockSet.value) == FALSE)
2703 {
2704 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = { ADDR_SW_LINEAR };
2705
2706 if (m_blockVarSizeLog2 != 0)
2707 {
2708 swMode[AddrBlockVar] = ADDR_SW_VAR_R_X;
2709 }
2710
2711 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
2712 {
2713 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
2714 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_R_X;
2715 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
2716 }
2717 else
2718 {
2719 swMode[AddrBlockMicro] = ADDR_SW_256B_S;
2720 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_S;
2721 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
2722 }
2723
2724 Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
2725 Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
2726 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
2727
2728 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2729 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2730 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
2731 UINT_32 minSizeBlk = AddrBlockMicro;
2732 UINT_64 minSize = 0;
2733
2734 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
2735 {
2736 if (allowedBlockSet.value & (1 << i))
2737 {
2738 ComputeBlockDimensionForSurf(&blkDim[i].w,
2739 &blkDim[i].h,
2740 &blkDim[i].d,
2741 bpp,
2742 numFrags,
2743 pOut->resourceType,
2744 swMode[i]);
2745
2746 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2747 padSize[i] = PowTwoAlign(padSize[i] * numFrags, sizeAlignInElement);
2748
2749 if (minSize == 0)
2750 {
2751 minSize = padSize[i];
2752 minSizeBlk = i;
2753 }
2754 else
2755 {
2756 // Due to the fact that VAR block size = 16KB * m_pipes, it is possible that VAR
2757 // block size < 64KB. And ratio[Hi/Low] logic implicitly requires iterating from
2758 // smaller block type to bigger block type. So we have to correct comparing logic
2759 // according to the size of existing "minimun block" and size of coming/comparing
2760 // block. The new logic can also be useful to any future change about AddrBlockType.
2761 if (GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk]))
2762 {
2763 if ((padSize[i] * ratioHi) <= (minSize * ratioLow))
2764 {
2765 minSize = padSize[i];
2766 minSizeBlk = i;
2767 }
2768 }
2769 else
2770 {
2771 if ((padSize[i] * ratioLow) < (minSize * ratioHi))
2772 {
2773 minSize = padSize[i];
2774 minSizeBlk = i;
2775 }
2776 }
2777 }
2778 }
2779 }
2780
2781 if ((allowedBlockSet.micro == TRUE) &&
2782 (width <= blkDim[AddrBlockMicro].w) &&
2783 (height <= blkDim[AddrBlockMicro].h))
2784 {
2785 minSizeBlk = AddrBlockMicro;
2786 }
2787
2788 if (minSizeBlk == AddrBlockMicro)
2789 {
2790 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2791 allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
2792 }
2793 else if (minSizeBlk == AddrBlockThick4KB)
2794 {
2795 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2796 allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;
2797 }
2798 else if (minSizeBlk == AddrBlockThin4KB)
2799 {
2800 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2801 allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
2802 }
2803 else if (minSizeBlk == AddrBlockThick64KB)
2804 {
2805 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2806 allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;
2807 }
2808 else if (minSizeBlk == AddrBlockThin64KB)
2809 {
2810 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
2811 Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;
2812 }
2813 else
2814 {
2815 ADDR_ASSERT(minSizeBlk == AddrBlockVar);
2816 allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;
2817 }
2818 }
2819
2820 // Block type should be determined.
2821 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
2822
2823 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
2824
2825 // Determine swizzle type if there is 2 or more swizzle type candidates
2826 if (IsPow2(allowedSwSet.value) == FALSE)
2827 {
2828 if (ElemLib::IsBlockCompressed(pIn->format))
2829 {
2830 if (allowedSwSet.sw_D)
2831 {
2832 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2833 }
2834 else if (allowedSwSet.sw_S)
2835 {
2836 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2837 }
2838 else
2839 {
2840 ADDR_ASSERT(allowedSwSet.sw_R);
2841 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2842 }
2843 }
2844 else if (ElemLib::IsMacroPixelPacked(pIn->format))
2845 {
2846 if (allowedSwSet.sw_S)
2847 {
2848 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2849 }
2850 else if (allowedSwSet.sw_D)
2851 {
2852 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2853 }
2854 else
2855 {
2856 ADDR_ASSERT(allowedSwSet.sw_R);
2857 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2858 }
2859 }
2860 else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
2861 {
2862 if (pIn->flags.color &&
2863 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&
2864 allowedSwSet.sw_D)
2865 {
2866 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2867 }
2868 else if (allowedSwSet.sw_S)
2869 {
2870 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2871 }
2872 else if (allowedSwSet.sw_R)
2873 {
2874 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2875 }
2876 else
2877 {
2878 ADDR_ASSERT(allowedSwSet.sw_Z);
2879 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2880 }
2881 }
2882 else
2883 {
2884 if (allowedSwSet.sw_R)
2885 {
2886 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2887 }
2888 else if (allowedSwSet.sw_D)
2889 {
2890 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2891 }
2892 else if (allowedSwSet.sw_S)
2893 {
2894 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2895 }
2896 else
2897 {
2898 ADDR_ASSERT(allowedSwSet.sw_Z);
2899 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2900 }
2901 }
2902 }
2903
2904 // Swizzle type should be determined.
2905 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
2906
2907 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
2908 // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
2909 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
2910 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
2911 }
2912 }
2913 else
2914 {
2915 // Invalid combination...
2916 ADDR_ASSERT_ALWAYS();
2917 returnCode = ADDR_INVALIDPARAMS;
2918 }
2919 }
2920 else
2921 {
2922 // Invalid combination...
2923 ADDR_ASSERT_ALWAYS();
2924 returnCode = ADDR_INVALIDPARAMS;
2925 }
2926 }
2927
2928 return returnCode;
2929 }
2930
2931 /**
2932 ************************************************************************************************************************
2933 * Gfx10Lib::ComputeStereoInfo
2934 *
2935 * @brief
2936 * Compute height alignment and right eye pipeBankXor for stereo surface
2937 *
2938 * @return
2939 * Error code
2940 *
2941 ************************************************************************************************************************
2942 */
2943 ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
2944 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< Compute surface info
2945 UINT_32 blkHeight, ///< Block height
2946 UINT_32* pAlignY, ///< Stereo requested additional alignment in Y
2947 UINT_32* pRightXor ///< Right eye xor
2948 ) const
2949 {
2950 ADDR_E_RETURNCODE ret = ADDR_OK;
2951
2952 *pAlignY = 1;
2953 *pRightXor = 0;
2954
2955 if (IsNonPrtXor(pIn->swizzleMode))
2956 {
2957 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
2958 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
2959 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
2960 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
2961 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
2962
2963 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
2964 {
2965 UINT_32 yMax = 0;
2966 UINT_32 yPos = 0;
2967
2968 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
2969 {
2970 if (m_equationTable[eqIndex].xor1[i].value == 0)
2971 {
2972 break;
2973 }
2974
2975 ADDR_ASSERT(m_equationTable[eqIndex].xor1[i].valid == 1);
2976
2977 if ((m_equationTable[eqIndex].xor1[i].channel == 1) &&
2978 (m_equationTable[eqIndex].xor1[i].index > yMax))
2979 {
2980 yMax = m_equationTable[eqIndex].xor1[i].index;
2981 yPos = i;
2982 }
2983 }
2984
2985 const UINT_32 additionalAlign = 1 << yMax;
2986
2987 if (additionalAlign >= blkHeight)
2988 {
2989 *pAlignY *= (additionalAlign / blkHeight);
2990
2991 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
2992
2993 if ((alignedHeight >> yMax) & 1)
2994 {
2995 *pRightXor = 1 << (yPos - m_pipeInterleaveLog2);
2996 }
2997 }
2998 }
2999 else
3000 {
3001 ret = ADDR_INVALIDPARAMS;
3002 }
3003 }
3004
3005 return ret;
3006 }
3007
3008 /**
3009 ************************************************************************************************************************
3010 * Gfx10Lib::HwlComputeSurfaceInfoTiled
3011 *
3012 * @brief
3013 * Internal function to calculate alignment for tiled surface
3014 *
3015 * @return
3016 * ADDR_E_RETURNCODE
3017 ************************************************************************************************************************
3018 */
3019 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
3020 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3021 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3022 ) const
3023 {
3024 ADDR_E_RETURNCODE ret;
3025
3026 if (IsBlock256b(pIn->swizzleMode))
3027 {
3028 ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3029 }
3030 else
3031 {
3032 ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3033 }
3034
3035 return ret;
3036 }
3037
3038 /**
3039 ************************************************************************************************************************
3040 * Gfx10Lib::ComputeSurfaceInfoMicroTiled
3041 *
3042 * @brief
3043 * Internal function to calculate alignment for micro tiled surface
3044 *
3045 * @return
3046 * ADDR_E_RETURNCODE
3047 ************************************************************************************************************************
3048 */
3049 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
3050 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3051 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3052 ) const
3053 {
3054 ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3055 &pOut->blockHeight,
3056 &pOut->blockSlices,
3057 pIn->bpp,
3058 pIn->numFrags,
3059 pIn->resourceType,
3060 pIn->swizzleMode);
3061
3062 if (ret == ADDR_OK)
3063 {
3064 pOut->mipChainPitch = 0;
3065 pOut->mipChainHeight = 0;
3066 pOut->mipChainSlice = 0;
3067 pOut->epitchIsHeight = FALSE;
3068 pOut->mipChainInTail = FALSE;
3069 pOut->firstMipIdInTail = pIn->numMipLevels;
3070
3071 const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3072
3073 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3074 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3075 pOut->numSlices = pIn->numSlices;
3076 pOut->baseAlign = blockSize;
3077
3078 if (pIn->numMipLevels > 1)
3079 {
3080 const UINT_32 mip0Width = pIn->width;
3081 const UINT_32 mip0Height = pIn->height;
3082 UINT_64 mipSliceSize = 0;
3083
3084 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3085 {
3086 UINT_32 mipWidth, mipHeight;
3087
3088 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3089
3090 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pOut->blockWidth);
3091 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3092
3093 if (pOut->pMipInfo != NULL)
3094 {
3095 pOut->pMipInfo[i].pitch = mipActualWidth;
3096 pOut->pMipInfo[i].height = mipActualHeight;
3097 pOut->pMipInfo[i].depth = 1;
3098 pOut->pMipInfo[i].offset = mipSliceSize;
3099 pOut->pMipInfo[i].mipTailOffset = 0;
3100 pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3101 }
3102
3103 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3104 }
3105
3106 pOut->sliceSize = mipSliceSize;
3107 pOut->surfSize = mipSliceSize * pOut->numSlices;
3108 }
3109 else
3110 {
3111 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3112 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3113
3114 if (pOut->pMipInfo != NULL)
3115 {
3116 pOut->pMipInfo[0].pitch = pOut->pitch;
3117 pOut->pMipInfo[0].height = pOut->height;
3118 pOut->pMipInfo[0].depth = 1;
3119 pOut->pMipInfo[0].offset = 0;
3120 pOut->pMipInfo[0].mipTailOffset = 0;
3121 pOut->pMipInfo[0].macroBlockOffset = 0;
3122 }
3123 }
3124
3125 }
3126
3127 return ret;
3128 }
3129
3130 /**
3131 ************************************************************************************************************************
3132 * Gfx10Lib::ComputeSurfaceInfoMacroTiled
3133 *
3134 * @brief
3135 * Internal function to calculate alignment for macro tiled surface
3136 *
3137 * @return
3138 * ADDR_E_RETURNCODE
3139 ************************************************************************************************************************
3140 */
3141 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
3142 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3143 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3144 ) const
3145 {
3146 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3147 &pOut->blockHeight,
3148 &pOut->blockSlices,
3149 pIn->bpp,
3150 pIn->numFrags,
3151 pIn->resourceType,
3152 pIn->swizzleMode);
3153
3154 if (returnCode == ADDR_OK)
3155 {
3156 UINT_32 heightAlign = pOut->blockHeight;
3157
3158 if (pIn->flags.qbStereo)
3159 {
3160 UINT_32 rightXor = 0;
3161 UINT_32 alignY = 1;
3162
3163 returnCode = ComputeStereoInfo(pIn, heightAlign, &alignY, &rightXor);
3164
3165 if (returnCode == ADDR_OK)
3166 {
3167 pOut->pStereoInfo->rightSwizzle = rightXor;
3168
3169 heightAlign *= alignY;
3170 }
3171 }
3172
3173 if (returnCode == ADDR_OK)
3174 {
3175 // Mip chain dimesion and epitch has no meaning in GFX10, set to default value
3176 pOut->mipChainPitch = 0;
3177 pOut->mipChainHeight = 0;
3178 pOut->mipChainSlice = 0;
3179 pOut->epitchIsHeight = FALSE;
3180 pOut->mipChainInTail = FALSE;
3181 pOut->firstMipIdInTail = pIn->numMipLevels;
3182
3183 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3184 const UINT_32 blockSize = 1 << blockSizeLog2;
3185
3186 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3187 pOut->height = PowTwoAlign(pIn->height, heightAlign);
3188 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3189 pOut->baseAlign = blockSize;
3190
3191 if (pIn->numMipLevels > 1)
3192 {
3193 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
3194 pIn->swizzleMode,
3195 pOut->blockWidth,
3196 pOut->blockHeight,
3197 pOut->blockSlices);
3198 const UINT_32 mip0Width = pIn->width;
3199 const UINT_32 mip0Height = pIn->height;
3200 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
3201 const UINT_32 mip0Depth = isThin ? 1 : pIn->numSlices;
3202 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3203 const UINT_32 index = Log2(pIn->bpp >> 3);
3204 UINT_32 firstMipInTail = pIn->numMipLevels;
3205 UINT_64 mipChainSliceSize = 0;
3206 UINT_64 mipSize[MaxMipLevels];
3207 UINT_64 mipSliceSize[MaxMipLevels];
3208
3209 Dim3d fixedTailMaxDim = tailMaxDim;
3210
3211 if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3212 {
3213 fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3214 fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3215 }
3216
3217 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3218 {
3219 UINT_32 mipWidth, mipHeight, mipDepth;
3220
3221 GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3222
3223 if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3224 {
3225 firstMipInTail = i;
3226 mipChainSliceSize += blockSize / pOut->blockSlices;
3227 break;
3228 }
3229 else
3230 {
3231 const UINT_32 pitch = PowTwoAlign(mipWidth, pOut->blockWidth);
3232 const UINT_32 height = PowTwoAlign(mipHeight, pOut->blockHeight);
3233 const UINT_32 depth = PowTwoAlign(mipDepth, pOut->blockSlices);
3234 const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3235
3236 mipSize[i] = sliceSize * depth;
3237 mipSliceSize[i] = sliceSize * pOut->blockSlices;
3238 mipChainSliceSize += sliceSize;
3239
3240 if (pOut->pMipInfo != NULL)
3241 {
3242 pOut->pMipInfo[i].pitch = pitch;
3243 pOut->pMipInfo[i].height = height;
3244 pOut->pMipInfo[i].depth = depth;
3245 }
3246 }
3247 }
3248
3249 pOut->sliceSize = mipChainSliceSize;
3250 pOut->surfSize = mipChainSliceSize * pOut->numSlices;
3251 pOut->mipChainInTail = (firstMipInTail == 0) ? TRUE : FALSE;
3252 pOut->firstMipIdInTail = firstMipInTail;
3253
3254 if (pOut->pMipInfo != NULL)
3255 {
3256 UINT_64 offset = 0;
3257 UINT_64 macroBlkOffset = 0;
3258 UINT_32 tailMaxDepth = 0;
3259
3260 if (firstMipInTail != pIn->numMipLevels)
3261 {
3262 UINT_32 mipWidth, mipHeight;
3263
3264 GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3265 &mipWidth, &mipHeight, &tailMaxDepth);
3266
3267 offset = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3268 macroBlkOffset = blockSize;
3269 }
3270
3271 for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3272 {
3273 pOut->pMipInfo[i].offset = offset;
3274 pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3275 pOut->pMipInfo[i].mipTailOffset = 0;
3276
3277 offset += mipSize[i];
3278 macroBlkOffset += mipSliceSize[i];
3279 }
3280
3281 UINT_32 pitch = tailMaxDim.w;
3282 UINT_32 height = tailMaxDim.h;
3283 UINT_32 depth = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3284
3285 tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3286
3287 for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3288 {
3289 const UINT_32 m = maxMipsInTail - 1 - (i - firstMipInTail);
3290 const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3291
3292 pOut->pMipInfo[i].offset = mipOffset * tailMaxDepth;
3293 pOut->pMipInfo[i].mipTailOffset = mipOffset;
3294 pOut->pMipInfo[i].macroBlockOffset = 0;
3295
3296 pOut->pMipInfo[i].pitch = pitch;
3297 pOut->pMipInfo[i].height = height;
3298 pOut->pMipInfo[i].depth = depth;
3299
3300 UINT_32 mipX = ((mipOffset >> 9) & 1) |
3301 ((mipOffset >> 10) & 2) |
3302 ((mipOffset >> 11) & 4) |
3303 ((mipOffset >> 12) & 8) |
3304 ((mipOffset >> 13) & 16) |
3305 ((mipOffset >> 14) & 32);
3306 UINT_32 mipY = ((mipOffset >> 8) & 1) |
3307 ((mipOffset >> 9) & 2) |
3308 ((mipOffset >> 10) & 4) |
3309 ((mipOffset >> 11) & 8) |
3310 ((mipOffset >> 12) & 16) |
3311 ((mipOffset >> 13) & 32);
3312
3313 if (blockSizeLog2 & 1)
3314 {
3315 const UINT_32 temp = mipX;
3316 mipX = mipY;
3317 mipY = temp;
3318
3319 if (index & 1)
3320 {
3321 mipY = (mipY << 1) | (mipX & 1);
3322 mipX = mipX >> 1;
3323 }
3324 }
3325
3326 if (isThin)
3327 {
3328 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3329 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3330 pOut->pMipInfo[i].mipTailCoordZ = 0;
3331
3332 pitch = Max(pitch >> 1, Block256_2d[index].w);
3333 height = Max(height >> 1, Block256_2d[index].h);
3334 depth = 1;
3335 }
3336 else
3337 {
3338 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3339 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3340 pOut->pMipInfo[i].mipTailCoordZ = 0;
3341
3342 pitch = Max(pitch >> 1, Block256_3d[index].w);
3343 height = Max(height >> 1, Block256_3d[index].h);
3344 depth = PowTwoAlign(Max(depth >> 1, 1u), Block256_3d[index].d);
3345 }
3346 }
3347 }
3348 }
3349 else
3350 {
3351 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
3352 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3353
3354 if (pOut->pMipInfo != NULL)
3355 {
3356 pOut->pMipInfo[0].pitch = pOut->pitch;
3357 pOut->pMipInfo[0].height = pOut->height;
3358 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
3359 pOut->pMipInfo[0].offset = 0;
3360 pOut->pMipInfo[0].mipTailOffset = 0;
3361 pOut->pMipInfo[0].macroBlockOffset = 0;
3362 pOut->pMipInfo[0].mipTailCoordX = 0;
3363 pOut->pMipInfo[0].mipTailCoordY = 0;
3364 pOut->pMipInfo[0].mipTailCoordZ = 0;
3365 }
3366 }
3367 }
3368 }
3369
3370 return returnCode;
3371 }
3372
3373 /**
3374 ************************************************************************************************************************
3375 * Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3376 *
3377 * @brief
3378 * Internal function to calculate address from coord for tiled swizzle surface
3379 *
3380 * @return
3381 * ADDR_E_RETURNCODE
3382 ************************************************************************************************************************
3383 */
3384 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
3385 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3386 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3387 ) const
3388 {
3389 ADDR_E_RETURNCODE ret;
3390
3391 if (IsBlock256b(pIn->swizzleMode))
3392 {
3393 ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3394 }
3395 else
3396 {
3397 ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3398 }
3399
3400 return ret;
3401 }
3402
3403 /**
3404 ************************************************************************************************************************
3405 * Gfx10Lib::ComputeOffsetFromEquation
3406 *
3407 * @brief
3408 * Compute offset from equation
3409 *
3410 * @return
3411 * Offset
3412 ************************************************************************************************************************
3413 */
3414 UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
3415 const ADDR_EQUATION* pEq, ///< Equation
3416 UINT_32 x, ///< x coord in bytes
3417 UINT_32 y, ///< y coord in pixel
3418 UINT_32 z ///< z coord in slice
3419 ) const
3420 {
3421 UINT_32 offset = 0;
3422
3423 for (UINT_32 i = 0; i < pEq->numBits; i++)
3424 {
3425 UINT_32 v = 0;
3426
3427 if (pEq->addr[i].valid)
3428 {
3429 if (pEq->addr[i].channel == 0)
3430 {
3431 v ^= (x >> pEq->addr[i].index) & 1;
3432 }
3433 else if (pEq->addr[i].channel == 1)
3434 {
3435 v ^= (y >> pEq->addr[i].index) & 1;
3436 }
3437 else
3438 {
3439 ADDR_ASSERT(pEq->addr[i].channel == 2);
3440 v ^= (z >> pEq->addr[i].index) & 1;
3441 }
3442 }
3443
3444 if (pEq->xor1[i].valid)
3445 {
3446 if (pEq->xor1[i].channel == 0)
3447 {
3448 v ^= (x >> pEq->xor1[i].index) & 1;
3449 }
3450 else if (pEq->xor1[i].channel == 1)
3451 {
3452 v ^= (y >> pEq->xor1[i].index) & 1;
3453 }
3454 else
3455 {
3456 ADDR_ASSERT(pEq->xor1[i].channel == 2);
3457 v ^= (z >> pEq->xor1[i].index) & 1;
3458 }
3459 }
3460
3461 if (pEq->xor2[i].valid)
3462 {
3463 if (pEq->xor2[i].channel == 0)
3464 {
3465 v ^= (x >> pEq->xor2[i].index) & 1;
3466 }
3467 else if (pEq->xor2[i].channel == 1)
3468 {
3469 v ^= (y >> pEq->xor2[i].index) & 1;
3470 }
3471 else
3472 {
3473 ADDR_ASSERT(pEq->xor2[i].channel == 2);
3474 v ^= (z >> pEq->xor2[i].index) & 1;
3475 }
3476 }
3477
3478 offset |= (v << i);
3479 }
3480
3481 return offset;
3482 }
3483
3484 /**
3485 ************************************************************************************************************************
3486 * Gfx10Lib::ComputeOffsetFromSwizzlePattern
3487 *
3488 * @brief
3489 * Compute offset from swizzle pattern
3490 *
3491 * @return
3492 * Offset
3493 ************************************************************************************************************************
3494 */
3495 UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern(
3496 const UINT_64* pPattern, ///< Swizzle pattern
3497 UINT_32 numBits, ///< Number of bits in pattern
3498 UINT_32 x, ///< x coord in pixel
3499 UINT_32 y, ///< y coord in pixel
3500 UINT_32 z, ///< z coord in slice
3501 UINT_32 s ///< sample id
3502 ) const
3503 {
3504 UINT_32 offset = 0;
3505 const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
3506
3507 for (UINT_32 i = 0; i < numBits; i++)
3508 {
3509 UINT_32 v = 0;
3510
3511 if (pSwizzlePattern[i].x != 0)
3512 {
3513 UINT_16 mask = pSwizzlePattern[i].x;
3514 UINT_32 xBits = x;
3515
3516 while (mask != 0)
3517 {
3518 if (mask & 1)
3519 {
3520 v ^= xBits & 1;
3521 }
3522
3523 xBits >>= 1;
3524 mask >>= 1;
3525 }
3526 }
3527
3528 if (pSwizzlePattern[i].y != 0)
3529 {
3530 UINT_16 mask = pSwizzlePattern[i].y;
3531 UINT_32 yBits = y;
3532
3533 while (mask != 0)
3534 {
3535 if (mask & 1)
3536 {
3537 v ^= yBits & 1;
3538 }
3539
3540 yBits >>= 1;
3541 mask >>= 1;
3542 }
3543 }
3544
3545 if (pSwizzlePattern[i].z != 0)
3546 {
3547 UINT_16 mask = pSwizzlePattern[i].z;
3548 UINT_32 zBits = z;
3549
3550 while (mask != 0)
3551 {
3552 if (mask & 1)
3553 {
3554 v ^= zBits & 1;
3555 }
3556
3557 zBits >>= 1;
3558 mask >>= 1;
3559 }
3560 }
3561
3562 if (pSwizzlePattern[i].s != 0)
3563 {
3564 UINT_16 mask = pSwizzlePattern[i].s;
3565 UINT_32 sBits = s;
3566
3567 while (mask != 0)
3568 {
3569 if (mask & 1)
3570 {
3571 v ^= sBits & 1;
3572 }
3573
3574 sBits >>= 1;
3575 mask >>= 1;
3576 }
3577 }
3578
3579 offset |= (v << i);
3580 }
3581
3582 return offset;
3583 }
3584
3585 /**
3586 ************************************************************************************************************************
3587 * Gfx10Lib::GetSwizzlePatternInfo
3588 *
3589 * @brief
3590 * Get swizzle pattern
3591 *
3592 * @return
3593 * Swizzle pattern information
3594 ************************************************************************************************************************
3595 */
3596 const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
3597 AddrSwizzleMode swizzleMode, ///< Swizzle mode
3598 AddrResourceType resourceType, ///< Resource type
3599 UINT_32 elemLog2, ///< Element size in bytes log2
3600 UINT_32 numFrag ///< Number of fragment
3601 ) const
3602 {
3603 const UINT_32 index = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
3604 const ADDR_SW_PATINFO* patInfo = NULL;
3605 const UINT_32 swizzleMask = 1 << swizzleMode;
3606
3607 if (IsLinear(swizzleMode) == FALSE)
3608 {
3609 if (IsBlockVariable(swizzleMode))
3610 {
3611 if (m_blockVarSizeLog2 != 0)
3612 {
3613 ADDR_ASSERT(m_settings.supportRbPlus);
3614
3615 if (IsRtOptSwizzle(swizzleMode))
3616 {
3617 if (numFrag == 1)
3618 {
3619 patInfo = SW_VAR_R_X_1xaa_RBPLUS_PATINFO;
3620 }
3621 else if (numFrag == 2)
3622 {
3623 patInfo = SW_VAR_R_X_2xaa_RBPLUS_PATINFO;
3624 }
3625 else if (numFrag == 4)
3626 {
3627 patInfo = SW_VAR_R_X_4xaa_RBPLUS_PATINFO;
3628 }
3629 else
3630 {
3631 ADDR_ASSERT(numFrag == 8);
3632 patInfo = SW_VAR_R_X_8xaa_RBPLUS_PATINFO;
3633 }
3634 }
3635 else if (IsZOrderSwizzle(swizzleMode))
3636 {
3637 if (numFrag == 1)
3638 {
3639 patInfo = SW_VAR_Z_X_1xaa_RBPLUS_PATINFO;
3640 }
3641 else if (numFrag == 2)
3642 {
3643 patInfo = SW_VAR_Z_X_2xaa_RBPLUS_PATINFO;
3644 }
3645 else if (numFrag == 4)
3646 {
3647 patInfo = SW_VAR_Z_X_4xaa_RBPLUS_PATINFO;
3648 }
3649 else
3650 {
3651 ADDR_ASSERT(numFrag == 8);
3652 patInfo = SW_VAR_Z_X_8xaa_RBPLUS_PATINFO;
3653 }
3654 }
3655 }
3656 }
3657 else if (resourceType == ADDR_RSRC_TEX_3D)
3658 {
3659 ADDR_ASSERT(numFrag == 1);
3660
3661 if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0)
3662 {
3663 if (IsRtOptSwizzle(swizzleMode))
3664 {
3665 patInfo = m_settings.supportRbPlus ? SW_64K_R_X_1xaa_RBPLUS_PATINFO : SW_64K_R_X_1xaa_PATINFO;
3666 }
3667 else if (IsZOrderSwizzle(swizzleMode))
3668 {
3669 patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_1xaa_RBPLUS_PATINFO : SW_64K_Z_X_1xaa_PATINFO;
3670 }
3671 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3672 {
3673 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
3674 patInfo = m_settings.supportRbPlus ? SW_64K_D3_X_RBPLUS_PATINFO : SW_64K_D3_X_PATINFO;
3675 }
3676 else
3677 {
3678 ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
3679
3680 if (IsBlock4kb(swizzleMode))
3681 {
3682 if (swizzleMode == ADDR_SW_4KB_S)
3683 {
3684 patInfo = m_settings.supportRbPlus ? SW_4K_S3_RBPLUS_PATINFO : SW_4K_S3_PATINFO;
3685 }
3686 else
3687 {
3688 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
3689 patInfo = m_settings.supportRbPlus ? SW_4K_S3_X_RBPLUS_PATINFO : SW_4K_S3_X_PATINFO;
3690 }
3691 }
3692 else
3693 {
3694 if (swizzleMode == ADDR_SW_64KB_S)
3695 {
3696 patInfo = m_settings.supportRbPlus ? SW_64K_S3_RBPLUS_PATINFO : SW_64K_S3_PATINFO;
3697 }
3698 else if (swizzleMode == ADDR_SW_64KB_S_X)
3699 {
3700 patInfo = m_settings.supportRbPlus ? SW_64K_S3_X_RBPLUS_PATINFO : SW_64K_S3_X_PATINFO;
3701 }
3702 else
3703 {
3704 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
3705 patInfo = m_settings.supportRbPlus ? SW_64K_S3_T_RBPLUS_PATINFO : SW_64K_S3_T_PATINFO;
3706 }
3707 }
3708 }
3709 }
3710 }
3711 else
3712 {
3713 if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0)
3714 {
3715 if (IsBlock256b(swizzleMode))
3716 {
3717 if (swizzleMode == ADDR_SW_256B_S)
3718 {
3719 patInfo = m_settings.supportRbPlus ? SW_256_S_RBPLUS_PATINFO : SW_256_S_PATINFO;
3720 }
3721 else
3722 {
3723 ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
3724 patInfo = m_settings.supportRbPlus ? SW_256_D_RBPLUS_PATINFO : SW_256_D_PATINFO;
3725 }
3726 }
3727 else if (IsBlock4kb(swizzleMode))
3728 {
3729 if (IsStandardSwizzle(resourceType, swizzleMode))
3730 {
3731 if (swizzleMode == ADDR_SW_4KB_S)
3732 {
3733 patInfo = m_settings.supportRbPlus ? SW_4K_S_RBPLUS_PATINFO : SW_4K_S_PATINFO;
3734 }
3735 else
3736 {
3737 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
3738 patInfo = m_settings.supportRbPlus ? SW_4K_S_X_RBPLUS_PATINFO : SW_4K_S_X_PATINFO;
3739 }
3740 }
3741 else
3742 {
3743 if (swizzleMode == ADDR_SW_4KB_D)
3744 {
3745 patInfo = m_settings.supportRbPlus ? SW_4K_D_RBPLUS_PATINFO : SW_4K_D_PATINFO;
3746 }
3747 else
3748 {
3749 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
3750 patInfo = m_settings.supportRbPlus ? SW_4K_D_X_RBPLUS_PATINFO : SW_4K_D_X_PATINFO;
3751 }
3752 }
3753 }
3754 else
3755 {
3756 if (IsRtOptSwizzle(swizzleMode))
3757 {
3758 if (numFrag == 1)
3759 {
3760 patInfo = m_settings.supportRbPlus ? SW_64K_R_X_1xaa_RBPLUS_PATINFO : SW_64K_R_X_1xaa_PATINFO;
3761 }
3762 else if (numFrag == 2)
3763 {
3764 patInfo = m_settings.supportRbPlus ? SW_64K_R_X_2xaa_RBPLUS_PATINFO : SW_64K_R_X_2xaa_PATINFO;
3765 }
3766 else if (numFrag == 4)
3767 {
3768 patInfo = m_settings.supportRbPlus ? SW_64K_R_X_4xaa_RBPLUS_PATINFO : SW_64K_R_X_4xaa_PATINFO;
3769 }
3770 else
3771 {
3772 ADDR_ASSERT(numFrag == 8);
3773 patInfo = m_settings.supportRbPlus ? SW_64K_R_X_8xaa_RBPLUS_PATINFO : SW_64K_R_X_8xaa_PATINFO;
3774 }
3775 }
3776 else if (IsZOrderSwizzle(swizzleMode))
3777 {
3778 if (numFrag == 1)
3779 {
3780 patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_1xaa_RBPLUS_PATINFO : SW_64K_Z_X_1xaa_PATINFO;
3781 }
3782 else if (numFrag == 2)
3783 {
3784 patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_2xaa_RBPLUS_PATINFO : SW_64K_Z_X_2xaa_PATINFO;
3785 }
3786 else if (numFrag == 4)
3787 {
3788 patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_4xaa_RBPLUS_PATINFO : SW_64K_Z_X_4xaa_PATINFO;
3789 }
3790 else
3791 {
3792 ADDR_ASSERT(numFrag == 8);
3793 patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_8xaa_RBPLUS_PATINFO : SW_64K_Z_X_8xaa_PATINFO;
3794 }
3795 }
3796 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3797 {
3798 if (swizzleMode == ADDR_SW_64KB_D)
3799 {
3800 patInfo = m_settings.supportRbPlus ? SW_64K_D_RBPLUS_PATINFO : SW_64K_D_PATINFO;
3801 }
3802 else if (swizzleMode == ADDR_SW_64KB_D_X)
3803 {
3804 patInfo = m_settings.supportRbPlus ? SW_64K_D_X_RBPLUS_PATINFO : SW_64K_D_X_PATINFO;
3805 }
3806 else
3807 {
3808 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
3809 patInfo = m_settings.supportRbPlus ? SW_64K_D_T_RBPLUS_PATINFO : SW_64K_D_T_PATINFO;
3810 }
3811 }
3812 else
3813 {
3814 if (swizzleMode == ADDR_SW_64KB_S)
3815 {
3816 patInfo = m_settings.supportRbPlus ? SW_64K_S_RBPLUS_PATINFO : SW_64K_S_PATINFO;
3817 }
3818 else if (swizzleMode == ADDR_SW_64KB_S_X)
3819 {
3820 patInfo = m_settings.supportRbPlus ? SW_64K_S_X_RBPLUS_PATINFO : SW_64K_S_X_PATINFO;
3821 }
3822 else
3823 {
3824 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
3825 patInfo = m_settings.supportRbPlus ? SW_64K_S_T_RBPLUS_PATINFO : SW_64K_S_T_PATINFO;
3826 }
3827 }
3828 }
3829 }
3830 }
3831 }
3832
3833 return (patInfo != NULL) ? &patInfo[index] : NULL;
3834 }
3835
3836 /**
3837 ************************************************************************************************************************
3838 * Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
3839 *
3840 * @brief
3841 * Internal function to calculate address from coord for micro tiled swizzle surface
3842 *
3843 * @return
3844 * ADDR_E_RETURNCODE
3845 ************************************************************************************************************************
3846 */
3847 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
3848 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3849 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3850 ) const
3851 {
3852 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3853 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
3854 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
3855
3856 localIn.swizzleMode = pIn->swizzleMode;
3857 localIn.flags = pIn->flags;
3858 localIn.resourceType = pIn->resourceType;
3859 localIn.bpp = pIn->bpp;
3860 localIn.width = Max(pIn->unalignedWidth, 1u);
3861 localIn.height = Max(pIn->unalignedHeight, 1u);
3862 localIn.numSlices = Max(pIn->numSlices, 1u);
3863 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
3864 localIn.numSamples = Max(pIn->numSamples, 1u);
3865 localIn.numFrags = Max(pIn->numFrags, 1u);
3866 localOut.pMipInfo = mipInfo;
3867
3868 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
3869
3870 if (ret == ADDR_OK)
3871 {
3872 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3873 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
3874 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3875 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
3876
3877 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3878 {
3879 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
3880 const UINT_32 yb = pIn->y / localOut.blockHeight;
3881 const UINT_32 xb = pIn->x / localOut.blockWidth;
3882 const UINT_32 blockIndex = yb * pb + xb;
3883 const UINT_32 blockSize = 256;
3884 const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
3885 pIn->x << elemLog2,
3886 pIn->y,
3887 0);
3888 pOut->addr = localOut.sliceSize * pIn->slice +
3889 mipInfo[pIn->mipId].macroBlockOffset +
3890 (blockIndex * blockSize) +
3891 blk256Offset;
3892 }
3893 else
3894 {
3895 ret = ADDR_INVALIDPARAMS;
3896 }
3897 }
3898
3899 return ret;
3900 }
3901
3902 /**
3903 ************************************************************************************************************************
3904 * Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
3905 *
3906 * @brief
3907 * Internal function to calculate address from coord for macro tiled swizzle surface
3908 *
3909 * @return
3910 * ADDR_E_RETURNCODE
3911 ************************************************************************************************************************
3912 */
3913 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
3914 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3915 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3916 ) const
3917 {
3918 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3919 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
3920 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
3921
3922 localIn.swizzleMode = pIn->swizzleMode;
3923 localIn.flags = pIn->flags;
3924 localIn.resourceType = pIn->resourceType;
3925 localIn.bpp = pIn->bpp;
3926 localIn.width = Max(pIn->unalignedWidth, 1u);
3927 localIn.height = Max(pIn->unalignedHeight, 1u);
3928 localIn.numSlices = Max(pIn->numSlices, 1u);
3929 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
3930 localIn.numSamples = Max(pIn->numSamples, 1u);
3931 localIn.numFrags = Max(pIn->numFrags, 1u);
3932 localOut.pMipInfo = mipInfo;
3933
3934 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
3935
3936 if (ret == ADDR_OK)
3937 {
3938 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3939 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3940 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
3941 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
3942 const UINT_32 bankMask = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
3943 const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
3944 (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
3945
3946 if (localIn.numFrags > 1)
3947 {
3948 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
3949 pIn->resourceType,
3950 elemLog2,
3951 localIn.numFrags);
3952
3953 if (pPatInfo != NULL)
3954 {
3955 const UINT_32 pb = localOut.pitch / localOut.blockWidth;
3956 const UINT_32 yb = pIn->y / localOut.blockHeight;
3957 const UINT_32 xb = pIn->x / localOut.blockWidth;
3958 const UINT_64 blkIdx = yb * pb + xb;
3959
3960 ADDR_BIT_SETTING fullSwizzlePattern[20];
3961 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
3962
3963 const UINT_32 blkOffset =
3964 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
3965 blkSizeLog2,
3966 pIn->x,
3967 pIn->y,
3968 pIn->slice,
3969 pIn->sample);
3970
3971 pOut->addr = (localOut.sliceSize * pIn->slice) +
3972 (blkIdx << blkSizeLog2) +
3973 (blkOffset ^ pipeBankXor);
3974 }
3975 else
3976 {
3977 ret = ADDR_INVALIDPARAMS;
3978 }
3979 }
3980 else
3981 {
3982 const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
3983 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3984 const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
3985
3986 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3987 {
3988 const BOOL_32 inTail = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
3989 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
3990 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
3991 const UINT_32 sliceId = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
3992 const UINT_32 x = inTail ? (pIn->x + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
3993 const UINT_32 y = inTail ? (pIn->y + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
3994 const UINT_32 z = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
3995 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
3996 const UINT_32 yb = pIn->y / localOut.blockHeight;
3997 const UINT_32 xb = pIn->x / localOut.blockWidth;
3998 const UINT_64 blkIdx = yb * pb + xb;
3999 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4000 x << elemLog2,
4001 y,
4002 z);
4003 pOut->addr = sliceSize * sliceId +
4004 mipInfo[pIn->mipId].macroBlockOffset +
4005 (blkIdx << blkSizeLog2) +
4006 (blkOffset ^ pipeBankXor);
4007 }
4008 else
4009 {
4010 ret = ADDR_INVALIDPARAMS;
4011 }
4012 }
4013 }
4014
4015 return ret;
4016 }
4017
4018 /**
4019 ************************************************************************************************************************
4020 * Gfx10Lib::HwlComputeMaxBaseAlignments
4021 *
4022 * @brief
4023 * Gets maximum alignments
4024 * @return
4025 * maximum alignments
4026 ************************************************************************************************************************
4027 */
4028 UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
4029 {
4030 return m_blockVarSizeLog2 ? Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K;
4031 }
4032
4033 /**
4034 ************************************************************************************************************************
4035 * Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4036 *
4037 * @brief
4038 * Gets maximum alignments for metadata
4039 * @return
4040 * maximum alignments for metadata
4041 ************************************************************************************************************************
4042 */
4043 UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
4044 {
4045 Dim3d metaBlk;
4046
4047 const AddrSwizzleMode ValidSwizzleModeForXmask[] =
4048 {
4049 ADDR_SW_64KB_Z_X,
4050 m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X,
4051 };
4052
4053 UINT_32 maxBaseAlignHtile = 0;
4054 UINT_32 maxBaseAlignCmask = 0;
4055
4056 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++)
4057 {
4058 for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4059 {
4060 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4061 {
4062 // Max base alignment for Htile
4063 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil,
4064 ADDR_RSRC_TEX_2D,
4065 ValidSwizzleModeForXmask[swIdx],
4066 bppLog2,
4067 numFragLog2,
4068 TRUE,
4069 &metaBlk);
4070
4071 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4072 }
4073 }
4074
4075 // Max base alignment for Cmask
4076 const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask,
4077 ADDR_RSRC_TEX_2D,
4078 ValidSwizzleModeForXmask[swIdx],
4079 0,
4080 0,
4081 TRUE,
4082 &metaBlk);
4083
4084 maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask);
4085 }
4086
4087 // Max base alignment for 2D Dcc
4088 const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4089 {
4090 ADDR_SW_64KB_S_X,
4091 ADDR_SW_64KB_D_X,
4092 ADDR_SW_64KB_R_X,
4093 m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4094 };
4095
4096 UINT_32 maxBaseAlignDcc2D = 0;
4097
4098 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4099 {
4100 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4101 {
4102 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4103 {
4104 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,
4105 ADDR_RSRC_TEX_2D,
4106 ValidSwizzleModeForDcc2D[swIdx],
4107 bppLog2,
4108 numFragLog2,
4109 TRUE,
4110 &metaBlk);
4111
4112 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4113 }
4114 }
4115 }
4116
4117 // Max base alignment for 3D Dcc
4118 const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4119 {
4120 ADDR_SW_64KB_Z_X,
4121 ADDR_SW_64KB_S_X,
4122 ADDR_SW_64KB_D_X,
4123 ADDR_SW_64KB_R_X,
4124 m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4125 };
4126
4127 UINT_32 maxBaseAlignDcc3D = 0;
4128
4129 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4130 {
4131 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4132 {
4133 const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,
4134 ADDR_RSRC_TEX_3D,
4135 ValidSwizzleModeForDcc3D[swIdx],
4136 bppLog2,
4137 0,
4138 TRUE,
4139 &metaBlk);
4140
4141 maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4142 }
4143 }
4144
4145 return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4146 }
4147
4148 /**
4149 ************************************************************************************************************************
4150 * Gfx10Lib::GetMetaElementSizeLog2
4151 *
4152 * @brief
4153 * Gets meta data element size log2
4154 * @return
4155 * Meta data element size log2
4156 ************************************************************************************************************************
4157 */
4158 INT_32 Gfx10Lib::GetMetaElementSizeLog2(
4159 Gfx10DataType dataType) ///< Data surface type
4160 {
4161 INT_32 elemSizeLog2 = 0;
4162
4163 if (dataType == Gfx10DataColor)
4164 {
4165 elemSizeLog2 = 0;
4166 }
4167 else if (dataType == Gfx10DataDepthStencil)
4168 {
4169 elemSizeLog2 = 2;
4170 }
4171 else
4172 {
4173 ADDR_ASSERT(dataType == Gfx10DataFmask);
4174 elemSizeLog2 = -1;
4175 }
4176
4177 return elemSizeLog2;
4178 }
4179
4180 /**
4181 ************************************************************************************************************************
4182 * Gfx10Lib::GetMetaCacheSizeLog2
4183 *
4184 * @brief
4185 * Gets meta data cache line size log2
4186 * @return
4187 * Meta data cache line size log2
4188 ************************************************************************************************************************
4189 */
4190 INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
4191 Gfx10DataType dataType) ///< Data surface type
4192 {
4193 INT_32 cacheSizeLog2 = 0;
4194
4195 if (dataType == Gfx10DataColor)
4196 {
4197 cacheSizeLog2 = 6;
4198 }
4199 else if (dataType == Gfx10DataDepthStencil)
4200 {
4201 cacheSizeLog2 = 8;
4202 }
4203 else
4204 {
4205 ADDR_ASSERT(dataType == Gfx10DataFmask);
4206 cacheSizeLog2 = 8;
4207 }
4208 return cacheSizeLog2;
4209 }
4210
4211 /**
4212 ************************************************************************************************************************
4213 * Gfx10Lib::HwlComputeSurfaceInfoLinear
4214 *
4215 * @brief
4216 * Internal function to calculate alignment for linear surface
4217 *
4218 * @return
4219 * ADDR_E_RETURNCODE
4220 ************************************************************************************************************************
4221 */
4222 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
4223 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4224 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4225 ) const
4226 {
4227 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4228
4229 if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4230 {
4231 returnCode = ADDR_INVALIDPARAMS;
4232 }
4233 else
4234 {
4235 const UINT_32 elementBytes = pIn->bpp >> 3;
4236 const UINT_32 pitchAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4237 const UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4238 UINT_32 pitch = PowTwoAlign(pIn->width, pitchAlign);
4239 UINT_32 actualHeight = pIn->height;
4240 UINT_64 sliceSize = 0;
4241
4242 if (pIn->numMipLevels > 1)
4243 {
4244 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4245 {
4246 UINT_32 mipWidth, mipHeight;
4247
4248 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4249
4250 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4251
4252 if (pOut->pMipInfo != NULL)
4253 {
4254 pOut->pMipInfo[i].pitch = mipActualWidth;
4255 pOut->pMipInfo[i].height = mipHeight;
4256 pOut->pMipInfo[i].depth = mipDepth;
4257 pOut->pMipInfo[i].offset = sliceSize;
4258 pOut->pMipInfo[i].mipTailOffset = 0;
4259 pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4260 }
4261
4262 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4263 }
4264 }
4265 else
4266 {
4267 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4268
4269 if (returnCode == ADDR_OK)
4270 {
4271 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4272
4273 if (pOut->pMipInfo != NULL)
4274 {
4275 pOut->pMipInfo[0].pitch = pitch;
4276 pOut->pMipInfo[0].height = actualHeight;
4277 pOut->pMipInfo[0].depth = mipDepth;
4278 pOut->pMipInfo[0].offset = 0;
4279 pOut->pMipInfo[0].mipTailOffset = 0;
4280 pOut->pMipInfo[0].macroBlockOffset = 0;
4281 }
4282 }
4283 }
4284
4285 if (returnCode == ADDR_OK)
4286 {
4287 pOut->pitch = pitch;
4288 pOut->height = actualHeight;
4289 pOut->numSlices = pIn->numSlices;
4290 pOut->sliceSize = sliceSize;
4291 pOut->surfSize = sliceSize * pOut->numSlices;
4292 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4293 pOut->blockWidth = pitchAlign;
4294 pOut->blockHeight = 1;
4295 pOut->blockSlices = 1;
4296
4297 // Following members are useless on GFX10
4298 pOut->mipChainPitch = 0;
4299 pOut->mipChainHeight = 0;
4300 pOut->mipChainSlice = 0;
4301 pOut->epitchIsHeight = FALSE;
4302
4303 // Post calculation validate
4304 ADDR_ASSERT(pOut->sliceSize > 0);
4305 }
4306 }
4307
4308 return returnCode;
4309 }
4310
4311 } // V2
4312 } // Addr