amd/addrlib: update to the latest version
[mesa.git] / src / amd / addrlib / src / gfx10 / gfx10addrlib.cpp
1 /*
2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx10addrlib.cpp
30 * @brief Contains the implementation for the Gfx10Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx10addrlib.h"
35 #include "gfx10_gb_reg.h"
36
37 #include "amdgpu_asic_addr.h"
38
39 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
40 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
41
42 namespace Addr
43 {
44 /**
45 ************************************************************************************************************************
46 * Gfx10HwlInit
47 *
48 * @brief
49 * Creates a Gfx10Lib object.
50 *
51 * @return
52 * Returns a Gfx10Lib object pointer.
53 ************************************************************************************************************************
54 */
55 Addr::Lib* Gfx10HwlInit(const Client* pClient)
56 {
57 return V2::Gfx10Lib::CreateObj(pClient);
58 }
59
60 namespace V2
61 {
62
63 ////////////////////////////////////////////////////////////////////////////////////////////////////
64 // Static Const Member
65 ////////////////////////////////////////////////////////////////////////////////////////////////////
66
67 const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
68 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
69 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
70 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
71 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
72 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
73
74 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
75 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
76 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
77 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
78
79 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
80 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
81 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
82 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
83
84 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
85 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
86 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
87 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
88
89 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
90 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
91 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
92 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
93
94 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
95 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_X
96 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_X
97 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
98
99 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
100 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
101 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
102 {0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_64KB_R_X
103
104 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_Z_X
105 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
106 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
107 {0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_VAR_R_X
108 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
109 };
110
111 const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
112
113 const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
114 const Dim3d Gfx10Lib::Block4K_Log2_3d[] = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
115
116 /**
117 ************************************************************************************************************************
118 * Gfx10Lib::Gfx10Lib
119 *
120 * @brief
121 * Constructor
122 *
123 ************************************************************************************************************************
124 */
125 Gfx10Lib::Gfx10Lib(const Client* pClient)
126 :
127 Lib(pClient),
128 m_colorBaseIndex(0),
129 m_xmaskBaseIndex(0),
130 m_dccBaseIndex(0)
131 {
132 m_class = AI_ADDRLIB;
133 memset(&m_settings, 0, sizeof(m_settings));
134 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
135 }
136
137 /**
138 ************************************************************************************************************************
139 * Gfx10Lib::~Gfx10Lib
140 *
141 * @brief
142 * Destructor
143 ************************************************************************************************************************
144 */
145 Gfx10Lib::~Gfx10Lib()
146 {
147 }
148
149 /**
150 ************************************************************************************************************************
151 * Gfx10Lib::HwlComputeHtileInfo
152 *
153 * @brief
154 * Interface function stub of AddrComputeHtileInfo
155 *
156 * @return
157 * ADDR_E_RETURNCODE
158 ************************************************************************************************************************
159 */
160 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
161 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
162 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
163 ) const
164 {
165 ADDR_E_RETURNCODE ret = ADDR_OK;
166
167 if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
168 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) ||
169 (pIn->hTileFlags.pipeAligned != TRUE))
170 {
171 ret = ADDR_INVALIDPARAMS;
172 }
173 else
174 {
175 Dim3d metaBlk = {0};
176 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
177 ADDR_RSRC_TEX_2D,
178 pIn->swizzleMode,
179 0,
180 0,
181 TRUE,
182 &metaBlk);
183
184 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
185 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
186 pOut->baseAlign = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
187 pOut->metaBlkWidth = metaBlk.w;
188 pOut->metaBlkHeight = metaBlk.h;
189
190 if (pIn->numMipLevels > 1)
191 {
192 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
193
194 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
195
196 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
197 {
198 UINT_32 mipWidth, mipHeight;
199
200 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
201
202 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
203 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
204
205 const UINT_32 pitchInM = mipWidth / metaBlk.w;
206 const UINT_32 heightInM = mipHeight / metaBlk.h;
207 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
208
209 if (pOut->pMipInfo != NULL)
210 {
211 pOut->pMipInfo[i].inMiptail = FALSE;
212 pOut->pMipInfo[i].offset = offset;
213 pOut->pMipInfo[i].sliceSize = mipSliceSize;
214 }
215
216 offset += mipSliceSize;
217 }
218
219 pOut->sliceSize = offset;
220 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
221 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
222
223 if (pOut->pMipInfo != NULL)
224 {
225 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
226 {
227 pOut->pMipInfo[i].inMiptail = TRUE;
228 pOut->pMipInfo[i].offset = 0;
229 pOut->pMipInfo[i].sliceSize = 0;
230 }
231
232 if (pIn->firstMipIdInTail != pIn->numMipLevels)
233 {
234 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
235 }
236 }
237 }
238 else
239 {
240 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
241 const UINT_32 heightInM = pOut->height / metaBlk.h;
242
243 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
244 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
245 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
246
247 if (pOut->pMipInfo != NULL)
248 {
249 pOut->pMipInfo[0].inMiptail = FALSE;
250 pOut->pMipInfo[0].offset = 0;
251 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
252 }
253 }
254 }
255
256 return ret;
257 }
258
259 /**
260 ************************************************************************************************************************
261 * Gfx10Lib::HwlComputeCmaskInfo
262 *
263 * @brief
264 * Interface function stub of AddrComputeCmaskInfo
265 *
266 * @return
267 * ADDR_E_RETURNCODE
268 ************************************************************************************************************************
269 */
270 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
271 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
272 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
273 ) const
274 {
275 ADDR_E_RETURNCODE ret = ADDR_OK;
276
277 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
278 (pIn->cMaskFlags.pipeAligned != TRUE) ||
279 ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
280 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))))
281 {
282 ret = ADDR_INVALIDPARAMS;
283 }
284 else
285 {
286 Dim3d metaBlk = {0};
287 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,
288 ADDR_RSRC_TEX_2D,
289 pIn->swizzleMode,
290 0,
291 0,
292 TRUE,
293 &metaBlk);
294
295 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
296 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
297 pOut->baseAlign = metaBlkSize;
298 pOut->metaBlkWidth = metaBlk.w;
299 pOut->metaBlkHeight = metaBlk.h;
300
301 if (pIn->numMipLevels > 1)
302 {
303 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
304
305 UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;
306
307 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
308 {
309 UINT_32 mipWidth, mipHeight;
310
311 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
312
313 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
314 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
315
316 const UINT_32 pitchInM = mipWidth / metaBlk.w;
317 const UINT_32 heightInM = mipHeight / metaBlk.h;
318
319 if (pOut->pMipInfo != NULL)
320 {
321 pOut->pMipInfo[i].inMiptail = FALSE;
322 pOut->pMipInfo[i].offset = metaBlkPerSlice * metaBlkSize;
323 pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;
324 }
325
326 metaBlkPerSlice += pitchInM * heightInM;
327 }
328
329 pOut->metaBlkNumPerSlice = metaBlkPerSlice;
330
331 if (pOut->pMipInfo != NULL)
332 {
333 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
334 {
335 pOut->pMipInfo[i].inMiptail = TRUE;
336 pOut->pMipInfo[i].offset = 0;
337 pOut->pMipInfo[i].sliceSize = 0;
338 }
339
340 if (pIn->firstMipIdInTail != pIn->numMipLevels)
341 {
342 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
343 }
344 }
345 }
346 else
347 {
348 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
349 const UINT_32 heightInM = pOut->height / metaBlk.h;
350
351 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
352
353 if (pOut->pMipInfo != NULL)
354 {
355 pOut->pMipInfo[0].inMiptail = FALSE;
356 pOut->pMipInfo[0].offset = 0;
357 pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
358 }
359 }
360
361 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
362 pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;
363 }
364
365 return ret;
366 }
367
368 /**
369 ************************************************************************************************************************
370 * Gfx10Lib::HwlComputeDccInfo
371 *
372 * @brief
373 * Interface function to compute DCC key info
374 *
375 * @return
376 * ADDR_E_RETURNCODE
377 ************************************************************************************************************************
378 */
379 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
380 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
381 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
382 ) const
383 {
384 ADDR_E_RETURNCODE ret = ADDR_OK;
385
386 if (pIn->swizzleMode != ADDR_SW_64KB_Z_X && pIn->swizzleMode != ADDR_SW_64KB_R_X)
387 {
388 // Hardware does not support DCC for this swizzle mode.
389 ret = ADDR_INVALIDPARAMS;
390 }
391 else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
392 {
393 // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
394 ret = ADDR_INVALIDPARAMS;
395 }
396 else
397 {
398 // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
399 ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));
400
401 Dim3d metaBlk = {0};
402 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
403 const UINT_32 numFragLog2 = Log2(pIn->numFrags);
404 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
405 pIn->resourceType,
406 pIn->swizzleMode,
407 elemLog2,
408 numFragLog2,
409 pIn->dccKeyFlags.pipeAligned,
410 &metaBlk);
411 const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
412
413 pOut->compressBlkWidth = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
414 pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
415 pOut->compressBlkDepth = isThick ? Block256_3d[elemLog2].d : 1;
416
417 pOut->dccRamBaseAlign = metaBlkSize;
418 pOut->metaBlkWidth = metaBlk.w;
419 pOut->metaBlkHeight = metaBlk.h;
420 pOut->metaBlkDepth = metaBlk.d;
421
422 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
423 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
424 pOut->depth = PowTwoAlign(pIn->numSlices, metaBlk.d);
425
426 if (pIn->numMipLevels > 1)
427 {
428 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
429
430 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
431
432 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
433 {
434 UINT_32 mipWidth, mipHeight;
435
436 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
437
438 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
439 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
440
441 const UINT_32 pitchInM = mipWidth / metaBlk.w;
442 const UINT_32 heightInM = mipHeight / metaBlk.h;
443 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
444
445 if (pOut->pMipInfo != NULL)
446 {
447 pOut->pMipInfo[i].inMiptail = FALSE;
448 pOut->pMipInfo[i].offset = offset;
449 pOut->pMipInfo[i].sliceSize = mipSliceSize;
450 }
451
452 offset += mipSliceSize;
453 }
454
455 pOut->dccRamSliceSize = offset;
456 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
457 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
458
459 if (pOut->pMipInfo != NULL)
460 {
461 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
462 {
463 pOut->pMipInfo[i].inMiptail = TRUE;
464 pOut->pMipInfo[i].offset = 0;
465 pOut->pMipInfo[i].sliceSize = 0;
466 }
467
468 if (pIn->firstMipIdInTail != pIn->numMipLevels)
469 {
470 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
471 }
472 }
473 }
474 else
475 {
476 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
477 const UINT_32 heightInM = pOut->height / metaBlk.h;
478
479 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
480 pOut->dccRamSliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
481 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
482
483 if (pOut->pMipInfo != NULL)
484 {
485 pOut->pMipInfo[0].inMiptail = FALSE;
486 pOut->pMipInfo[0].offset = 0;
487 pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
488 }
489 }
490 }
491
492 return ret;
493 }
494
495 /**
496 ************************************************************************************************************************
497 * Gfx10Lib::HwlComputeCmaskAddrFromCoord
498 *
499 * @brief
500 * Interface function stub of AddrComputeCmaskAddrFromCoord
501 *
502 * @return
503 * ADDR_E_RETURNCODE
504 ************************************************************************************************************************
505 */
506 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
507 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
508 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
509 {
510 // Only support pipe aligned CMask
511 ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);
512
513 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {};
514 input.size = sizeof(input);
515 input.cMaskFlags = pIn->cMaskFlags;
516 input.colorFlags = pIn->colorFlags;
517 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
518 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
519 input.numSlices = Max(pIn->numSlices, 1u);
520 input.swizzleMode = pIn->swizzleMode;
521 input.resourceType = pIn->resourceType;
522
523 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {};
524 output.size = sizeof(output);
525
526 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
527
528 if (returnCode == ADDR_OK)
529 {
530 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
531 const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3);
532 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
533 const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2;
534 const UINT_8* patIdxTable = (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? CMASK_VAR_RBPLUS_PATIDX :
535 (m_settings.supportRbPlus ? CMASK_64K_RBPLUS_PATIDX : CMASK_64K_PATIDX);
536
537 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
538 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
539 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(CMASK_SW_PATTERN[patIdxTable[index]],
540 blkSizeLog2 + 1, // +1 for nibble offset
541 pIn->x,
542 pIn->y,
543 pIn->slice,
544 0);
545 const UINT_32 xb = pIn->x / output.metaBlkWidth;
546 const UINT_32 yb = pIn->y / output.metaBlkHeight;
547 const UINT_32 pb = output.pitch / output.metaBlkWidth;
548 const UINT_32 blkIndex = (yb * pb) + xb;
549 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
550
551 pOut->addr = (output.sliceSize * pIn->slice) +
552 (blkIndex * (1 << blkSizeLog2)) +
553 ((blkOffset >> 1) ^ pipeXor);
554 pOut->bitPosition = (blkOffset & 1) << 2;
555 }
556
557 return returnCode;
558 }
559
560 /**
561 ************************************************************************************************************************
562 * Gfx10Lib::HwlComputeHtileAddrFromCoord
563 *
564 * @brief
565 * Interface function stub of AddrComputeHtileAddrFromCoord
566 *
567 * @return
568 * ADDR_E_RETURNCODE
569 ************************************************************************************************************************
570 */
571 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
572 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
573 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
574 {
575 ADDR_E_RETURNCODE returnCode = ADDR_OK;
576
577 if (pIn->numMipLevels > 1)
578 {
579 returnCode = ADDR_NOTIMPLEMENTED;
580 }
581 else
582 {
583 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
584 input.size = sizeof(input);
585 input.hTileFlags = pIn->hTileFlags;
586 input.depthFlags = pIn->depthflags;
587 input.swizzleMode = pIn->swizzleMode;
588 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
589 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
590 input.numSlices = Max(pIn->numSlices, 1u);
591 input.numMipLevels = 1;
592
593 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
594 output.size = sizeof(output);
595
596 returnCode = ComputeHtileInfo(&input, &output);
597
598 if (returnCode == ADDR_OK)
599 {
600 const UINT_32 numSampleLog2 = Log2(pIn->numSamples);
601 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
602 const UINT_32 index = m_xmaskBaseIndex + numSampleLog2;
603 const UINT_8* patIdxTable = m_settings.supportRbPlus ? HTILE_RBPLUS_PATIDX : HTILE_PATIDX;
604
605 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
606 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
607 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(HTILE_SW_PATTERN[patIdxTable[index]],
608 blkSizeLog2 + 1, // +1 for nibble offset
609 pIn->x,
610 pIn->y,
611 pIn->slice,
612 0);
613 const UINT_32 xb = pIn->x / output.metaBlkWidth;
614 const UINT_32 yb = pIn->y / output.metaBlkHeight;
615 const UINT_32 pb = output.pitch / output.metaBlkWidth;
616 const UINT_32 blkIndex = (yb * pb) + xb;
617 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
618
619 pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
620 (blkIndex * (1 << blkSizeLog2)) +
621 ((blkOffset >> 1) ^ pipeXor);
622 }
623 }
624
625 return returnCode;
626 }
627
628 /**
629 ************************************************************************************************************************
630 * Gfx10Lib::HwlComputeHtileCoordFromAddr
631 *
632 * @brief
633 * Interface function stub of AddrComputeHtileCoordFromAddr
634 *
635 * @return
636 * ADDR_E_RETURNCODE
637 ************************************************************************************************************************
638 */
639 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
640 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
641 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
642 {
643 ADDR_NOT_IMPLEMENTED();
644
645 return ADDR_OK;
646 }
647
648 /**
649 ************************************************************************************************************************
650 * Gfx10Lib::HwlComputeDccAddrFromCoord
651 *
652 * @brief
653 * Interface function stub of AddrComputeDccAddrFromCoord
654 *
655 * @return
656 * ADDR_E_RETURNCODE
657 ************************************************************************************************************************
658 */
659 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccAddrFromCoord(
660 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
661 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
662 {
663 ADDR_E_RETURNCODE returnCode = ADDR_OK;
664
665 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
666 (pIn->swizzleMode != ADDR_SW_64KB_R_X) ||
667 (pIn->dccKeyFlags.linear == TRUE) ||
668 (pIn->numFrags > 1) ||
669 (pIn->numMipLevels > 1) ||
670 (pIn->mipId > 0))
671 {
672 returnCode = ADDR_NOTSUPPORTED;
673 }
674 else
675 {
676 ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
677 input.size = sizeof(input);
678 input.dccKeyFlags = pIn->dccKeyFlags;
679 input.colorFlags = pIn->colorFlags;
680 input.swizzleMode = pIn->swizzleMode;
681 input.resourceType = pIn->resourceType;
682 input.bpp = pIn->bpp;
683 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
684 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
685 input.numSlices = Max(pIn->numSlices, 1u);
686 input.numFrags = Max(pIn->numFrags, 1u);
687 input.numMipLevels = Max(pIn->numMipLevels, 1u);
688
689 ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
690 output.size = sizeof(output);
691
692 returnCode = ComputeDccInfo(&input, &output);
693
694 if (returnCode == ADDR_OK)
695 {
696 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
697 const UINT_32 numPipeLog2 = m_pipesLog2;
698 const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
699 UINT_32 index = m_dccBaseIndex + elemLog2;
700 const UINT_8* patIdxTable;
701
702 if (m_settings.supportRbPlus)
703 {
704 patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX;
705
706 if (pIn->dccKeyFlags.pipeAligned)
707 {
708 index += MaxNumOfBpp;
709
710 if (m_numPkrLog2 < 2)
711 {
712 index += m_pipesLog2 * MaxNumOfBpp;
713 }
714 else
715 {
716 // 4 groups for "m_numPkrLog2 < 2" case
717 index += 4 * MaxNumOfBpp;
718
719 const UINT_32 dccPipePerPkr = 3;
720
721 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
722 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
723 }
724 }
725 }
726 else
727 {
728 patIdxTable = DCC_64K_R_X_PATIDX;
729
730 if (pIn->dccKeyFlags.pipeAligned)
731 {
732 index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
733 }
734 else
735 {
736 index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
737 }
738 }
739
740 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) + elemLog2 - 8;
741 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
742 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
743 blkSizeLog2 + 1, // +1 for nibble offset
744 pIn->x,
745 pIn->y,
746 pIn->slice,
747 0);
748 const UINT_32 xb = pIn->x / output.metaBlkWidth;
749 const UINT_32 yb = pIn->y / output.metaBlkHeight;
750 const UINT_32 pb = output.pitch / output.metaBlkWidth;
751 const UINT_32 blkIndex = (yb * pb) + xb;
752 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
753
754 pOut->addr = (static_cast<UINT_64>(output.dccRamSliceSize) * pIn->slice) +
755 (blkIndex * (1 << blkSizeLog2)) +
756 ((blkOffset >> 1) ^ pipeXor);
757 }
758 }
759
760 return returnCode;
761 }
762
763 /**
764 ************************************************************************************************************************
765 * Gfx10Lib::HwlInitGlobalParams
766 *
767 * @brief
768 * Initializes global parameters
769 *
770 * @return
771 * TRUE if all settings are valid
772 *
773 ************************************************************************************************************************
774 */
775 BOOL_32 Gfx10Lib::HwlInitGlobalParams(
776 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
777 {
778 BOOL_32 valid = TRUE;
779 GB_ADDR_CONFIG gbAddrConfig;
780
781 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
782
783 // These values are copied from CModel code
784 switch (gbAddrConfig.bits.NUM_PIPES)
785 {
786 case ADDR_CONFIG_1_PIPE:
787 m_pipes = 1;
788 m_pipesLog2 = 0;
789 break;
790 case ADDR_CONFIG_2_PIPE:
791 m_pipes = 2;
792 m_pipesLog2 = 1;
793 break;
794 case ADDR_CONFIG_4_PIPE:
795 m_pipes = 4;
796 m_pipesLog2 = 2;
797 break;
798 case ADDR_CONFIG_8_PIPE:
799 m_pipes = 8;
800 m_pipesLog2 = 3;
801 break;
802 case ADDR_CONFIG_16_PIPE:
803 m_pipes = 16;
804 m_pipesLog2 = 4;
805 break;
806 case ADDR_CONFIG_32_PIPE:
807 m_pipes = 32;
808 m_pipesLog2 = 5;
809 break;
810 case ADDR_CONFIG_64_PIPE:
811 m_pipes = 64;
812 m_pipesLog2 = 6;
813 break;
814 default:
815 ADDR_ASSERT_ALWAYS();
816 valid = FALSE;
817 break;
818 }
819
820 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
821 {
822 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
823 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
824 m_pipeInterleaveLog2 = 8;
825 break;
826 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
827 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
828 m_pipeInterleaveLog2 = 9;
829 break;
830 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
831 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
832 m_pipeInterleaveLog2 = 10;
833 break;
834 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
835 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
836 m_pipeInterleaveLog2 = 11;
837 break;
838 default:
839 ADDR_ASSERT_ALWAYS();
840 valid = FALSE;
841 break;
842 }
843
844 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
845 // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
846 // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
847 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
848
849 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
850 {
851 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
852 m_maxCompFrag = 1;
853 m_maxCompFragLog2 = 0;
854 break;
855 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
856 m_maxCompFrag = 2;
857 m_maxCompFragLog2 = 1;
858 break;
859 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
860 m_maxCompFrag = 4;
861 m_maxCompFragLog2 = 2;
862 break;
863 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
864 m_maxCompFrag = 8;
865 m_maxCompFragLog2 = 3;
866 break;
867 default:
868 ADDR_ASSERT_ALWAYS();
869 valid = FALSE;
870 break;
871 }
872
873 {
874 // Skip unaligned case
875 m_xmaskBaseIndex += MaxNumOfAA;
876
877 m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfAA;
878 m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
879
880 if (m_settings.supportRbPlus)
881 {
882 m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
883 m_numSaLog2 = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
884
885 ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
886
887 ADDR_C_ASSERT(sizeof(HTILE_RBPLUS_PATIDX) / sizeof(HTILE_RBPLUS_PATIDX[0]) ==
888 sizeof(CMASK_64K_RBPLUS_PATIDX) / sizeof(CMASK_64K_RBPLUS_PATIDX[0]));
889
890 if (m_numPkrLog2 >= 2)
891 {
892 m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
893 m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
894 }
895 }
896 else
897 {
898 const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
899 static_cast<UINT_32>(ADDR_CONFIG_1_PIPE) +
900 1;
901
902 ADDR_C_ASSERT(sizeof(HTILE_PATIDX) / sizeof(HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);
903
904 ADDR_C_ASSERT(sizeof(HTILE_PATIDX) / sizeof(HTILE_PATIDX[0]) ==
905 sizeof(CMASK_64K_PATIDX) / sizeof(CMASK_64K_PATIDX[0]));
906 }
907 }
908
909 if (m_settings.supportRbPlus)
910 {
911 // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
912 // corresponding SW_64KB_* mode
913 m_blockVarSizeLog2 = m_pipesLog2 + 14;
914 }
915
916 if (valid)
917 {
918 InitEquationTable();
919 }
920
921 return valid;
922 }
923
924 /**
925 ************************************************************************************************************************
926 * Gfx10Lib::HwlConvertChipFamily
927 *
928 * @brief
929 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
930 * @return
931 * ChipFamily
932 ************************************************************************************************************************
933 */
934 ChipFamily Gfx10Lib::HwlConvertChipFamily(
935 UINT_32 chipFamily, ///< [in] chip family defined in atiih.h
936 UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h
937 {
938 ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
939
940 m_settings.dccUnsup3DSwDis = 1;
941
942 switch (chipFamily)
943 {
944 case FAMILY_NV:
945 m_settings.isDcn2 = 1;
946 break;
947 default:
948 ADDR_ASSERT(!"Unknown chip family");
949 break;
950 }
951
952 m_settings.dsMipmapHtileFix = 1;
953
954 if (ASICREV_IS_NAVI10_P(chipRevision))
955 {
956 m_settings.dsMipmapHtileFix = 0;
957 }
958
959 m_configFlags.use32bppFor422Fmt = TRUE;
960
961 return family;
962 }
963
964 /**
965 ************************************************************************************************************************
966 * Gfx10Lib::GetBlk256SizeLog2
967 *
968 * @brief
969 * Get block 256 size
970 *
971 * @return
972 * N/A
973 ************************************************************************************************************************
974 */
975 void Gfx10Lib::GetBlk256SizeLog2(
976 AddrResourceType resourceType, ///< [in] Resource type
977 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
978 UINT_32 elemLog2, ///< [in] element size log2
979 UINT_32 numSamplesLog2, ///< [in] number of samples
980 Dim3d* pBlock ///< [out] block size
981 ) const
982 {
983 if (IsThin(resourceType, swizzleMode))
984 {
985 UINT_32 blockBits = 8 - elemLog2;
986
987 if (IsZOrderSwizzle(swizzleMode))
988 {
989 blockBits -= numSamplesLog2;
990 }
991
992 pBlock->w = (blockBits >> 1) + (blockBits & 1);
993 pBlock->h = (blockBits >> 1);
994 pBlock->d = 0;
995 }
996 else
997 {
998 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
999
1000 UINT_32 blockBits = 8 - elemLog2;
1001
1002 pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
1003 pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
1004 pBlock->h = (blockBits / 3);
1005 }
1006 }
1007
1008 /**
1009 ************************************************************************************************************************
1010 * Gfx10Lib::GetCompressedBlockSizeLog2
1011 *
1012 * @brief
1013 * Get compress block size
1014 *
1015 * @return
1016 * N/A
1017 ************************************************************************************************************************
1018 */
1019 void Gfx10Lib::GetCompressedBlockSizeLog2(
1020 Gfx10DataType dataType, ///< [in] Data type
1021 AddrResourceType resourceType, ///< [in] Resource type
1022 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1023 UINT_32 elemLog2, ///< [in] element size log2
1024 UINT_32 numSamplesLog2, ///< [in] number of samples
1025 Dim3d* pBlock ///< [out] block size
1026 ) const
1027 {
1028 if (dataType == Gfx10DataColor)
1029 {
1030 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1031 }
1032 else
1033 {
1034 ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1035 pBlock->w = 3;
1036 pBlock->h = 3;
1037 pBlock->d = 0;
1038 }
1039 }
1040
1041 /**
1042 ************************************************************************************************************************
1043 * Gfx10Lib::GetMetaOverlapLog2
1044 *
1045 * @brief
1046 * Get meta block overlap
1047 *
1048 * @return
1049 * N/A
1050 ************************************************************************************************************************
1051 */
1052 INT_32 Gfx10Lib::GetMetaOverlapLog2(
1053 Gfx10DataType dataType, ///< [in] Data type
1054 AddrResourceType resourceType, ///< [in] Resource type
1055 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1056 UINT_32 elemLog2, ///< [in] element size log2
1057 UINT_32 numSamplesLog2 ///< [in] number of samples
1058 ) const
1059 {
1060 Dim3d compBlock;
1061 Dim3d microBlock;
1062
1063 GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1064 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, &microBlock);
1065
1066 const INT_32 compSizeLog2 = compBlock.w + compBlock.h + compBlock.d;
1067 const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1068 const INT_32 maxSizeLog2 = Max(compSizeLog2, blk256SizeLog2);
1069 const INT_32 numPipesLog2 = GetEffectiveNumPipes();
1070 INT_32 overlap = numPipesLog2 - maxSizeLog2;
1071
1072 if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1073 {
1074 overlap++;
1075 }
1076
1077 // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1078 if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1079 {
1080 overlap--;
1081 }
1082 overlap = Max(overlap, 0);
1083 return overlap;
1084 }
1085
1086 /**
1087 ************************************************************************************************************************
1088 * Gfx10Lib::Get3DMetaOverlapLog2
1089 *
1090 * @brief
1091 * Get 3d meta block overlap
1092 *
1093 * @return
1094 * N/A
1095 ************************************************************************************************************************
1096 */
1097 INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1098 AddrResourceType resourceType, ///< [in] Resource type
1099 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1100 UINT_32 elemLog2 ///< [in] element size log2
1101 ) const
1102 {
1103 Dim3d microBlock;
1104 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, &microBlock);
1105
1106 INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1107
1108 if (m_settings.supportRbPlus)
1109 {
1110 overlap++;
1111 }
1112
1113 if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1114 {
1115 overlap = 0;
1116 }
1117 return overlap;
1118 }
1119
1120 /**
1121 ************************************************************************************************************************
1122 * Gfx10Lib::GetPipeRotateAmount
1123 *
1124 * @brief
1125 * Get pipe rotate amount
1126 *
1127 * @return
1128 * Pipe rotate amount
1129 ************************************************************************************************************************
1130 */
1131
1132 INT_32 Gfx10Lib::GetPipeRotateAmount(
1133 AddrResourceType resourceType, ///< [in] Resource type
1134 AddrSwizzleMode swizzleMode ///< [in] Swizzle mode
1135 ) const
1136 {
1137 INT_32 amount = 0;
1138
1139 if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1140 {
1141 amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1142 1 : m_pipesLog2 - (m_numSaLog2 + 1);
1143 }
1144
1145 return amount;
1146 }
1147
1148 /**
1149 ************************************************************************************************************************
1150 * Gfx10Lib::GetMetaBlkSize
1151 *
1152 * @brief
1153 * Get metadata block size
1154 *
1155 * @return
1156 * Meta block size
1157 ************************************************************************************************************************
1158 */
1159 UINT_32 Gfx10Lib::GetMetaBlkSize(
1160 Gfx10DataType dataType, ///< [in] Data type
1161 AddrResourceType resourceType, ///< [in] Resource type
1162 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1163 UINT_32 elemLog2, ///< [in] element size log2
1164 UINT_32 numSamplesLog2, ///< [in] number of samples
1165 BOOL_32 pipeAlign, ///< [in] pipe align
1166 Dim3d* pBlock ///< [out] block size
1167 ) const
1168 {
1169 INT_32 metablkSizeLog2;
1170 const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType);
1171 const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType);
1172 const INT_32 compBlkSizeLog2 = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1173 const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1174 numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1175 const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode);
1176 INT_32 numPipesLog2 = m_pipesLog2;
1177
1178 if (IsThin(resourceType, swizzleMode))
1179 {
1180 if ((pipeAlign == FALSE) ||
1181 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1182 (IsDisplaySwizzle(resourceType, swizzleMode) == TRUE))
1183 {
1184 if (pipeAlign)
1185 {
1186 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1187 metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1188 }
1189 else
1190 {
1191 metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1192 }
1193 }
1194 else
1195 {
1196 if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1197 {
1198 numPipesLog2++;
1199 }
1200
1201 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1202
1203 if (numPipesLog2 >= 4)
1204 {
1205 INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1206
1207 // In 16Bpe 8xaa, we have an extra overlap bit
1208 if ((pipeRotateLog2 > 0) &&
1209 (elemLog2 == 4) &&
1210 (numSamplesLog2 == 3) &&
1211 (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1212 {
1213 overlapLog2++;
1214 }
1215
1216 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1217 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1218
1219 if (m_settings.supportRbPlus &&
1220 IsRtOptSwizzle(swizzleMode) &&
1221 (numPipesLog2 == 6) &&
1222 (numSamplesLog2 == 3) &&
1223 (m_maxCompFragLog2 == 3) &&
1224 (metablkSizeLog2 < 15))
1225 {
1226 metablkSizeLog2 = 15;
1227 }
1228 }
1229 else
1230 {
1231 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1232 }
1233
1234 if (dataType == Gfx10DataDepthStencil)
1235 {
1236 // For htile surfaces, pad meta block size to 2K * num_pipes
1237 metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1238 }
1239
1240 const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1241
1242 if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1243 {
1244 const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1245
1246 metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1247 }
1248 }
1249
1250 const INT_32 metablkBitsLog2 =
1251 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1252 pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1253 pBlock->h = 1 << (metablkBitsLog2 >> 1);
1254 pBlock->d = 1;
1255 }
1256 else
1257 {
1258 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1259
1260 if (pipeAlign)
1261 {
1262 if (m_settings.supportRbPlus &&
1263 (m_pipesLog2 == m_numSaLog2 + 1) &&
1264 (m_pipesLog2 > 1) &&
1265 IsRbAligned(resourceType, swizzleMode))
1266 {
1267 numPipesLog2++;
1268 }
1269
1270 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1271
1272 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1273 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1274 metablkSizeLog2 = Max(metablkSizeLog2, 12);
1275 }
1276 else
1277 {
1278 metablkSizeLog2 = 12;
1279 }
1280
1281 const INT_32 metablkBitsLog2 =
1282 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1283 pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1284 pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1285 pBlock->d = 1 << (metablkBitsLog2 / 3);
1286 }
1287
1288 return (1 << static_cast<UINT_32>(metablkSizeLog2));
1289 }
1290
1291 /**
1292 ************************************************************************************************************************
1293 * Gfx10Lib::ConvertSwizzlePatternToEquation
1294 *
1295 * @brief
1296 * Convert swizzle pattern to equation.
*
* @note
* The pattern info is first expanded into one ADDR_BIT_SETTING per address bit, then pEquation->addr / xor1 / xor2
* are filled for address bits [0, block size log2):
*   - Bits below elemLog2 are emitted as channel 0 with index equal to the bit position.
*   - Channel numbers written here are 0 for x terms, 1 for y terms and 2 for z terms; x indices are offset by
*     elemLog2, y and z indices are not.
*   - Non-XOR swizzle modes: every bit is asserted to be a single coordinate bit and goes to addr[]; xor1/xor2 are
*     cleared.
*   - XOR swizzle modes (separate thin and thick paths): single-term bits go to addr[]. For multi-term bits, terms
*     outside the block extent (and, on the thin path, the z term) go to xor1/xor2 right away; the remaining in-block
*     terms are kept in a local copy and resolved by a loop that repeats until every bit has an addr[] entry.
*   - The XOR paths test xor1/xor2 against zero without clearing them first. The caller visible in this file
*     (InitEquationTable) passes a zero-initialized ADDR_EQUATION.
1297 *
1298 * @return
1299 * N/A
1300 ************************************************************************************************************************
1301 */
1302 VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
1303 UINT_32 elemLog2, ///< [in] element bytes log2
1304 AddrResourceType rsrcType, ///< [in] resource type
1305 AddrSwizzleMode swMode, ///< [in] swizzle mode
1306 const ADDR_SW_PATINFO* pPatInfo, ///< [in] swizzle pattern info
1307 ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern
1308 const
1309 {
// Expanded per-address-bit pattern. NOTE(review): 20 entries presumably covers the largest block size log2 - confirm.
1310 ADDR_BIT_SETTING fullSwizzlePattern[20];
1311 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1312
1313 const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern;
1314 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
1315
1316 pEquation->numBits = blockSizeLog2;
1317 pEquation->stackedDepthSlices = FALSE;
1318
// Address bits inside one element: channel 0, index == bit position.
1319 for (UINT_32 i = 0; i < elemLog2; i++)
1320 {
1321 pEquation->addr[i].channel = 0;
1322 pEquation->addr[i].valid = 1;
1323 pEquation->addr[i].index = i;
1324 }
1325
// Non-XOR swizzle: each remaining bit must be exactly one x, y or z bit.
1326 if (IsXor(swMode) == FALSE)
1327 {
1328 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1329 {
1330 ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1331
1332 if (pSwizzle[i].x != 0)
1333 {
1334 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1335
1336 pEquation->addr[i].channel = 0;
1337 pEquation->addr[i].valid = 1;
1338 pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2;
1339 }
1340 else if (pSwizzle[i].y != 0)
1341 {
1342 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1343
1344 pEquation->addr[i].channel = 1;
1345 pEquation->addr[i].valid = 1;
1346 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1347 }
1348 else
1349 {
1350 ADDR_ASSERT(pSwizzle[i].z != 0);
1351 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1352
1353 pEquation->addr[i].channel = 2;
1354 pEquation->addr[i].valid = 1;
1355 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1356 }
1357
1358 pEquation->xor1[i].value = 0;
1359 pEquation->xor2[i].value = 0;
1360 }
1361 }
// XOR swizzle on a thin block: only x and y bits can become addr[] entries here.
1362 else if (IsThin(rsrcType, swMode))
1363 {
1364 Dim3d dim;
1365 ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1366
1367 const UINT_32 blkXLog2 = Log2(dim.w);
1368 const UINT_32 blkYLog2 = Log2(dim.h);
1369 const UINT_32 blkXMask = dim.w - 1;
1370 const UINT_32 blkYMask = dim.h - 1;
1371
// swizzle[] : per-bit in-block x/y terms that still need resolving (only written for i >= elemLog2)
// xMask/yMask: coordinate bits already assigned to an addr[] entry
// bMask    : address bits that are fully resolved
1372 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1373 UINT_32 xMask = 0;
1374 UINT_32 yMask = 0;
1375 UINT_32 bMask = (1 << elemLog2) - 1;
1376
// First pass: classify every bit as single-term (resolved now) or multi-term.
1377 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1378 {
1379 if (IsPow2(pSwizzle[i].value))
1380 {
1381 if (pSwizzle[i].x != 0)
1382 {
1383 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1384 xMask |= pSwizzle[i].x;
1385
1386 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1387
1388 ADDR_ASSERT(xLog2 < blkXLog2);
1389
1390 pEquation->addr[i].channel = 0;
1391 pEquation->addr[i].valid = 1;
1392 pEquation->addr[i].index = xLog2 + elemLog2;
1393 }
1394 else
1395 {
1396 ADDR_ASSERT(pSwizzle[i].y != 0);
1397 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1398 yMask |= pSwizzle[i].y;
1399
1400 pEquation->addr[i].channel = 1;
1401 pEquation->addr[i].valid = 1;
1402 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1403
1404 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1405 }
1406
1407 swizzle[i].value = 0;
1408 bMask |= 1 << i;
1409 }
1410 else
1411 {
// Multi-term bit: a z term, if present, always goes to xor2.
1412 if (pSwizzle[i].z != 0)
1413 {
1414 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1415
1416 pEquation->xor2[i].channel = 2;
1417 pEquation->xor2[i].valid = 1;
1418 pEquation->xor2[i].index = Log2(pSwizzle[i].z);
1419 }
1420
1421 swizzle[i].x = pSwizzle[i].x;
1422 swizzle[i].y = pSwizzle[i].y;
1423 swizzle[i].z = swizzle[i].s = 0;
1424
1425 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1426
// x term beyond the block width goes to xor1 and is removed from the local copy.
1427 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1428
1429 if (xHi != 0)
1430 {
1431 ADDR_ASSERT(IsPow2(xHi));
1432 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1433
1434 pEquation->xor1[i].channel = 0;
1435 pEquation->xor1[i].valid = 1;
1436 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1437
1438 swizzle[i].x &= blkXMask;
1439 }
1440
// y term beyond the block height goes to xor1 if xHi did not take it, otherwise to xor2.
1441 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1442
1443 if (yHi != 0)
1444 {
1445 ADDR_ASSERT(IsPow2(yHi));
1446
1447 if (xHi == 0)
1448 {
1449 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1450 pEquation->xor1[i].channel = 1;
1451 pEquation->xor1[i].valid = 1;
1452 pEquation->xor1[i].index = Log2(yHi);
1453 }
1454 else
1455 {
1456 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1457 pEquation->xor2[i].channel = 1;
1458 pEquation->xor2[i].valid = 1;
1459 pEquation->xor2[i].index = Log2(yHi);
1460 }
1461
1462 swizzle[i].y &= blkYMask;
1463 }
1464
// No in-block terms left for this bit: mark it resolved.
1465 if (swizzle[i].value == 0)
1466 {
1467 bMask |= 1 << i;
1468 }
1469 }
1470 }
1471
1472 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1473 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1474
// All bits below the pipe interleave are expected to be resolved by the first pass.
1475 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1476
// Second pass: repeat until every address bit is resolved. The loop relies on the pattern being well formed;
// there is no iteration limit.
1477 while (bMask != blockMask)
1478 {
1479 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1480 {
1481 if ((bMask & (1 << i)) == 0)
1482 {
// Down to a single in-block term: it becomes this bit's addr[] entry.
1483 if (IsPow2(swizzle[i].value))
1484 {
1485 if (swizzle[i].x != 0)
1486 {
1487 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1488 xMask |= swizzle[i].x;
1489
1490 const UINT_32 xLog2 = Log2(swizzle[i].x);
1491
1492 ADDR_ASSERT(xLog2 < blkXLog2);
1493
1494 pEquation->addr[i].channel = 0;
1495 pEquation->addr[i].valid = 1;
1496 pEquation->addr[i].index = xLog2 + elemLog2;
1497 }
1498 else
1499 {
1500 ADDR_ASSERT(swizzle[i].y != 0);
1501 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1502 yMask |= swizzle[i].y;
1503
1504 pEquation->addr[i].channel = 1;
1505 pEquation->addr[i].valid = 1;
1506 pEquation->addr[i].index = Log2(swizzle[i].y);
1507
1508 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1509 }
1510
1511 swizzle[i].value = 0;
1512 bMask |= 1 << i;
1513 }
1514 else
1515 {
// Still multi-term: terms already used as addr[] entries elsewhere (present in xMask/yMask)
// move to xor1/xor2 and are removed from the local copy.
1516 const UINT_32 x = swizzle[i].x & xMask;
1517 const UINT_32 y = swizzle[i].y & yMask;
1518
1519 if (x != 0)
1520 {
1521 ADDR_ASSERT(IsPow2(x));
1522
1523 if (pEquation->xor1[i].value == 0)
1524 {
1525 pEquation->xor1[i].channel = 0;
1526 pEquation->xor1[i].valid = 1;
1527 pEquation->xor1[i].index = Log2(x) + elemLog2;
1528 }
1529 else
1530 {
1531 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1532 pEquation->xor2[i].channel = 0;
1533 pEquation->xor2[i].valid = 1;
1534 pEquation->xor2[i].index = Log2(x) + elemLog2;
1535 }
1536 }
1537
1538 if (y != 0)
1539 {
1540 ADDR_ASSERT(IsPow2(y));
1541
1542 if (pEquation->xor1[i].value == 0)
1543 {
1544 pEquation->xor1[i].channel = 1;
1545 pEquation->xor1[i].valid = 1;
1546 pEquation->xor1[i].index = Log2(y);
1547 }
1548 else
1549 {
1550 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1551 pEquation->xor2[i].channel = 1;
1552 pEquation->xor2[i].valid = 1;
1553 pEquation->xor2[i].index = Log2(y);
1554 }
1555 }
1556
1557 swizzle[i].x &= ~x;
1558 swizzle[i].y &= ~y;
1559 }
1560 }
1561 }
1562 }
1563
// Every in-block x and y bit must have been assigned to exactly one addr[] entry.
1564 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1565 }
// XOR swizzle on a thick block: same two-pass scheme as the thin path, with z handled like x and y.
1566 else
1567 {
// Block extents come from the 4KB table when blockSizeLog2 == 12, otherwise from the 64KB table.
1568 const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
1569 const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
1570 const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
1571 const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1572 const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1573 const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1574
1575 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1576 UINT_32 xMask = 0;
1577 UINT_32 yMask = 0;
1578 UINT_32 zMask = 0;
1579 UINT_32 bMask = (1 << elemLog2) - 1;
1580
// First pass: classify every bit as single-term (resolved now) or multi-term.
1581 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1582 {
1583 if (IsPow2(pSwizzle[i].value))
1584 {
1585 if (pSwizzle[i].x != 0)
1586 {
1587 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1588 xMask |= pSwizzle[i].x;
1589
1590 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1591
1592 ADDR_ASSERT(xLog2 < blkXLog2);
1593
1594 pEquation->addr[i].channel = 0;
1595 pEquation->addr[i].valid = 1;
1596 pEquation->addr[i].index = xLog2 + elemLog2;
1597 }
1598 else if (pSwizzle[i].y != 0)
1599 {
1600 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1601 yMask |= pSwizzle[i].y;
1602
1603 pEquation->addr[i].channel = 1;
1604 pEquation->addr[i].valid = 1;
1605 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1606
1607 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1608 }
1609 else
1610 {
1611 ADDR_ASSERT(pSwizzle[i].z != 0);
1612 ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1613 zMask |= pSwizzle[i].z;
1614
1615 pEquation->addr[i].channel = 2;
1616 pEquation->addr[i].valid = 1;
1617 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1618
1619 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1620 }
1621
1622 swizzle[i].value = 0;
1623 bMask |= 1 << i;
1624 }
1625 else
1626 {
1627 swizzle[i].x = pSwizzle[i].x;
1628 swizzle[i].y = pSwizzle[i].y;
1629 swizzle[i].z = pSwizzle[i].z;
1630 swizzle[i].s = 0;
1631
1632 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1633
// Terms outside the block extent go straight to xor1/xor2; at most two of the three may be present
// because only two xor slots exist per bit.
1634 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1635 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1636 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1637
1638 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1639
1640 if (xHi != 0)
1641 {
1642 ADDR_ASSERT(IsPow2(xHi));
1643 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1644
1645 pEquation->xor1[i].channel = 0;
1646 pEquation->xor1[i].valid = 1;
1647 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1648
1649 swizzle[i].x &= blkXMask;
1650 }
1651
1652 if (yHi != 0)
1653 {
1654 ADDR_ASSERT(IsPow2(yHi));
1655
1656 if (pEquation->xor1[i].value == 0)
1657 {
1658 pEquation->xor1[i].channel = 1;
1659 pEquation->xor1[i].valid = 1;
1660 pEquation->xor1[i].index = Log2(yHi);
1661 }
1662 else
1663 {
1664 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1665 pEquation->xor2[i].channel = 1;
1666 pEquation->xor2[i].valid = 1;
1667 pEquation->xor2[i].index = Log2(yHi);
1668 }
1669
1670 swizzle[i].y &= blkYMask;
1671 }
1672
1673 if (zHi != 0)
1674 {
1675 ADDR_ASSERT(IsPow2(zHi));
1676
1677 if (pEquation->xor1[i].value == 0)
1678 {
1679 pEquation->xor1[i].channel = 2;
1680 pEquation->xor1[i].valid = 1;
1681 pEquation->xor1[i].index = Log2(zHi);
1682 }
1683 else
1684 {
1685 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1686 pEquation->xor2[i].channel = 2;
1687 pEquation->xor2[i].valid = 1;
1688 pEquation->xor2[i].index = Log2(zHi);
1689 }
1690
1691 swizzle[i].z &= blkZMask;
1692 }
1693
// No in-block terms left for this bit: mark it resolved.
1694 if (swizzle[i].value == 0)
1695 {
1696 bMask |= 1 << i;
1697 }
1698 }
1699 }
1700
1701 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1702 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1703
// All bits below the pipe interleave are expected to be resolved by the first pass.
1704 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1705
// Second pass: repeat until every address bit is resolved. The loop relies on the pattern being well formed;
// there is no iteration limit.
1706 while (bMask != blockMask)
1707 {
1708 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1709 {
1710 if ((bMask & (1 << i)) == 0)
1711 {
// Down to a single in-block term: it becomes this bit's addr[] entry.
1712 if (IsPow2(swizzle[i].value))
1713 {
1714 if (swizzle[i].x != 0)
1715 {
1716 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1717 xMask |= swizzle[i].x;
1718
1719 const UINT_32 xLog2 = Log2(swizzle[i].x);
1720
1721 ADDR_ASSERT(xLog2 < blkXLog2);
1722
1723 pEquation->addr[i].channel = 0;
1724 pEquation->addr[i].valid = 1;
1725 pEquation->addr[i].index = xLog2 + elemLog2;
1726 }
1727 else if (swizzle[i].y != 0)
1728 {
1729 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1730 yMask |= swizzle[i].y;
1731
1732 pEquation->addr[i].channel = 1;
1733 pEquation->addr[i].valid = 1;
1734 pEquation->addr[i].index = Log2(swizzle[i].y);
1735
1736 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1737 }
1738 else
1739 {
1740 ADDR_ASSERT(swizzle[i].z != 0);
1741 ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1742 zMask |= swizzle[i].z;
1743
1744 pEquation->addr[i].channel = 2;
1745 pEquation->addr[i].valid = 1;
1746 pEquation->addr[i].index = Log2(swizzle[i].z);
1747
1748 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1749 }
1750
1751 swizzle[i].value = 0;
1752 bMask |= 1 << i;
1753 }
1754 else
1755 {
// Still multi-term: terms already used as addr[] entries elsewhere (present in xMask/yMask/zMask)
// move to xor1/xor2 and are removed from the local copy.
1756 const UINT_32 x = swizzle[i].x & xMask;
1757 const UINT_32 y = swizzle[i].y & yMask;
1758 const UINT_32 z = swizzle[i].z & zMask;
1759
1760 if (x != 0)
1761 {
1762 ADDR_ASSERT(IsPow2(x));
1763
1764 if (pEquation->xor1[i].value == 0)
1765 {
1766 pEquation->xor1[i].channel = 0;
1767 pEquation->xor1[i].valid = 1;
1768 pEquation->xor1[i].index = Log2(x) + elemLog2;
1769 }
1770 else
1771 {
1772 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1773 pEquation->xor2[i].channel = 0;
1774 pEquation->xor2[i].valid = 1;
1775 pEquation->xor2[i].index = Log2(x) + elemLog2;
1776 }
1777 }
1778
1779 if (y != 0)
1780 {
1781 ADDR_ASSERT(IsPow2(y));
1782
1783 if (pEquation->xor1[i].value == 0)
1784 {
1785 pEquation->xor1[i].channel = 1;
1786 pEquation->xor1[i].valid = 1;
1787 pEquation->xor1[i].index = Log2(y);
1788 }
1789 else
1790 {
1791 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1792 pEquation->xor2[i].channel = 1;
1793 pEquation->xor2[i].valid = 1;
1794 pEquation->xor2[i].index = Log2(y);
1795 }
1796 }
1797
1798 if (z != 0)
1799 {
1800 ADDR_ASSERT(IsPow2(z));
1801
1802 if (pEquation->xor1[i].value == 0)
1803 {
1804 pEquation->xor1[i].channel = 2;
1805 pEquation->xor1[i].valid = 1;
1806 pEquation->xor1[i].index = Log2(z);
1807 }
1808 else
1809 {
1810 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1811 pEquation->xor2[i].channel = 2;
1812 pEquation->xor2[i].valid = 1;
1813 pEquation->xor2[i].index = Log2(z);
1814 }
1815 }
1816
1817 swizzle[i].x &= ~x;
1818 swizzle[i].y &= ~y;
1819 swizzle[i].z &= ~z;
1820 }
1821 }
1822 }
1823 }
1824
// Every in-block x, y and z bit must have been assigned to exactly one addr[] entry.
1825 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1826 }
1827 }
1828
1829 /**
1830 ************************************************************************************************************************
1831 * Gfx10Lib::InitEquationTable
1832 *
1833 * @brief
1834 * Initialize Equation table.
1835 *
1836 * @return
1837 * N/A
1838 ************************************************************************************************************************
1839 */
1840 VOID Gfx10Lib::InitEquationTable()
1841 {
1842 memset(m_equationTable, 0, sizeof(m_equationTable));
1843
1844 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1845 {
1846 const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1847
1848 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1849 {
1850 const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1851
1852 for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1853 {
1854 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
1855 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
1856
1857 if (pPatInfo != NULL)
1858 {
1859 ADDR_ASSERT(IsValidSwMode(swMode));
1860
1861 if (pPatInfo->maxItemCount <= 3)
1862 {
1863 ADDR_EQUATION equation = {};
1864
1865 ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
1866
1867 equationIndex = m_numEquations;
1868 ADDR_ASSERT(equationIndex < EquationTableSize);
1869
1870 m_equationTable[equationIndex] = equation;
1871
1872 m_numEquations++;
1873 }
1874 else
1875 {
1876 // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
1877 ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
1878 ADDR_ASSERT(rsrcTypeIdx == 1);
1879 ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
1880 ADDR_ASSERT(m_settings.supportRbPlus == 1);
1881 }
1882 }
1883
1884 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
1885 }
1886 }
1887 }
1888 }
1889
1890 /**
1891 ************************************************************************************************************************
1892 * Gfx10Lib::HwlGetEquationIndex
1893 *
1894 * @brief
1895 * Interface function stub of GetEquationIndex
1896 *
1897 * @return
1898 * ADDR_E_RETURNCODE
1899 ************************************************************************************************************************
1900 */
1901 UINT_32 Gfx10Lib::HwlGetEquationIndex(
1902 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
1903 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
1904 ) const
1905 {
1906 UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
1907
1908 if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
1909 (pIn->resourceType == ADDR_RSRC_TEX_3D))
1910 {
1911 const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
1912 const UINT_32 swModeIdx = static_cast<UINT_32>(pIn->swizzleMode);
1913 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
1914
1915 equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
1916 }
1917
1918 if (pOut->pMipInfo != NULL)
1919 {
1920 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
1921 {
1922 pOut->pMipInfo[i].equationIndex = equationIdx;
1923 }
1924 }
1925
1926 return equationIdx;
1927 }
1928
1929 /**
1930 ************************************************************************************************************************
1931 * Gfx10Lib::IsValidDisplaySwizzleMode
1932 *
1933 * @brief
1934 * Check if a swizzle mode is supported by display engine
1935 *
1936 * @return
1937 * TRUE is swizzle mode is supported by display engine
1938 ************************************************************************************************************************
1939 */
1940 BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
1941 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
1942 ) const
1943 {
1944 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
1945
1946 BOOL_32 support = FALSE;
1947
1948 if (m_settings.isDcn2)
1949 {
1950 switch (pIn->swizzleMode)
1951 {
1952 case ADDR_SW_4KB_D:
1953 case ADDR_SW_4KB_D_X:
1954 case ADDR_SW_64KB_D:
1955 case ADDR_SW_64KB_D_T:
1956 case ADDR_SW_64KB_D_X:
1957 support = (pIn->bpp == 64);
1958 break;
1959
1960 case ADDR_SW_LINEAR:
1961 case ADDR_SW_4KB_S:
1962 case ADDR_SW_4KB_S_X:
1963 case ADDR_SW_64KB_S:
1964 case ADDR_SW_64KB_S_T:
1965 case ADDR_SW_64KB_S_X:
1966 case ADDR_SW_64KB_R_X:
1967 support = (pIn->bpp <= 64);
1968 break;
1969
1970 default:
1971 break;
1972 }
1973 }
1974 else
1975 {
1976 ADDR_NOT_IMPLEMENTED();
1977 }
1978
1979 return support;
1980 }
1981
1982 /**
1983 ************************************************************************************************************************
1984 * Gfx10Lib::GetMaxNumMipsInTail
1985 *
1986 * @brief
1987 * Return max number of mips in tails
1988 *
1989 * @return
1990 * Max number of mips in tails
1991 ************************************************************************************************************************
1992 */
1993 UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
1994 UINT_32 blockSizeLog2, ///< block size log2
1995 BOOL_32 isThin ///< is thin or thick
1996 ) const
1997 {
1998 UINT_32 effectiveLog2 = blockSizeLog2;
1999
2000 if (isThin == FALSE)
2001 {
2002 effectiveLog2 -= (blockSizeLog2 - 8) / 3;
2003 }
2004
2005 return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
2006 }
2007
2008 /**
2009 ************************************************************************************************************************
2010 * Gfx10Lib::HwlComputePipeBankXor
2011 *
2012 * @brief
2013 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2014 *
2015 * @return
2016 * PipeBankXor value
2017 ************************************************************************************************************************
2018 */
2019 ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
2020 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2021 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2022 ) const
2023 {
2024 if (IsNonPrtXor(pIn->swizzleMode))
2025 {
2026 const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2027 const UINT_32 pipeBits = GetPipeXorBits(blockBits);
2028 const UINT_32 bankBits = GetBankXorBits(blockBits);
2029
2030 UINT_32 pipeXor = 0;
2031 UINT_32 bankXor = 0;
2032
2033 if (bankBits != 0)
2034 {
2035 if (blockBits == 16)
2036 {
2037 const UINT_32 XorPatternLen = 8;
2038 static const UINT_32 XorBank1b[XorPatternLen] = {0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80};
2039 static const UINT_32 XorBank2b[XorPatternLen] = {0x00, 0x80, 0x40, 0xC0, 0x80, 0x00, 0xC0, 0x40};
2040 static const UINT_32 XorBank3b[XorPatternLen] = {0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0};
2041
2042 const UINT_32 index = pIn->surfIndex % XorPatternLen;
2043
2044 if (bankBits == 1)
2045 {
2046 bankXor = XorBank1b[index];
2047 }
2048 else if (bankBits == 2)
2049 {
2050 bankXor = XorBank2b[index];
2051 }
2052 else
2053 {
2054 bankXor = XorBank3b[index];
2055
2056 if (bankBits == 4)
2057 {
2058 bankXor >>= (2 - pipeBits);
2059 }
2060 }
2061 }
2062 }
2063
2064 pOut->pipeBankXor = bankXor | pipeXor;
2065 }
2066 else
2067 {
2068 pOut->pipeBankXor = 0;
2069 }
2070
2071 return ADDR_OK;
2072 }
2073
2074 /**
2075 ************************************************************************************************************************
2076 * Gfx10Lib::HwlComputeSlicePipeBankXor
2077 *
2078 * @brief
2079 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2080 *
2081 * @return
2082 * PipeBankXor value
2083 ************************************************************************************************************************
2084 */
2085 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
2086 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2087 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2088 ) const
2089 {
2090 if (IsNonPrtXor(pIn->swizzleMode))
2091 {
2092 const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2093 const UINT_32 pipeBits = GetPipeXorBits(blockBits);
2094 const UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
2095
2096 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;
2097 }
2098 else
2099 {
2100 pOut->pipeBankXor = 0;
2101 }
2102
2103 return ADDR_OK;
2104 }
2105
2106 /**
2107 ************************************************************************************************************************
2108 * Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2109 *
2110 * @brief
2111 * Compute sub resource offset to support swizzle pattern
2112 *
2113 * @return
2114 * Offset
2115 ************************************************************************************************************************
2116 */
2117 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2118 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure
2119 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure
2120 ) const
2121 {
2122 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2123
2124 pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
2125
2126 return ADDR_OK;
2127 }
2128
2129 /**
2130 ************************************************************************************************************************
2131 * Gfx10Lib::ValidateNonSwModeParams
2132 *
2133 * @brief
2134 * Validate compute surface info params except swizzle mode
2135 *
2136 * @return
2137 * TRUE if parameters are valid, FALSE otherwise
2138 ************************************************************************************************************************
2139 */
2140 BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
2141 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2142 {
2143 BOOL_32 valid = TRUE;
2144
2145 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2146 {
2147 ADDR_ASSERT_ALWAYS();
2148 valid = FALSE;
2149 }
2150
2151 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
2152 {
2153 ADDR_ASSERT_ALWAYS();
2154 valid = FALSE;
2155 }
2156
2157 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2158 const AddrResourceType rsrcType = pIn->resourceType;
2159 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
2160 const BOOL_32 msaa = (pIn->numFrags > 1);
2161 const BOOL_32 display = flags.display;
2162 const BOOL_32 tex3d = IsTex3d(rsrcType);
2163 const BOOL_32 tex2d = IsTex2d(rsrcType);
2164 const BOOL_32 tex1d = IsTex1d(rsrcType);
2165 const BOOL_32 stereo = flags.qbStereo;
2166
2167 // Resource type check
2168 if (tex1d)
2169 {
2170 if (msaa || display || stereo)
2171 {
2172 ADDR_ASSERT_ALWAYS();
2173 valid = FALSE;
2174 }
2175 }
2176 else if (tex2d)
2177 {
2178 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2179 {
2180 ADDR_ASSERT_ALWAYS();
2181 valid = FALSE;
2182 }
2183 }
2184 else if (tex3d)
2185 {
2186 if (msaa || display || stereo)
2187 {
2188 ADDR_ASSERT_ALWAYS();
2189 valid = FALSE;
2190 }
2191 }
2192 else
2193 {
2194 ADDR_ASSERT_ALWAYS();
2195 valid = FALSE;
2196 }
2197
2198 return valid;
2199 }
2200
2201 /**
2202 ************************************************************************************************************************
2203 * Gfx10Lib::ValidateSwModeParams
2204 *
2205 * @brief
2206 * Validate compute surface info related to swizzle mode
2207 *
2208 * @return
2209 * TRUE if parameters are valid, FALSE otherwise
2210 ************************************************************************************************************************
2211 */
2212 BOOL_32 Gfx10Lib::ValidateSwModeParams(
2213 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2214 {
2215 BOOL_32 valid = TRUE;
2216
2217 if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
2218 {
2219 ADDR_ASSERT_ALWAYS();
2220 valid = FALSE;
2221 }
2222
2223 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2224 const AddrResourceType rsrcType = pIn->resourceType;
2225 const AddrSwizzleMode swizzle = pIn->swizzleMode;
2226 const BOOL_32 msaa = (pIn->numFrags > 1);
2227 const BOOL_32 zbuffer = flags.depth || flags.stencil;
2228 const BOOL_32 color = flags.color;
2229 const BOOL_32 display = flags.display;
2230 const BOOL_32 tex3d = IsTex3d(rsrcType);
2231 const BOOL_32 tex2d = IsTex2d(rsrcType);
2232 const BOOL_32 tex1d = IsTex1d(rsrcType);
2233 const BOOL_32 thin3d = flags.view3dAs2dArray;
2234 const BOOL_32 linear = IsLinear(swizzle);
2235 const BOOL_32 blk256B = IsBlock256b(swizzle);
2236 const BOOL_32 blkVar = IsBlockVariable(swizzle);
2237 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
2238 const BOOL_32 prt = flags.prt;
2239 const BOOL_32 fmask = flags.fmask;
2240
2241 // Misc check
2242 if ((pIn->numFrags > 1) &&
2243 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2244 {
2245 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2246 ADDR_ASSERT_ALWAYS();
2247 valid = FALSE;
2248 }
2249
2250 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2251 {
2252 ADDR_ASSERT_ALWAYS();
2253 valid = FALSE;
2254 }
2255
2256 if ((pIn->bpp == 96) && (linear == FALSE))
2257 {
2258 ADDR_ASSERT_ALWAYS();
2259 valid = FALSE;
2260 }
2261
2262 const UINT_32 swizzleMask = 1 << swizzle;
2263
2264 // Resource type check
2265 if (tex1d)
2266 {
2267 if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
2268 {
2269 ADDR_ASSERT_ALWAYS();
2270 valid = FALSE;
2271 }
2272 }
2273 else if (tex2d)
2274 {
2275 if (((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0) ||
2276 (prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||
2277 (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))
2278 {
2279 ADDR_ASSERT_ALWAYS();
2280 valid = FALSE;
2281 }
2282 }
2283 else if (tex3d)
2284 {
2285 if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
2286 (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
2287 (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0)))
2288 {
2289 ADDR_ASSERT_ALWAYS();
2290 valid = FALSE;
2291 }
2292 }
2293
2294 // Swizzle type check
2295 if (linear)
2296 {
2297 if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2298 {
2299 ADDR_ASSERT_ALWAYS();
2300 valid = FALSE;
2301 }
2302 }
2303 else if (IsZOrderSwizzle(swizzle))
2304 {
2305 if ((pIn->bpp > 64) ||
2306 (msaa && (color || (pIn->bpp > 32))) ||
2307 ElemLib::IsBlockCompressed(pIn->format) ||
2308 ElemLib::IsMacroPixelPacked(pIn->format))
2309 {
2310 ADDR_ASSERT_ALWAYS();
2311 valid = FALSE;
2312 }
2313 }
2314 else if (IsStandardSwizzle(rsrcType, swizzle))
2315 {
2316 if (zbuffer || msaa)
2317 {
2318 ADDR_ASSERT_ALWAYS();
2319 valid = FALSE;
2320 }
2321 }
2322 else if (IsDisplaySwizzle(rsrcType, swizzle))
2323 {
2324 if (zbuffer || msaa)
2325 {
2326 ADDR_ASSERT_ALWAYS();
2327 valid = FALSE;
2328 }
2329 }
2330 else if (IsRtOptSwizzle(swizzle))
2331 {
2332 if (zbuffer)
2333 {
2334 ADDR_ASSERT_ALWAYS();
2335 valid = FALSE;
2336 }
2337 }
2338 else
2339 {
2340 ADDR_ASSERT_ALWAYS();
2341 valid = FALSE;
2342 }
2343
2344 // Block type check
2345 if (blk256B)
2346 {
2347 if (zbuffer || tex3d || msaa)
2348 {
2349 ADDR_ASSERT_ALWAYS();
2350 valid = FALSE;
2351 }
2352 }
2353 else if (blkVar)
2354 {
2355 if (m_blockVarSizeLog2 == 0)
2356 {
2357 ADDR_ASSERT_ALWAYS();
2358 valid = FALSE;
2359 }
2360 }
2361
2362 return valid;
2363 }
2364
2365 /**
2366 ************************************************************************************************************************
2367 * Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2368 *
2369 * @brief
2370 * Compute surface info sanity check
2371 *
2372 * @return
2373 * Offset
2374 ************************************************************************************************************************
2375 */
2376 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2377 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
2378 ) const
2379 {
2380 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2381 }
2382
2383 /**
2384 ************************************************************************************************************************
2385 * Gfx10Lib::HwlGetPreferredSurfaceSetting
2386 *
2387 * @brief
2388 * Internal function to get suggested surface information for cliet to use
2389 *
2390 * @return
2391 * ADDR_E_RETURNCODE
2392 ************************************************************************************************************************
2393 */
2394 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2395 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure
2396 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure
2397 ) const
2398 {
2399 ADDR_E_RETURNCODE returnCode = ADDR_OK;
2400
2401 if (pIn->flags.fmask)
2402 {
2403 const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;
2404 const BOOL_32 forbidVarBlockType = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));
2405
2406 if (forbid64KbBlockType && forbidVarBlockType)
2407 {
2408 // Invalid combination...
2409 ADDR_ASSERT_ALWAYS();
2410 returnCode = ADDR_INVALIDPARAMS;
2411 }
2412 else
2413 {
2414 pOut->resourceType = ADDR_RSRC_TEX_2D;
2415 pOut->validBlockSet.value = 0;
2416 pOut->validBlockSet.macroThin64KB = forbid64KbBlockType ? 0 : 1;
2417 pOut->validBlockSet.var = forbidVarBlockType ? 0 : 1;
2418 pOut->validSwModeSet.value = 0;
2419 pOut->validSwModeSet.sw64KB_Z_X = forbid64KbBlockType ? 0 : 1;
2420 pOut->validSwModeSet.swVar_Z_X = forbidVarBlockType ? 0 : 1;
2421 pOut->canXor = TRUE;
2422 pOut->validSwTypeSet.value = AddrSwSetZ;
2423 pOut->clientPreferredSwSet = pOut->validSwTypeSet;
2424
2425 BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);
2426
2427 if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
2428 {
2429 const UINT_8 maxFmaskSwizzleModeType = 2;
2430 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2431 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2432 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
2433 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2434 const UINT_32 width = Max(pIn->width, 1u);
2435 const UINT_32 height = Max(pIn->height, 1u);
2436 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);
2437
2438 AddrSwizzleMode swMode[maxFmaskSwizzleModeType] = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};
2439 Dim3d blkDim[maxFmaskSwizzleModeType] = {{0}, {0}};
2440 Dim3d padDim[maxFmaskSwizzleModeType] = {{0}, {0}};
2441 UINT_64 padSize[maxFmaskSwizzleModeType] = {0};
2442
2443 for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)
2444 {
2445 ComputeBlockDimensionForSurf(&blkDim[i].w,
2446 &blkDim[i].h,
2447 &blkDim[i].d,
2448 fmaskBpp,
2449 1,
2450 pOut->resourceType,
2451 swMode[i]);
2452
2453 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2454 padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2455 }
2456
2457 if (GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0]))
2458 {
2459 if ((padSize[1] * ratioHi) <= (padSize[0] * ratioLow))
2460 {
2461 use64KbBlockType = FALSE;
2462 }
2463 }
2464 else
2465 {
2466 if ((padSize[1] * ratioLow) < (padSize[0] * ratioHi))
2467 {
2468 use64KbBlockType = FALSE;
2469 }
2470 }
2471 }
2472 else if (forbidVarBlockType)
2473 {
2474 use64KbBlockType = TRUE;
2475 }
2476
2477 if (use64KbBlockType)
2478 {
2479 pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2480 }
2481 else
2482 {
2483 pOut->swizzleMode = ADDR_SW_VAR_Z_X;
2484 }
2485 }
2486 }
2487 else
2488 {
2489 UINT_32 bpp = pIn->bpp;
2490 UINT_32 width = Max(pIn->width, 1u);
2491 UINT_32 height = Max(pIn->height, 1u);
2492
2493 // Set format to INVALID will skip this conversion
2494 if (pIn->format != ADDR_FMT_INVALID)
2495 {
2496 ElemMode elemMode = ADDR_UNCOMPRESSED;
2497 UINT_32 expandX, expandY;
2498
2499 // Get compression/expansion factors and element mode which indicates compression/expansion
2500 bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2501 &elemMode,
2502 &expandX,
2503 &expandY);
2504
2505 UINT_32 basePitch = 0;
2506 GetElemLib()->AdjustSurfaceInfo(elemMode,
2507 expandX,
2508 expandY,
2509 &bpp,
2510 &basePitch,
2511 &width,
2512 &height);
2513 }
2514
2515 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2516 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2517 const UINT_32 numSamples = Max(pIn->numSamples, 1u);
2518 const UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2519 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
2520
2521 // Pre sanity check on non swizzle mode parameters
2522 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2523 localIn.flags = pIn->flags;
2524 localIn.resourceType = pIn->resourceType;
2525 localIn.format = pIn->format;
2526 localIn.bpp = bpp;
2527 localIn.width = width;
2528 localIn.height = height;
2529 localIn.numSlices = numSlices;
2530 localIn.numMipLevels = numMipLevels;
2531 localIn.numSamples = numSamples;
2532 localIn.numFrags = numFrags;
2533
2534 if (ValidateNonSwModeParams(&localIn))
2535 {
2536 // Forbid swizzle mode(s) by client setting
2537 ADDR2_SWMODE_SET allowedSwModeSet = {};
2538 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2539 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx10Blk256BSwModeMask;
2540 allowedSwModeSet.value |=
2541 pIn->forbiddenBlock.macroThin4KB ? 0 :
2542 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
2543 allowedSwModeSet.value |=
2544 pIn->forbiddenBlock.macroThick4KB ? 0 :
2545 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
2546 allowedSwModeSet.value |=
2547 pIn->forbiddenBlock.macroThin64KB ? 0 :
2548 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
2549 allowedSwModeSet.value |=
2550 pIn->forbiddenBlock.macroThick64KB ? 0 :
2551 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
2552 allowedSwModeSet.value |=
2553 pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
2554
2555 if (pIn->preferredSwSet.value != 0)
2556 {
2557 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2558 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2559 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2560 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2561 }
2562
2563 if (pIn->noXor)
2564 {
2565 allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2566 }
2567
2568 if (pIn->maxAlign > 0)
2569 {
2570 if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
2571 {
2572 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2573 }
2574
2575 if (pIn->maxAlign < Size64K)
2576 {
2577 allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2578 }
2579
2580 if (pIn->maxAlign < Size4K)
2581 {
2582 allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2583 }
2584
2585 if (pIn->maxAlign < Size256)
2586 {
2587 allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2588 }
2589 }
2590
2591 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2592 switch (pIn->resourceType)
2593 {
2594 case ADDR_RSRC_TEX_1D:
2595 allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2596 break;
2597
2598 case ADDR_RSRC_TEX_2D:
2599 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2600 break;
2601
2602 case ADDR_RSRC_TEX_3D:
2603 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2604
2605 if (pIn->flags.view3dAs2dArray)
2606 {
2607 allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask;
2608 }
2609 break;
2610
2611 default:
2612 ADDR_ASSERT_ALWAYS();
2613 allowedSwModeSet.value = 0;
2614 break;
2615 }
2616
2617 if (ElemLib::IsBlockCompressed(pIn->format) ||
2618 ElemLib::IsMacroPixelPacked(pIn->format) ||
2619 (bpp > 64) ||
2620 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2621 {
2622 allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2623 }
2624
2625 if (pIn->format == ADDR_FMT_32_32_32)
2626 {
2627 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2628 }
2629
2630 if (msaa)
2631 {
2632 allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
2633 }
2634
2635 if (pIn->flags.depth || pIn->flags.stencil)
2636 {
2637 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2638 }
2639
2640 if (pIn->flags.display)
2641 {
2642 if (m_settings.isDcn2)
2643 {
2644 allowedSwModeSet.value &= (bpp == 64) ? Dcn2Bpp64SwModeMask : Dcn2NonBpp64SwModeMask;
2645 }
2646 else
2647 {
2648 ADDR_NOT_IMPLEMENTED();
2649 }
2650 }
2651
2652 if (allowedSwModeSet.value != 0)
2653 {
2654 #if DEBUG
2655 // Post sanity check, at least AddrLib should accept the output generated by its own
2656 UINT_32 validateSwModeSet = allowedSwModeSet.value;
2657
2658 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
2659 {
2660 if (validateSwModeSet & 1)
2661 {
2662 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
2663 ADDR_ASSERT(ValidateSwModeParams(&localIn));
2664 }
2665
2666 validateSwModeSet >>= 1;
2667 }
2668 #endif
2669
2670 pOut->resourceType = pIn->resourceType;
2671 pOut->validSwModeSet = allowedSwModeSet;
2672 pOut->canXor = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
2673 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2674 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
2675
2676 pOut->clientPreferredSwSet = pIn->preferredSwSet;
2677
2678 if (pOut->clientPreferredSwSet.value == 0)
2679 {
2680 pOut->clientPreferredSwSet.value = AddrSwSetAll;
2681 }
2682
2683 // Apply optional restrictions
2684 if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)
2685 {
2686 if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)
2687 {
2688 // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
2689 // the GL2 in VAR mode, so it should be avoided.
2690 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2691 }
2692 else
2693 {
2694 // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
2695 // But we have to suffer from low performance because there is no other choice...
2696 ADDR_ASSERT_ALWAYS();
2697 }
2698 }
2699
2700 if (pIn->flags.needEquation)
2701 {
2702 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
2703 }
2704
2705 if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
2706 {
2707 pOut->swizzleMode = ADDR_SW_LINEAR;
2708 }
2709 else
2710 {
2711 // Always ignore linear swizzle mode if there is other choice.
2712 allowedSwModeSet.swLinear = 0;
2713
2714 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2715
2716 // Determine block size if there is 2 or more block type candidates
2717 if (IsPow2(allowedBlockSet.value) == FALSE)
2718 {
2719 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = { ADDR_SW_LINEAR };
2720
2721 if (m_blockVarSizeLog2 != 0)
2722 {
2723 swMode[AddrBlockVar] = ADDR_SW_VAR_R_X;
2724 }
2725
2726 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
2727 {
2728 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
2729 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_R_X;
2730 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
2731 }
2732 else
2733 {
2734 swMode[AddrBlockMicro] = ADDR_SW_256B_S;
2735 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_S;
2736 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
2737 }
2738
2739 Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
2740 Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
2741 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
2742
2743 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2744 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2745 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
2746 UINT_32 minSizeBlk = AddrBlockMicro;
2747 UINT_64 minSize = 0;
2748
2749 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
2750 {
2751 if (allowedBlockSet.value & (1 << i))
2752 {
2753 ComputeBlockDimensionForSurf(&blkDim[i].w,
2754 &blkDim[i].h,
2755 &blkDim[i].d,
2756 bpp,
2757 numFrags,
2758 pOut->resourceType,
2759 swMode[i]);
2760
2761 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2762 padSize[i] = PowTwoAlign(padSize[i] * numFrags, sizeAlignInElement);
2763
2764 if (minSize == 0)
2765 {
2766 minSize = padSize[i];
2767 minSizeBlk = i;
2768 }
2769 else
2770 {
2771 // Due to the fact that VAR block size = 16KB * m_pipes, it is possible that VAR
2772 // block size < 64KB. And ratio[Hi/Low] logic implicitly requires iterating from
2773 // smaller block type to bigger block type. So we have to correct comparing logic
2774 // according to the size of existing "minimun block" and size of coming/comparing
2775 // block. The new logic can also be useful to any future change about AddrBlockType.
2776 if (GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk]))
2777 {
2778 if ((padSize[i] * ratioHi) <= (minSize * ratioLow))
2779 {
2780 minSize = padSize[i];
2781 minSizeBlk = i;
2782 }
2783 }
2784 else
2785 {
2786 if ((padSize[i] * ratioLow) < (minSize * ratioHi))
2787 {
2788 minSize = padSize[i];
2789 minSizeBlk = i;
2790 }
2791 }
2792 }
2793 }
2794 }
2795
2796 if ((allowedBlockSet.micro == TRUE) &&
2797 (width <= blkDim[AddrBlockMicro].w) &&
2798 (height <= blkDim[AddrBlockMicro].h))
2799 {
2800 minSizeBlk = AddrBlockMicro;
2801 }
2802
2803 if (minSizeBlk == AddrBlockMicro)
2804 {
2805 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2806 allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
2807 }
2808 else if (minSizeBlk == AddrBlockThick4KB)
2809 {
2810 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2811 allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;
2812 }
2813 else if (minSizeBlk == AddrBlockThin4KB)
2814 {
2815 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2816 allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
2817 }
2818 else if (minSizeBlk == AddrBlockThick64KB)
2819 {
2820 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2821 allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;
2822 }
2823 else if (minSizeBlk == AddrBlockThin64KB)
2824 {
2825 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
2826 Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;
2827 }
2828 else
2829 {
2830 ADDR_ASSERT(minSizeBlk == AddrBlockVar);
2831 allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;
2832 }
2833 }
2834
2835 // Block type should be determined.
2836 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
2837
2838 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
2839
2840 // Determine swizzle type if there is 2 or more swizzle type candidates
2841 if (IsPow2(allowedSwSet.value) == FALSE)
2842 {
2843 if (ElemLib::IsBlockCompressed(pIn->format))
2844 {
2845 if (allowedSwSet.sw_D)
2846 {
2847 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2848 }
2849 else if (allowedSwSet.sw_S)
2850 {
2851 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2852 }
2853 else
2854 {
2855 ADDR_ASSERT(allowedSwSet.sw_R);
2856 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2857 }
2858 }
2859 else if (ElemLib::IsMacroPixelPacked(pIn->format))
2860 {
2861 if (allowedSwSet.sw_S)
2862 {
2863 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2864 }
2865 else if (allowedSwSet.sw_D)
2866 {
2867 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2868 }
2869 else
2870 {
2871 ADDR_ASSERT(allowedSwSet.sw_R);
2872 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2873 }
2874 }
2875 else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
2876 {
2877 if (pIn->flags.color &&
2878 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&
2879 allowedSwSet.sw_D)
2880 {
2881 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2882 }
2883 else if (allowedSwSet.sw_S)
2884 {
2885 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2886 }
2887 else if (allowedSwSet.sw_R)
2888 {
2889 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2890 }
2891 else
2892 {
2893 ADDR_ASSERT(allowedSwSet.sw_Z);
2894 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2895 }
2896 }
2897 else
2898 {
2899 if (allowedSwSet.sw_R)
2900 {
2901 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2902 }
2903 else if (allowedSwSet.sw_D)
2904 {
2905 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2906 }
2907 else if (allowedSwSet.sw_S)
2908 {
2909 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2910 }
2911 else
2912 {
2913 ADDR_ASSERT(allowedSwSet.sw_Z);
2914 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2915 }
2916 }
2917 }
2918
2919 // Swizzle type should be determined.
2920 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
2921
2922 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
2923 // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
2924 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
2925 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
2926 }
2927 }
2928 else
2929 {
2930 // Invalid combination...
2931 ADDR_ASSERT_ALWAYS();
2932 returnCode = ADDR_INVALIDPARAMS;
2933 }
2934 }
2935 else
2936 {
2937 // Invalid combination...
2938 ADDR_ASSERT_ALWAYS();
2939 returnCode = ADDR_INVALIDPARAMS;
2940 }
2941 }
2942
2943 return returnCode;
2944 }
2945
2946 /**
2947 ************************************************************************************************************************
2948 * Gfx10Lib::ComputeStereoInfo
2949 *
2950 * @brief
2951 * Compute height alignment and right eye pipeBankXor for stereo surface
2952 *
2953 * @return
2954 * Error code
2955 *
2956 ************************************************************************************************************************
2957 */
2958 ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
2959 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< Compute surface info
2960 UINT_32 blkHeight, ///< Block height
2961 UINT_32* pAlignY, ///< Stereo requested additional alignment in Y
2962 UINT_32* pRightXor ///< Right eye xor
2963 ) const
2964 {
2965 ADDR_E_RETURNCODE ret = ADDR_OK;
2966
2967 *pAlignY = 1;
2968 *pRightXor = 0;
2969
2970 if (IsNonPrtXor(pIn->swizzleMode))
2971 {
2972 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
2973 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
2974 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
2975 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
2976 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
2977
2978 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
2979 {
2980 UINT_32 yMax = 0;
2981 UINT_32 yPos = 0;
2982
2983 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
2984 {
2985 if (m_equationTable[eqIndex].xor1[i].value == 0)
2986 {
2987 break;
2988 }
2989
2990 ADDR_ASSERT(m_equationTable[eqIndex].xor1[i].valid == 1);
2991
2992 if ((m_equationTable[eqIndex].xor1[i].channel == 1) &&
2993 (m_equationTable[eqIndex].xor1[i].index > yMax))
2994 {
2995 yMax = m_equationTable[eqIndex].xor1[i].index;
2996 yPos = i;
2997 }
2998 }
2999
3000 const UINT_32 additionalAlign = 1 << yMax;
3001
3002 if (additionalAlign >= blkHeight)
3003 {
3004 *pAlignY *= (additionalAlign / blkHeight);
3005
3006 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
3007
3008 if ((alignedHeight >> yMax) & 1)
3009 {
3010 *pRightXor = 1 << (yPos - m_pipeInterleaveLog2);
3011 }
3012 }
3013 }
3014 else
3015 {
3016 ret = ADDR_INVALIDPARAMS;
3017 }
3018 }
3019
3020 return ret;
3021 }
3022
3023 /**
3024 ************************************************************************************************************************
3025 * Gfx10Lib::HwlComputeSurfaceInfoTiled
3026 *
3027 * @brief
3028 * Internal function to calculate alignment for tiled surface
3029 *
3030 * @return
3031 * ADDR_E_RETURNCODE
3032 ************************************************************************************************************************
3033 */
3034 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
3035 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3036 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3037 ) const
3038 {
3039 ADDR_E_RETURNCODE ret;
3040
3041 if (IsBlock256b(pIn->swizzleMode))
3042 {
3043 ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3044 }
3045 else
3046 {
3047 ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3048 }
3049
3050 return ret;
3051 }
3052
3053 /**
3054 ************************************************************************************************************************
3055 * Gfx10Lib::ComputeSurfaceInfoMicroTiled
3056 *
3057 * @brief
3058 * Internal function to calculate alignment for micro tiled surface
3059 *
3060 * @return
3061 * ADDR_E_RETURNCODE
3062 ************************************************************************************************************************
3063 */
3064 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
3065 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3066 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3067 ) const
3068 {
3069 ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3070 &pOut->blockHeight,
3071 &pOut->blockSlices,
3072 pIn->bpp,
3073 pIn->numFrags,
3074 pIn->resourceType,
3075 pIn->swizzleMode);
3076
3077 if (ret == ADDR_OK)
3078 {
3079 pOut->mipChainPitch = 0;
3080 pOut->mipChainHeight = 0;
3081 pOut->mipChainSlice = 0;
3082 pOut->epitchIsHeight = FALSE;
3083 pOut->mipChainInTail = FALSE;
3084 pOut->firstMipIdInTail = pIn->numMipLevels;
3085
3086 const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3087
3088 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3089 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3090 pOut->numSlices = pIn->numSlices;
3091 pOut->baseAlign = blockSize;
3092
3093 if (pIn->numMipLevels > 1)
3094 {
3095 const UINT_32 mip0Width = pIn->width;
3096 const UINT_32 mip0Height = pIn->height;
3097 UINT_64 mipSliceSize = 0;
3098
3099 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3100 {
3101 UINT_32 mipWidth, mipHeight;
3102
3103 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3104
3105 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pOut->blockWidth);
3106 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3107
3108 if (pOut->pMipInfo != NULL)
3109 {
3110 pOut->pMipInfo[i].pitch = mipActualWidth;
3111 pOut->pMipInfo[i].height = mipActualHeight;
3112 pOut->pMipInfo[i].depth = 1;
3113 pOut->pMipInfo[i].offset = mipSliceSize;
3114 pOut->pMipInfo[i].mipTailOffset = 0;
3115 pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3116 }
3117
3118 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3119 }
3120
3121 pOut->sliceSize = mipSliceSize;
3122 pOut->surfSize = mipSliceSize * pOut->numSlices;
3123 }
3124 else
3125 {
3126 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3127 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3128
3129 if (pOut->pMipInfo != NULL)
3130 {
3131 pOut->pMipInfo[0].pitch = pOut->pitch;
3132 pOut->pMipInfo[0].height = pOut->height;
3133 pOut->pMipInfo[0].depth = 1;
3134 pOut->pMipInfo[0].offset = 0;
3135 pOut->pMipInfo[0].mipTailOffset = 0;
3136 pOut->pMipInfo[0].macroBlockOffset = 0;
3137 }
3138 }
3139
3140 }
3141
3142 return ret;
3143 }
3144
3145 /**
3146 ************************************************************************************************************************
3147 * Gfx10Lib::ComputeSurfaceInfoMacroTiled
3148 *
3149 * @brief
3150 * Internal function to calculate alignment for macro tiled surface
3151 *
3152 * @return
3153 * ADDR_E_RETURNCODE
3154 ************************************************************************************************************************
3155 */
3156 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
3157 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3158 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3159 ) const
3160 {
3161 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3162 &pOut->blockHeight,
3163 &pOut->blockSlices,
3164 pIn->bpp,
3165 pIn->numFrags,
3166 pIn->resourceType,
3167 pIn->swizzleMode);
3168
3169 if (returnCode == ADDR_OK)
3170 {
3171 UINT_32 heightAlign = pOut->blockHeight;
3172
3173 if (pIn->flags.qbStereo)
3174 {
3175 UINT_32 rightXor = 0;
3176 UINT_32 alignY = 1;
3177
3178 returnCode = ComputeStereoInfo(pIn, heightAlign, &alignY, &rightXor);
3179
3180 if (returnCode == ADDR_OK)
3181 {
3182 pOut->pStereoInfo->rightSwizzle = rightXor;
3183
3184 heightAlign *= alignY;
3185 }
3186 }
3187
3188 if (returnCode == ADDR_OK)
3189 {
3190 // Mip chain dimesion and epitch has no meaning in GFX10, set to default value
3191 pOut->mipChainPitch = 0;
3192 pOut->mipChainHeight = 0;
3193 pOut->mipChainSlice = 0;
3194 pOut->epitchIsHeight = FALSE;
3195 pOut->mipChainInTail = FALSE;
3196 pOut->firstMipIdInTail = pIn->numMipLevels;
3197
3198 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3199 const UINT_32 blockSize = 1 << blockSizeLog2;
3200
3201 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3202 pOut->height = PowTwoAlign(pIn->height, heightAlign);
3203 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3204 pOut->baseAlign = blockSize;
3205
3206 if (pIn->numMipLevels > 1)
3207 {
3208 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
3209 pIn->swizzleMode,
3210 pOut->blockWidth,
3211 pOut->blockHeight,
3212 pOut->blockSlices);
3213 const UINT_32 mip0Width = pIn->width;
3214 const UINT_32 mip0Height = pIn->height;
3215 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
3216 const UINT_32 mip0Depth = isThin ? 1 : pIn->numSlices;
3217 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3218 const UINT_32 index = Log2(pIn->bpp >> 3);
3219 UINT_32 firstMipInTail = pIn->numMipLevels;
3220 UINT_64 mipChainSliceSize = 0;
3221 UINT_64 mipSize[MaxMipLevels];
3222 UINT_64 mipSliceSize[MaxMipLevels];
3223
3224 Dim3d fixedTailMaxDim = tailMaxDim;
3225
3226 if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3227 {
3228 fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3229 fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3230 }
3231
3232 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3233 {
3234 UINT_32 mipWidth, mipHeight, mipDepth;
3235
3236 GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3237
3238 if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3239 {
3240 firstMipInTail = i;
3241 mipChainSliceSize += blockSize / pOut->blockSlices;
3242 break;
3243 }
3244 else
3245 {
3246 const UINT_32 pitch = PowTwoAlign(mipWidth, pOut->blockWidth);
3247 const UINT_32 height = PowTwoAlign(mipHeight, pOut->blockHeight);
3248 const UINT_32 depth = PowTwoAlign(mipDepth, pOut->blockSlices);
3249 const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3250
3251 mipSize[i] = sliceSize * depth;
3252 mipSliceSize[i] = sliceSize * pOut->blockSlices;
3253 mipChainSliceSize += sliceSize;
3254
3255 if (pOut->pMipInfo != NULL)
3256 {
3257 pOut->pMipInfo[i].pitch = pitch;
3258 pOut->pMipInfo[i].height = height;
3259 pOut->pMipInfo[i].depth = depth;
3260 }
3261 }
3262 }
3263
3264 pOut->sliceSize = mipChainSliceSize;
3265 pOut->surfSize = mipChainSliceSize * pOut->numSlices;
3266 pOut->mipChainInTail = (firstMipInTail == 0) ? TRUE : FALSE;
3267 pOut->firstMipIdInTail = firstMipInTail;
3268
3269 if (pOut->pMipInfo != NULL)
3270 {
3271 UINT_64 offset = 0;
3272 UINT_64 macroBlkOffset = 0;
3273 UINT_32 tailMaxDepth = 0;
3274
3275 if (firstMipInTail != pIn->numMipLevels)
3276 {
3277 UINT_32 mipWidth, mipHeight;
3278
3279 GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3280 &mipWidth, &mipHeight, &tailMaxDepth);
3281
3282 offset = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3283 macroBlkOffset = blockSize;
3284 }
3285
3286 for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3287 {
3288 pOut->pMipInfo[i].offset = offset;
3289 pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3290 pOut->pMipInfo[i].mipTailOffset = 0;
3291
3292 offset += mipSize[i];
3293 macroBlkOffset += mipSliceSize[i];
3294 }
3295
3296 UINT_32 pitch = tailMaxDim.w;
3297 UINT_32 height = tailMaxDim.h;
3298 UINT_32 depth = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3299
3300 tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3301
3302 for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3303 {
3304 const UINT_32 m = maxMipsInTail - 1 - (i - firstMipInTail);
3305 const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3306
3307 pOut->pMipInfo[i].offset = mipOffset * tailMaxDepth;
3308 pOut->pMipInfo[i].mipTailOffset = mipOffset;
3309 pOut->pMipInfo[i].macroBlockOffset = 0;
3310
3311 pOut->pMipInfo[i].pitch = pitch;
3312 pOut->pMipInfo[i].height = height;
3313 pOut->pMipInfo[i].depth = depth;
3314
3315 UINT_32 mipX = ((mipOffset >> 9) & 1) |
3316 ((mipOffset >> 10) & 2) |
3317 ((mipOffset >> 11) & 4) |
3318 ((mipOffset >> 12) & 8) |
3319 ((mipOffset >> 13) & 16) |
3320 ((mipOffset >> 14) & 32);
3321 UINT_32 mipY = ((mipOffset >> 8) & 1) |
3322 ((mipOffset >> 9) & 2) |
3323 ((mipOffset >> 10) & 4) |
3324 ((mipOffset >> 11) & 8) |
3325 ((mipOffset >> 12) & 16) |
3326 ((mipOffset >> 13) & 32);
3327
3328 if (blockSizeLog2 & 1)
3329 {
3330 const UINT_32 temp = mipX;
3331 mipX = mipY;
3332 mipY = temp;
3333
3334 if (index & 1)
3335 {
3336 mipY = (mipY << 1) | (mipX & 1);
3337 mipX = mipX >> 1;
3338 }
3339 }
3340
3341 if (isThin)
3342 {
3343 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3344 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3345 pOut->pMipInfo[i].mipTailCoordZ = 0;
3346
3347 pitch = Max(pitch >> 1, Block256_2d[index].w);
3348 height = Max(height >> 1, Block256_2d[index].h);
3349 depth = 1;
3350 }
3351 else
3352 {
3353 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3354 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3355 pOut->pMipInfo[i].mipTailCoordZ = 0;
3356
3357 pitch = Max(pitch >> 1, Block256_3d[index].w);
3358 height = Max(height >> 1, Block256_3d[index].h);
3359 depth = PowTwoAlign(Max(depth >> 1, 1u), Block256_3d[index].d);
3360 }
3361 }
3362 }
3363 }
3364 else
3365 {
3366 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
3367 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3368
3369 if (pOut->pMipInfo != NULL)
3370 {
3371 pOut->pMipInfo[0].pitch = pOut->pitch;
3372 pOut->pMipInfo[0].height = pOut->height;
3373 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
3374 pOut->pMipInfo[0].offset = 0;
3375 pOut->pMipInfo[0].mipTailOffset = 0;
3376 pOut->pMipInfo[0].macroBlockOffset = 0;
3377 pOut->pMipInfo[0].mipTailCoordX = 0;
3378 pOut->pMipInfo[0].mipTailCoordY = 0;
3379 pOut->pMipInfo[0].mipTailCoordZ = 0;
3380 }
3381 }
3382 }
3383 }
3384
3385 return returnCode;
3386 }
3387
3388 /**
3389 ************************************************************************************************************************
3390 * Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3391 *
3392 * @brief
3393 * Internal function to calculate address from coord for tiled swizzle surface
3394 *
3395 * @return
3396 * ADDR_E_RETURNCODE
3397 ************************************************************************************************************************
3398 */
3399 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
3400 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3401 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3402 ) const
3403 {
3404 ADDR_E_RETURNCODE ret;
3405
3406 if (IsBlock256b(pIn->swizzleMode))
3407 {
3408 ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3409 }
3410 else
3411 {
3412 ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3413 }
3414
3415 return ret;
3416 }
3417
3418 /**
3419 ************************************************************************************************************************
3420 * Gfx10Lib::ComputeOffsetFromEquation
3421 *
3422 * @brief
3423 * Compute offset from equation
3424 *
3425 * @return
3426 * Offset
3427 ************************************************************************************************************************
3428 */
3429 UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
3430 const ADDR_EQUATION* pEq, ///< Equation
3431 UINT_32 x, ///< x coord in bytes
3432 UINT_32 y, ///< y coord in pixel
3433 UINT_32 z ///< z coord in slice
3434 ) const
3435 {
3436 UINT_32 offset = 0;
3437
3438 for (UINT_32 i = 0; i < pEq->numBits; i++)
3439 {
3440 UINT_32 v = 0;
3441
3442 if (pEq->addr[i].valid)
3443 {
3444 if (pEq->addr[i].channel == 0)
3445 {
3446 v ^= (x >> pEq->addr[i].index) & 1;
3447 }
3448 else if (pEq->addr[i].channel == 1)
3449 {
3450 v ^= (y >> pEq->addr[i].index) & 1;
3451 }
3452 else
3453 {
3454 ADDR_ASSERT(pEq->addr[i].channel == 2);
3455 v ^= (z >> pEq->addr[i].index) & 1;
3456 }
3457 }
3458
3459 if (pEq->xor1[i].valid)
3460 {
3461 if (pEq->xor1[i].channel == 0)
3462 {
3463 v ^= (x >> pEq->xor1[i].index) & 1;
3464 }
3465 else if (pEq->xor1[i].channel == 1)
3466 {
3467 v ^= (y >> pEq->xor1[i].index) & 1;
3468 }
3469 else
3470 {
3471 ADDR_ASSERT(pEq->xor1[i].channel == 2);
3472 v ^= (z >> pEq->xor1[i].index) & 1;
3473 }
3474 }
3475
3476 if (pEq->xor2[i].valid)
3477 {
3478 if (pEq->xor2[i].channel == 0)
3479 {
3480 v ^= (x >> pEq->xor2[i].index) & 1;
3481 }
3482 else if (pEq->xor2[i].channel == 1)
3483 {
3484 v ^= (y >> pEq->xor2[i].index) & 1;
3485 }
3486 else
3487 {
3488 ADDR_ASSERT(pEq->xor2[i].channel == 2);
3489 v ^= (z >> pEq->xor2[i].index) & 1;
3490 }
3491 }
3492
3493 offset |= (v << i);
3494 }
3495
3496 return offset;
3497 }
3498
3499 /**
3500 ************************************************************************************************************************
3501 * Gfx10Lib::ComputeOffsetFromSwizzlePattern
3502 *
3503 * @brief
3504 * Compute offset from swizzle pattern
3505 *
3506 * @return
3507 * Offset
3508 ************************************************************************************************************************
3509 */
3510 UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern(
3511 const UINT_64* pPattern, ///< Swizzle pattern
3512 UINT_32 numBits, ///< Number of bits in pattern
3513 UINT_32 x, ///< x coord in pixel
3514 UINT_32 y, ///< y coord in pixel
3515 UINT_32 z, ///< z coord in slice
3516 UINT_32 s ///< sample id
3517 ) const
3518 {
3519 UINT_32 offset = 0;
3520 const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
3521
3522 for (UINT_32 i = 0; i < numBits; i++)
3523 {
3524 UINT_32 v = 0;
3525
3526 if (pSwizzlePattern[i].x != 0)
3527 {
3528 UINT_16 mask = pSwizzlePattern[i].x;
3529 UINT_32 xBits = x;
3530
3531 while (mask != 0)
3532 {
3533 if (mask & 1)
3534 {
3535 v ^= xBits & 1;
3536 }
3537
3538 xBits >>= 1;
3539 mask >>= 1;
3540 }
3541 }
3542
3543 if (pSwizzlePattern[i].y != 0)
3544 {
3545 UINT_16 mask = pSwizzlePattern[i].y;
3546 UINT_32 yBits = y;
3547
3548 while (mask != 0)
3549 {
3550 if (mask & 1)
3551 {
3552 v ^= yBits & 1;
3553 }
3554
3555 yBits >>= 1;
3556 mask >>= 1;
3557 }
3558 }
3559
3560 if (pSwizzlePattern[i].z != 0)
3561 {
3562 UINT_16 mask = pSwizzlePattern[i].z;
3563 UINT_32 zBits = z;
3564
3565 while (mask != 0)
3566 {
3567 if (mask & 1)
3568 {
3569 v ^= zBits & 1;
3570 }
3571
3572 zBits >>= 1;
3573 mask >>= 1;
3574 }
3575 }
3576
3577 if (pSwizzlePattern[i].s != 0)
3578 {
3579 UINT_16 mask = pSwizzlePattern[i].s;
3580 UINT_32 sBits = s;
3581
3582 while (mask != 0)
3583 {
3584 if (mask & 1)
3585 {
3586 v ^= sBits & 1;
3587 }
3588
3589 sBits >>= 1;
3590 mask >>= 1;
3591 }
3592 }
3593
3594 offset |= (v << i);
3595 }
3596
3597 return offset;
3598 }
3599
3600 /**
3601 ************************************************************************************************************************
3602 * Gfx10Lib::GetSwizzlePatternInfo
3603 *
3604 * @brief
3605 * Get swizzle pattern
3606 *
3607 * @return
3608 * Swizzle pattern information
3609 ************************************************************************************************************************
3610 */
3611 const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
3612 AddrSwizzleMode swizzleMode, ///< Swizzle mode
3613 AddrResourceType resourceType, ///< Resource type
3614 UINT_32 elemLog2, ///< Element size in bytes log2
3615 UINT_32 numFrag ///< Number of fragment
3616 ) const
3617 {
3618 const UINT_32 index = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
3619 const ADDR_SW_PATINFO* patInfo = NULL;
3620 const UINT_32 swizzleMask = 1 << swizzleMode;
3621
3622 if (IsLinear(swizzleMode) == FALSE)
3623 {
3624 if (IsBlockVariable(swizzleMode))
3625 {
3626 if (m_blockVarSizeLog2 != 0)
3627 {
3628 ADDR_ASSERT(m_settings.supportRbPlus);
3629
3630 if (IsRtOptSwizzle(swizzleMode))
3631 {
3632 if (numFrag == 1)
3633 {
3634 patInfo = SW_VAR_R_X_1xaa_RBPLUS_PATINFO;
3635 }
3636 else if (numFrag == 2)
3637 {
3638 patInfo = SW_VAR_R_X_2xaa_RBPLUS_PATINFO;
3639 }
3640 else if (numFrag == 4)
3641 {
3642 patInfo = SW_VAR_R_X_4xaa_RBPLUS_PATINFO;
3643 }
3644 else
3645 {
3646 ADDR_ASSERT(numFrag == 8);
3647 patInfo = SW_VAR_R_X_8xaa_RBPLUS_PATINFO;
3648 }
3649 }
3650 else if (IsZOrderSwizzle(swizzleMode))
3651 {
3652 if (numFrag == 1)
3653 {
3654 patInfo = SW_VAR_Z_X_1xaa_RBPLUS_PATINFO;
3655 }
3656 else if (numFrag == 2)
3657 {
3658 patInfo = SW_VAR_Z_X_2xaa_RBPLUS_PATINFO;
3659 }
3660 else if (numFrag == 4)
3661 {
3662 patInfo = SW_VAR_Z_X_4xaa_RBPLUS_PATINFO;
3663 }
3664 else
3665 {
3666 ADDR_ASSERT(numFrag == 8);
3667 patInfo = SW_VAR_Z_X_8xaa_RBPLUS_PATINFO;
3668 }
3669 }
3670 }
3671 }
3672 else if (resourceType == ADDR_RSRC_TEX_3D)
3673 {
3674 ADDR_ASSERT(numFrag == 1);
3675
3676 if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0)
3677 {
3678 if (IsRtOptSwizzle(swizzleMode))
3679 {
3680 patInfo = m_settings.supportRbPlus ? SW_64K_R_X_1xaa_RBPLUS_PATINFO : SW_64K_R_X_1xaa_PATINFO;
3681 }
3682 else if (IsZOrderSwizzle(swizzleMode))
3683 {
3684 patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_1xaa_RBPLUS_PATINFO : SW_64K_Z_X_1xaa_PATINFO;
3685 }
3686 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3687 {
3688 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
3689 patInfo = m_settings.supportRbPlus ? SW_64K_D3_X_RBPLUS_PATINFO : SW_64K_D3_X_PATINFO;
3690 }
3691 else
3692 {
3693 ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
3694
3695 if (IsBlock4kb(swizzleMode))
3696 {
3697 if (swizzleMode == ADDR_SW_4KB_S)
3698 {
3699 patInfo = m_settings.supportRbPlus ? SW_4K_S3_RBPLUS_PATINFO : SW_4K_S3_PATINFO;
3700 }
3701 else
3702 {
3703 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
3704 patInfo = m_settings.supportRbPlus ? SW_4K_S3_X_RBPLUS_PATINFO : SW_4K_S3_X_PATINFO;
3705 }
3706 }
3707 else
3708 {
3709 if (swizzleMode == ADDR_SW_64KB_S)
3710 {
3711 patInfo = m_settings.supportRbPlus ? SW_64K_S3_RBPLUS_PATINFO : SW_64K_S3_PATINFO;
3712 }
3713 else if (swizzleMode == ADDR_SW_64KB_S_X)
3714 {
3715 patInfo = m_settings.supportRbPlus ? SW_64K_S3_X_RBPLUS_PATINFO : SW_64K_S3_X_PATINFO;
3716 }
3717 else
3718 {
3719 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
3720 patInfo = m_settings.supportRbPlus ? SW_64K_S3_T_RBPLUS_PATINFO : SW_64K_S3_T_PATINFO;
3721 }
3722 }
3723 }
3724 }
3725 }
3726 else
3727 {
3728 if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0)
3729 {
3730 if (IsBlock256b(swizzleMode))
3731 {
3732 if (swizzleMode == ADDR_SW_256B_S)
3733 {
3734 patInfo = m_settings.supportRbPlus ? SW_256_S_RBPLUS_PATINFO : SW_256_S_PATINFO;
3735 }
3736 else
3737 {
3738 ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
3739 patInfo = m_settings.supportRbPlus ? SW_256_D_RBPLUS_PATINFO : SW_256_D_PATINFO;
3740 }
3741 }
3742 else if (IsBlock4kb(swizzleMode))
3743 {
3744 if (IsStandardSwizzle(resourceType, swizzleMode))
3745 {
3746 if (swizzleMode == ADDR_SW_4KB_S)
3747 {
3748 patInfo = m_settings.supportRbPlus ? SW_4K_S_RBPLUS_PATINFO : SW_4K_S_PATINFO;
3749 }
3750 else
3751 {
3752 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
3753 patInfo = m_settings.supportRbPlus ? SW_4K_S_X_RBPLUS_PATINFO : SW_4K_S_X_PATINFO;
3754 }
3755 }
3756 else
3757 {
3758 if (swizzleMode == ADDR_SW_4KB_D)
3759 {
3760 patInfo = m_settings.supportRbPlus ? SW_4K_D_RBPLUS_PATINFO : SW_4K_D_PATINFO;
3761 }
3762 else
3763 {
3764 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
3765 patInfo = m_settings.supportRbPlus ? SW_4K_D_X_RBPLUS_PATINFO : SW_4K_D_X_PATINFO;
3766 }
3767 }
3768 }
3769 else
3770 {
3771 if (IsRtOptSwizzle(swizzleMode))
3772 {
3773 if (numFrag == 1)
3774 {
3775 patInfo = m_settings.supportRbPlus ? SW_64K_R_X_1xaa_RBPLUS_PATINFO : SW_64K_R_X_1xaa_PATINFO;
3776 }
3777 else if (numFrag == 2)
3778 {
3779 patInfo = m_settings.supportRbPlus ? SW_64K_R_X_2xaa_RBPLUS_PATINFO : SW_64K_R_X_2xaa_PATINFO;
3780 }
3781 else if (numFrag == 4)
3782 {
3783 patInfo = m_settings.supportRbPlus ? SW_64K_R_X_4xaa_RBPLUS_PATINFO : SW_64K_R_X_4xaa_PATINFO;
3784 }
3785 else
3786 {
3787 ADDR_ASSERT(numFrag == 8);
3788 patInfo = m_settings.supportRbPlus ? SW_64K_R_X_8xaa_RBPLUS_PATINFO : SW_64K_R_X_8xaa_PATINFO;
3789 }
3790 }
3791 else if (IsZOrderSwizzle(swizzleMode))
3792 {
3793 if (numFrag == 1)
3794 {
3795 patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_1xaa_RBPLUS_PATINFO : SW_64K_Z_X_1xaa_PATINFO;
3796 }
3797 else if (numFrag == 2)
3798 {
3799 patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_2xaa_RBPLUS_PATINFO : SW_64K_Z_X_2xaa_PATINFO;
3800 }
3801 else if (numFrag == 4)
3802 {
3803 patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_4xaa_RBPLUS_PATINFO : SW_64K_Z_X_4xaa_PATINFO;
3804 }
3805 else
3806 {
3807 ADDR_ASSERT(numFrag == 8);
3808 patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_8xaa_RBPLUS_PATINFO : SW_64K_Z_X_8xaa_PATINFO;
3809 }
3810 }
3811 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3812 {
3813 if (swizzleMode == ADDR_SW_64KB_D)
3814 {
3815 patInfo = m_settings.supportRbPlus ? SW_64K_D_RBPLUS_PATINFO : SW_64K_D_PATINFO;
3816 }
3817 else if (swizzleMode == ADDR_SW_64KB_D_X)
3818 {
3819 patInfo = m_settings.supportRbPlus ? SW_64K_D_X_RBPLUS_PATINFO : SW_64K_D_X_PATINFO;
3820 }
3821 else
3822 {
3823 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
3824 patInfo = m_settings.supportRbPlus ? SW_64K_D_T_RBPLUS_PATINFO : SW_64K_D_T_PATINFO;
3825 }
3826 }
3827 else
3828 {
3829 if (swizzleMode == ADDR_SW_64KB_S)
3830 {
3831 patInfo = m_settings.supportRbPlus ? SW_64K_S_RBPLUS_PATINFO : SW_64K_S_PATINFO;
3832 }
3833 else if (swizzleMode == ADDR_SW_64KB_S_X)
3834 {
3835 patInfo = m_settings.supportRbPlus ? SW_64K_S_X_RBPLUS_PATINFO : SW_64K_S_X_PATINFO;
3836 }
3837 else
3838 {
3839 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
3840 patInfo = m_settings.supportRbPlus ? SW_64K_S_T_RBPLUS_PATINFO : SW_64K_S_T_PATINFO;
3841 }
3842 }
3843 }
3844 }
3845 }
3846 }
3847
3848 return (patInfo != NULL) ? &patInfo[index] : NULL;
3849 }
3850
3851 /**
3852 ************************************************************************************************************************
3853 * Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
3854 *
3855 * @brief
3856 * Internal function to calculate address from coord for micro tiled swizzle surface
3857 *
3858 * @return
3859 * ADDR_E_RETURNCODE
3860 ************************************************************************************************************************
3861 */
3862 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
3863 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3864 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3865 ) const
3866 {
3867 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3868 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
3869 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
3870
3871 localIn.swizzleMode = pIn->swizzleMode;
3872 localIn.flags = pIn->flags;
3873 localIn.resourceType = pIn->resourceType;
3874 localIn.bpp = pIn->bpp;
3875 localIn.width = Max(pIn->unalignedWidth, 1u);
3876 localIn.height = Max(pIn->unalignedHeight, 1u);
3877 localIn.numSlices = Max(pIn->numSlices, 1u);
3878 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
3879 localIn.numSamples = Max(pIn->numSamples, 1u);
3880 localIn.numFrags = Max(pIn->numFrags, 1u);
3881 localOut.pMipInfo = mipInfo;
3882
3883 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
3884
3885 if (ret == ADDR_OK)
3886 {
3887 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3888 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
3889 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3890 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
3891
3892 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3893 {
3894 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
3895 const UINT_32 yb = pIn->y / localOut.blockHeight;
3896 const UINT_32 xb = pIn->x / localOut.blockWidth;
3897 const UINT_32 blockIndex = yb * pb + xb;
3898 const UINT_32 blockSize = 256;
3899 const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
3900 pIn->x << elemLog2,
3901 pIn->y,
3902 0);
3903 pOut->addr = localOut.sliceSize * pIn->slice +
3904 mipInfo[pIn->mipId].macroBlockOffset +
3905 (blockIndex * blockSize) +
3906 blk256Offset;
3907 }
3908 else
3909 {
3910 ret = ADDR_INVALIDPARAMS;
3911 }
3912 }
3913
3914 return ret;
3915 }
3916
3917 /**
3918 ************************************************************************************************************************
3919 * Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
3920 *
3921 * @brief
3922 * Internal function to calculate address from coord for macro tiled swizzle surface
3923 *
3924 * @return
3925 * ADDR_E_RETURNCODE
3926 ************************************************************************************************************************
3927 */
3928 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
3929 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3930 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3931 ) const
3932 {
3933 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3934 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
3935 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
3936
3937 localIn.swizzleMode = pIn->swizzleMode;
3938 localIn.flags = pIn->flags;
3939 localIn.resourceType = pIn->resourceType;
3940 localIn.bpp = pIn->bpp;
3941 localIn.width = Max(pIn->unalignedWidth, 1u);
3942 localIn.height = Max(pIn->unalignedHeight, 1u);
3943 localIn.numSlices = Max(pIn->numSlices, 1u);
3944 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
3945 localIn.numSamples = Max(pIn->numSamples, 1u);
3946 localIn.numFrags = Max(pIn->numFrags, 1u);
3947 localOut.pMipInfo = mipInfo;
3948
3949 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
3950
3951 if (ret == ADDR_OK)
3952 {
3953 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3954 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3955 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
3956 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
3957 const UINT_32 bankMask = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
3958 const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
3959 (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
3960
3961 if (localIn.numFrags > 1)
3962 {
3963 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
3964 pIn->resourceType,
3965 elemLog2,
3966 localIn.numFrags);
3967
3968 if (pPatInfo != NULL)
3969 {
3970 const UINT_32 pb = localOut.pitch / localOut.blockWidth;
3971 const UINT_32 yb = pIn->y / localOut.blockHeight;
3972 const UINT_32 xb = pIn->x / localOut.blockWidth;
3973 const UINT_64 blkIdx = yb * pb + xb;
3974
3975 ADDR_BIT_SETTING fullSwizzlePattern[20];
3976 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
3977
3978 const UINT_32 blkOffset =
3979 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
3980 blkSizeLog2,
3981 pIn->x,
3982 pIn->y,
3983 pIn->slice,
3984 pIn->sample);
3985
3986 pOut->addr = (localOut.sliceSize * pIn->slice) +
3987 (blkIdx << blkSizeLog2) +
3988 (blkOffset ^ pipeBankXor);
3989 }
3990 else
3991 {
3992 ret = ADDR_INVALIDPARAMS;
3993 }
3994 }
3995 else
3996 {
3997 const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
3998 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3999 const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
4000
4001 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4002 {
4003 const BOOL_32 inTail = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
4004 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
4005 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
4006 const UINT_32 sliceId = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
4007 const UINT_32 x = inTail ? (pIn->x + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
4008 const UINT_32 y = inTail ? (pIn->y + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
4009 const UINT_32 z = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
4010 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4011 const UINT_32 yb = pIn->y / localOut.blockHeight;
4012 const UINT_32 xb = pIn->x / localOut.blockWidth;
4013 const UINT_64 blkIdx = yb * pb + xb;
4014 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4015 x << elemLog2,
4016 y,
4017 z);
4018 pOut->addr = sliceSize * sliceId +
4019 mipInfo[pIn->mipId].macroBlockOffset +
4020 (blkIdx << blkSizeLog2) +
4021 (blkOffset ^ pipeBankXor);
4022 }
4023 else
4024 {
4025 ret = ADDR_INVALIDPARAMS;
4026 }
4027 }
4028 }
4029
4030 return ret;
4031 }
4032
4033 /**
4034 ************************************************************************************************************************
4035 * Gfx10Lib::HwlComputeMaxBaseAlignments
4036 *
4037 * @brief
4038 * Gets maximum alignments
4039 * @return
4040 * maximum alignments
4041 ************************************************************************************************************************
4042 */
4043 UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
4044 {
4045 return m_blockVarSizeLog2 ? Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K;
4046 }
4047
4048 /**
4049 ************************************************************************************************************************
4050 * Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4051 *
4052 * @brief
4053 * Gets maximum alignments for metadata
4054 * @return
4055 * maximum alignments for metadata
4056 ************************************************************************************************************************
4057 */
4058 UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
4059 {
4060 Dim3d metaBlk;
4061
4062 const AddrSwizzleMode ValidSwizzleModeForXmask[] =
4063 {
4064 ADDR_SW_64KB_Z_X,
4065 m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X,
4066 };
4067
4068 UINT_32 maxBaseAlignHtile = 0;
4069 UINT_32 maxBaseAlignCmask = 0;
4070
4071 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++)
4072 {
4073 for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4074 {
4075 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4076 {
4077 // Max base alignment for Htile
4078 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil,
4079 ADDR_RSRC_TEX_2D,
4080 ValidSwizzleModeForXmask[swIdx],
4081 bppLog2,
4082 numFragLog2,
4083 TRUE,
4084 &metaBlk);
4085
4086 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4087 }
4088 }
4089
4090 // Max base alignment for Cmask
4091 const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask,
4092 ADDR_RSRC_TEX_2D,
4093 ValidSwizzleModeForXmask[swIdx],
4094 0,
4095 0,
4096 TRUE,
4097 &metaBlk);
4098
4099 maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask);
4100 }
4101
4102 // Max base alignment for 2D Dcc
4103 const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4104 {
4105 ADDR_SW_64KB_S_X,
4106 ADDR_SW_64KB_D_X,
4107 ADDR_SW_64KB_R_X,
4108 m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4109 };
4110
4111 UINT_32 maxBaseAlignDcc2D = 0;
4112
4113 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4114 {
4115 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4116 {
4117 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4118 {
4119 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,
4120 ADDR_RSRC_TEX_2D,
4121 ValidSwizzleModeForDcc2D[swIdx],
4122 bppLog2,
4123 numFragLog2,
4124 TRUE,
4125 &metaBlk);
4126
4127 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4128 }
4129 }
4130 }
4131
4132 // Max base alignment for 3D Dcc
4133 const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4134 {
4135 ADDR_SW_64KB_Z_X,
4136 ADDR_SW_64KB_S_X,
4137 ADDR_SW_64KB_D_X,
4138 ADDR_SW_64KB_R_X,
4139 m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4140 };
4141
4142 UINT_32 maxBaseAlignDcc3D = 0;
4143
4144 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4145 {
4146 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4147 {
4148 const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,
4149 ADDR_RSRC_TEX_3D,
4150 ValidSwizzleModeForDcc3D[swIdx],
4151 bppLog2,
4152 0,
4153 TRUE,
4154 &metaBlk);
4155
4156 maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4157 }
4158 }
4159
4160 return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4161 }
4162
4163 /**
4164 ************************************************************************************************************************
4165 * Gfx10Lib::GetMetaElementSizeLog2
4166 *
4167 * @brief
4168 * Gets meta data element size log2
4169 * @return
4170 * Meta data element size log2
4171 ************************************************************************************************************************
4172 */
4173 INT_32 Gfx10Lib::GetMetaElementSizeLog2(
4174 Gfx10DataType dataType) ///< Data surface type
4175 {
4176 INT_32 elemSizeLog2 = 0;
4177
4178 if (dataType == Gfx10DataColor)
4179 {
4180 elemSizeLog2 = 0;
4181 }
4182 else if (dataType == Gfx10DataDepthStencil)
4183 {
4184 elemSizeLog2 = 2;
4185 }
4186 else
4187 {
4188 ADDR_ASSERT(dataType == Gfx10DataFmask);
4189 elemSizeLog2 = -1;
4190 }
4191
4192 return elemSizeLog2;
4193 }
4194
4195 /**
4196 ************************************************************************************************************************
4197 * Gfx10Lib::GetMetaCacheSizeLog2
4198 *
4199 * @brief
4200 * Gets meta data cache line size log2
4201 * @return
4202 * Meta data cache line size log2
4203 ************************************************************************************************************************
4204 */
4205 INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
4206 Gfx10DataType dataType) ///< Data surface type
4207 {
4208 INT_32 cacheSizeLog2 = 0;
4209
4210 if (dataType == Gfx10DataColor)
4211 {
4212 cacheSizeLog2 = 6;
4213 }
4214 else if (dataType == Gfx10DataDepthStencil)
4215 {
4216 cacheSizeLog2 = 8;
4217 }
4218 else
4219 {
4220 ADDR_ASSERT(dataType == Gfx10DataFmask);
4221 cacheSizeLog2 = 8;
4222 }
4223 return cacheSizeLog2;
4224 }
4225
4226 /**
4227 ************************************************************************************************************************
4228 * Gfx10Lib::HwlComputeSurfaceInfoLinear
4229 *
4230 * @brief
4231 * Internal function to calculate alignment for linear surface
4232 *
4233 * @return
4234 * ADDR_E_RETURNCODE
4235 ************************************************************************************************************************
4236 */
4237 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
4238 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4239 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4240 ) const
4241 {
4242 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4243
4244 if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4245 {
4246 returnCode = ADDR_INVALIDPARAMS;
4247 }
4248 else
4249 {
4250 const UINT_32 elementBytes = pIn->bpp >> 3;
4251 const UINT_32 pitchAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4252 const UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4253 UINT_32 pitch = PowTwoAlign(pIn->width, pitchAlign);
4254 UINT_32 actualHeight = pIn->height;
4255 UINT_64 sliceSize = 0;
4256
4257 if (pIn->numMipLevels > 1)
4258 {
4259 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4260 {
4261 UINT_32 mipWidth, mipHeight;
4262
4263 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4264
4265 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4266
4267 if (pOut->pMipInfo != NULL)
4268 {
4269 pOut->pMipInfo[i].pitch = mipActualWidth;
4270 pOut->pMipInfo[i].height = mipHeight;
4271 pOut->pMipInfo[i].depth = mipDepth;
4272 pOut->pMipInfo[i].offset = sliceSize;
4273 pOut->pMipInfo[i].mipTailOffset = 0;
4274 pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4275 }
4276
4277 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4278 }
4279 }
4280 else
4281 {
4282 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4283
4284 if (returnCode == ADDR_OK)
4285 {
4286 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4287
4288 if (pOut->pMipInfo != NULL)
4289 {
4290 pOut->pMipInfo[0].pitch = pitch;
4291 pOut->pMipInfo[0].height = actualHeight;
4292 pOut->pMipInfo[0].depth = mipDepth;
4293 pOut->pMipInfo[0].offset = 0;
4294 pOut->pMipInfo[0].mipTailOffset = 0;
4295 pOut->pMipInfo[0].macroBlockOffset = 0;
4296 }
4297 }
4298 }
4299
4300 if (returnCode == ADDR_OK)
4301 {
4302 pOut->pitch = pitch;
4303 pOut->height = actualHeight;
4304 pOut->numSlices = pIn->numSlices;
4305 pOut->sliceSize = sliceSize;
4306 pOut->surfSize = sliceSize * pOut->numSlices;
4307 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4308 pOut->blockWidth = pitchAlign;
4309 pOut->blockHeight = 1;
4310 pOut->blockSlices = 1;
4311
4312 // Following members are useless on GFX10
4313 pOut->mipChainPitch = 0;
4314 pOut->mipChainHeight = 0;
4315 pOut->mipChainSlice = 0;
4316 pOut->epitchIsHeight = FALSE;
4317
4318 // Post calculation validate
4319 ADDR_ASSERT(pOut->sliceSize > 0);
4320 }
4321 }
4322
4323 return returnCode;
4324 }
4325
4326 } // V2
4327 } // Addr