amd/addrlib: Clean up unused colorFlags argument
[mesa.git] / src / amd / addrlib / src / gfx10 / gfx10addrlib.cpp
1 /*
2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx10addrlib.cpp
30 * @brief Contains the implementation for the Gfx10Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx10addrlib.h"
35 #include "gfx10_gb_reg.h"
36
37 #include "amdgpu_asic_addr.h"
38
39 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
40 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
41
42 namespace Addr
43 {
44 /**
45 ************************************************************************************************************************
46 * Gfx10HwlInit
47 *
48 * @brief
49 * Creates a Gfx10Lib object.
50 *
51 * @return
52 * Returns a Gfx10Lib object pointer.
53 ************************************************************************************************************************
54 */
55 Addr::Lib* Gfx10HwlInit(const Client* pClient)
56 {
57 return V2::Gfx10Lib::CreateObj(pClient);
58 }
59
60 namespace V2
61 {
62
63 ////////////////////////////////////////////////////////////////////////////////////////////////////
64 // Static Const Member
65 ////////////////////////////////////////////////////////////////////////////////////////////////////
66
67 const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
68 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
69 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
70 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
71 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
72 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
73
74 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
75 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
76 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
77 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
78
79 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
80 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
81 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
82 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
83
84 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
85 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
86 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
87 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
88
89 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
90 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
91 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
92 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
93
94 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
95 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_X
96 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_X
97 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
98
99 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
100 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
101 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
102 {0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_64KB_R_X
103
104 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_Z_X
105 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
106 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
107 {0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_VAR_R_X
108 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
109 };
110
111 const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
112
113 const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
114 const Dim3d Gfx10Lib::Block4K_Log2_3d[] = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
115
116 /**
117 ************************************************************************************************************************
118 * Gfx10Lib::Gfx10Lib
119 *
120 * @brief
121 * Constructor
122 *
123 ************************************************************************************************************************
124 */
125 Gfx10Lib::Gfx10Lib(const Client* pClient)
126 :
127 Lib(pClient),
128 m_colorBaseIndex(0),
129 m_xmaskBaseIndex(0),
130 m_dccBaseIndex(0)
131 {
132 m_class = AI_ADDRLIB;
133 memset(&m_settings, 0, sizeof(m_settings));
134 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
135 }
136
137 /**
138 ************************************************************************************************************************
139 * Gfx10Lib::~Gfx10Lib
140 *
141 * @brief
142 * Destructor
143 ************************************************************************************************************************
144 */
145 Gfx10Lib::~Gfx10Lib()
146 {
147 }
148
149 /**
150 ************************************************************************************************************************
151 * Gfx10Lib::HwlComputeHtileInfo
152 *
153 * @brief
154 * Interface function stub of AddrComputeHtileInfo
155 *
156 * @return
157 * ADDR_E_RETURNCODE
158 ************************************************************************************************************************
159 */
160 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
161 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
162 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
163 ) const
164 {
165 ADDR_E_RETURNCODE ret = ADDR_OK;
166
167 if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
168 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) ||
169 (pIn->hTileFlags.pipeAligned != TRUE))
170 {
171 ret = ADDR_INVALIDPARAMS;
172 }
173 else
174 {
175 Dim3d metaBlk = {0};
176 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
177 ADDR_RSRC_TEX_2D,
178 pIn->swizzleMode,
179 0,
180 0,
181 TRUE,
182 &metaBlk);
183
184 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
185 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
186 pOut->baseAlign = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
187 pOut->metaBlkWidth = metaBlk.w;
188 pOut->metaBlkHeight = metaBlk.h;
189
190 if (pIn->numMipLevels > 1)
191 {
192 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
193
194 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
195
196 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
197 {
198 UINT_32 mipWidth, mipHeight;
199
200 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
201
202 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
203 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
204
205 const UINT_32 pitchInM = mipWidth / metaBlk.w;
206 const UINT_32 heightInM = mipHeight / metaBlk.h;
207 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
208
209 if (pOut->pMipInfo != NULL)
210 {
211 pOut->pMipInfo[i].inMiptail = FALSE;
212 pOut->pMipInfo[i].offset = offset;
213 pOut->pMipInfo[i].sliceSize = mipSliceSize;
214 }
215
216 offset += mipSliceSize;
217 }
218
219 pOut->sliceSize = offset;
220 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
221 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
222
223 if (pOut->pMipInfo != NULL)
224 {
225 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
226 {
227 pOut->pMipInfo[i].inMiptail = TRUE;
228 pOut->pMipInfo[i].offset = 0;
229 pOut->pMipInfo[i].sliceSize = 0;
230 }
231
232 if (pIn->firstMipIdInTail != pIn->numMipLevels)
233 {
234 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
235 }
236 }
237 }
238 else
239 {
240 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
241 const UINT_32 heightInM = pOut->height / metaBlk.h;
242
243 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
244 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
245 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
246
247 if (pOut->pMipInfo != NULL)
248 {
249 pOut->pMipInfo[0].inMiptail = FALSE;
250 pOut->pMipInfo[0].offset = 0;
251 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
252 }
253 }
254 }
255
256 return ret;
257 }
258
259 /**
260 ************************************************************************************************************************
261 * Gfx10Lib::HwlComputeCmaskInfo
262 *
263 * @brief
264 * Interface function stub of AddrComputeCmaskInfo
265 *
266 * @return
267 * ADDR_E_RETURNCODE
268 ************************************************************************************************************************
269 */
270 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
271 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
272 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
273 ) const
274 {
275 ADDR_E_RETURNCODE ret = ADDR_OK;
276
277 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
278 (pIn->cMaskFlags.pipeAligned != TRUE) ||
279 ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
280 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))))
281 {
282 ret = ADDR_INVALIDPARAMS;
283 }
284 else
285 {
286 Dim3d metaBlk = {0};
287 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,
288 ADDR_RSRC_TEX_2D,
289 pIn->swizzleMode,
290 0,
291 0,
292 TRUE,
293 &metaBlk);
294
295 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
296 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
297 pOut->baseAlign = metaBlkSize;
298 pOut->metaBlkWidth = metaBlk.w;
299 pOut->metaBlkHeight = metaBlk.h;
300
301 if (pIn->numMipLevels > 1)
302 {
303 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
304
305 UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;
306
307 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
308 {
309 UINT_32 mipWidth, mipHeight;
310
311 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
312
313 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
314 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
315
316 const UINT_32 pitchInM = mipWidth / metaBlk.w;
317 const UINT_32 heightInM = mipHeight / metaBlk.h;
318
319 if (pOut->pMipInfo != NULL)
320 {
321 pOut->pMipInfo[i].inMiptail = FALSE;
322 pOut->pMipInfo[i].offset = metaBlkPerSlice * metaBlkSize;
323 pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;
324 }
325
326 metaBlkPerSlice += pitchInM * heightInM;
327 }
328
329 pOut->metaBlkNumPerSlice = metaBlkPerSlice;
330
331 if (pOut->pMipInfo != NULL)
332 {
333 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
334 {
335 pOut->pMipInfo[i].inMiptail = TRUE;
336 pOut->pMipInfo[i].offset = 0;
337 pOut->pMipInfo[i].sliceSize = 0;
338 }
339
340 if (pIn->firstMipIdInTail != pIn->numMipLevels)
341 {
342 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
343 }
344 }
345 }
346 else
347 {
348 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
349 const UINT_32 heightInM = pOut->height / metaBlk.h;
350
351 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
352
353 if (pOut->pMipInfo != NULL)
354 {
355 pOut->pMipInfo[0].inMiptail = FALSE;
356 pOut->pMipInfo[0].offset = 0;
357 pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
358 }
359 }
360
361 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
362 pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;
363 }
364
365 return ret;
366 }
367
368 /**
369 ************************************************************************************************************************
370 * Gfx10Lib::HwlComputeDccInfo
371 *
372 * @brief
373 * Interface function to compute DCC key info
374 *
375 * @return
376 * ADDR_E_RETURNCODE
377 ************************************************************************************************************************
378 */
379 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
380 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
381 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
382 ) const
383 {
384 ADDR_E_RETURNCODE ret = ADDR_OK;
385
386 if (pIn->swizzleMode != ADDR_SW_64KB_Z_X && pIn->swizzleMode != ADDR_SW_64KB_R_X)
387 {
388 // Hardware does not support DCC for this swizzle mode.
389 ret = ADDR_INVALIDPARAMS;
390 }
391 else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
392 {
393 // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
394 ret = ADDR_INVALIDPARAMS;
395 }
396 else
397 {
398 // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
399 ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));
400
401 Dim3d metaBlk = {0};
402 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
403 const UINT_32 numFragLog2 = Log2(pIn->numFrags);
404 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
405 pIn->resourceType,
406 pIn->swizzleMode,
407 elemLog2,
408 numFragLog2,
409 pIn->dccKeyFlags.pipeAligned,
410 &metaBlk);
411 const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
412
413 pOut->compressBlkWidth = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
414 pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
415 pOut->compressBlkDepth = isThick ? Block256_3d[elemLog2].d : 1;
416
417 pOut->dccRamBaseAlign = metaBlkSize;
418 pOut->metaBlkWidth = metaBlk.w;
419 pOut->metaBlkHeight = metaBlk.h;
420 pOut->metaBlkDepth = metaBlk.d;
421
422 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
423 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
424 pOut->depth = PowTwoAlign(pIn->numSlices, metaBlk.d);
425
426 if (pIn->numMipLevels > 1)
427 {
428 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
429
430 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
431
432 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
433 {
434 UINT_32 mipWidth, mipHeight;
435
436 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
437
438 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
439 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
440
441 const UINT_32 pitchInM = mipWidth / metaBlk.w;
442 const UINT_32 heightInM = mipHeight / metaBlk.h;
443 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
444
445 if (pOut->pMipInfo != NULL)
446 {
447 pOut->pMipInfo[i].inMiptail = FALSE;
448 pOut->pMipInfo[i].offset = offset;
449 pOut->pMipInfo[i].sliceSize = mipSliceSize;
450 }
451
452 offset += mipSliceSize;
453 }
454
455 pOut->dccRamSliceSize = offset;
456 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
457 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
458
459 if (pOut->pMipInfo != NULL)
460 {
461 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
462 {
463 pOut->pMipInfo[i].inMiptail = TRUE;
464 pOut->pMipInfo[i].offset = 0;
465 pOut->pMipInfo[i].sliceSize = 0;
466 }
467
468 if (pIn->firstMipIdInTail != pIn->numMipLevels)
469 {
470 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
471 }
472 }
473 }
474 else
475 {
476 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
477 const UINT_32 heightInM = pOut->height / metaBlk.h;
478
479 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
480 pOut->dccRamSliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
481 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
482
483 if (pOut->pMipInfo != NULL)
484 {
485 pOut->pMipInfo[0].inMiptail = FALSE;
486 pOut->pMipInfo[0].offset = 0;
487 pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
488 }
489 }
490 }
491
492 return ret;
493 }
494
495 /**
496 ************************************************************************************************************************
497 * Gfx10Lib::HwlComputeCmaskAddrFromCoord
498 *
499 * @brief
500 * Interface function stub of AddrComputeCmaskAddrFromCoord
501 *
502 * @return
503 * ADDR_E_RETURNCODE
504 ************************************************************************************************************************
505 */
506 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
507 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
508 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
509 {
510 // Only support pipe aligned CMask
511 ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);
512
513 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {};
514 input.size = sizeof(input);
515 input.cMaskFlags = pIn->cMaskFlags;
516 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
517 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
518 input.numSlices = Max(pIn->numSlices, 1u);
519 input.swizzleMode = pIn->swizzleMode;
520 input.resourceType = pIn->resourceType;
521
522 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {};
523 output.size = sizeof(output);
524
525 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
526
527 if (returnCode == ADDR_OK)
528 {
529 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
530 const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3);
531 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
532 const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2;
533 const UINT_8* patIdxTable = (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? CMASK_VAR_RBPLUS_PATIDX :
534 (m_settings.supportRbPlus ? CMASK_64K_RBPLUS_PATIDX : CMASK_64K_PATIDX);
535
536 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
537 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
538 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(CMASK_SW_PATTERN[patIdxTable[index]],
539 blkSizeLog2 + 1, // +1 for nibble offset
540 pIn->x,
541 pIn->y,
542 pIn->slice,
543 0);
544 const UINT_32 xb = pIn->x / output.metaBlkWidth;
545 const UINT_32 yb = pIn->y / output.metaBlkHeight;
546 const UINT_32 pb = output.pitch / output.metaBlkWidth;
547 const UINT_32 blkIndex = (yb * pb) + xb;
548 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
549
550 pOut->addr = (output.sliceSize * pIn->slice) +
551 (blkIndex * (1 << blkSizeLog2)) +
552 ((blkOffset >> 1) ^ pipeXor);
553 pOut->bitPosition = (blkOffset & 1) << 2;
554 }
555
556 return returnCode;
557 }
558
559 /**
560 ************************************************************************************************************************
561 * Gfx10Lib::HwlComputeHtileAddrFromCoord
562 *
563 * @brief
564 * Interface function stub of AddrComputeHtileAddrFromCoord
565 *
566 * @return
567 * ADDR_E_RETURNCODE
568 ************************************************************************************************************************
569 */
570 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
571 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
572 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
573 {
574 ADDR_E_RETURNCODE returnCode = ADDR_OK;
575
576 if (pIn->numMipLevels > 1)
577 {
578 returnCode = ADDR_NOTIMPLEMENTED;
579 }
580 else
581 {
582 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
583 input.size = sizeof(input);
584 input.hTileFlags = pIn->hTileFlags;
585 input.depthFlags = pIn->depthflags;
586 input.swizzleMode = pIn->swizzleMode;
587 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
588 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
589 input.numSlices = Max(pIn->numSlices, 1u);
590 input.numMipLevels = 1;
591
592 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
593 output.size = sizeof(output);
594
595 returnCode = ComputeHtileInfo(&input, &output);
596
597 if (returnCode == ADDR_OK)
598 {
599 const UINT_32 numSampleLog2 = Log2(pIn->numSamples);
600 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
601 const UINT_32 index = m_xmaskBaseIndex + numSampleLog2;
602 const UINT_8* patIdxTable = m_settings.supportRbPlus ? HTILE_RBPLUS_PATIDX : HTILE_PATIDX;
603
604 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
605 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
606 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(HTILE_SW_PATTERN[patIdxTable[index]],
607 blkSizeLog2 + 1, // +1 for nibble offset
608 pIn->x,
609 pIn->y,
610 pIn->slice,
611 0);
612 const UINT_32 xb = pIn->x / output.metaBlkWidth;
613 const UINT_32 yb = pIn->y / output.metaBlkHeight;
614 const UINT_32 pb = output.pitch / output.metaBlkWidth;
615 const UINT_32 blkIndex = (yb * pb) + xb;
616 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
617
618 pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
619 (blkIndex * (1 << blkSizeLog2)) +
620 ((blkOffset >> 1) ^ pipeXor);
621 }
622 }
623
624 return returnCode;
625 }
626
627 /**
628 ************************************************************************************************************************
629 * Gfx10Lib::HwlComputeHtileCoordFromAddr
630 *
631 * @brief
632 * Interface function stub of AddrComputeHtileCoordFromAddr
633 *
634 * @return
635 * ADDR_E_RETURNCODE
636 ************************************************************************************************************************
637 */
638 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
639 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
640 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
641 {
642 ADDR_NOT_IMPLEMENTED();
643
644 return ADDR_OK;
645 }
646
647 /**
648 ************************************************************************************************************************
649 * Gfx10Lib::HwlComputeDccAddrFromCoord
650 *
651 * @brief
652 * Interface function stub of AddrComputeDccAddrFromCoord
653 *
654 * @return
655 * ADDR_E_RETURNCODE
656 ************************************************************************************************************************
657 */
658 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccAddrFromCoord(
659 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
660 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
661 {
662 ADDR_E_RETURNCODE returnCode = ADDR_OK;
663
664 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
665 (pIn->swizzleMode != ADDR_SW_64KB_R_X) ||
666 (pIn->dccKeyFlags.linear == TRUE) ||
667 (pIn->numFrags > 1) ||
668 (pIn->numMipLevels > 1) ||
669 (pIn->mipId > 0))
670 {
671 returnCode = ADDR_NOTSUPPORTED;
672 }
673 else
674 {
675 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
676 const UINT_32 numPipeLog2 = m_pipesLog2;
677 const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
678 UINT_32 index = m_dccBaseIndex + elemLog2;
679 const UINT_8* patIdxTable;
680
681 if (m_settings.supportRbPlus)
682 {
683 patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX;
684
685 if (pIn->dccKeyFlags.pipeAligned)
686 {
687 index += MaxNumOfBpp;
688
689 if (m_numPkrLog2 < 2)
690 {
691 index += m_pipesLog2 * MaxNumOfBpp;
692 }
693 else
694 {
695 // 4 groups for "m_numPkrLog2 < 2" case
696 index += 4 * MaxNumOfBpp;
697
698 const UINT_32 dccPipePerPkr = 3;
699
700 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
701 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
702 }
703 }
704 }
705 else
706 {
707 patIdxTable = DCC_64K_R_X_PATIDX;
708
709 if (pIn->dccKeyFlags.pipeAligned)
710 {
711 index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
712 }
713 else
714 {
715 index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
716 }
717 }
718
719 const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
720 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
721 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
722 blkSizeLog2 + 1, // +1 for nibble offset
723 pIn->x,
724 pIn->y,
725 pIn->slice,
726 0);
727 const UINT_32 xb = pIn->x / pIn->metaBlkWidth;
728 const UINT_32 yb = pIn->y / pIn->metaBlkHeight;
729 const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth;
730 const UINT_32 blkIndex = (yb * pb) + xb;
731 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
732
733 pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
734 (blkIndex * (1 << blkSizeLog2)) +
735 ((blkOffset >> 1) ^ pipeXor);
736 }
737
738 return returnCode;
739 }
740
741 /**
742 ************************************************************************************************************************
743 * Gfx10Lib::HwlInitGlobalParams
744 *
745 * @brief
746 * Initializes global parameters
747 *
748 * @return
749 * TRUE if all settings are valid
750 *
751 ************************************************************************************************************************
752 */
753 BOOL_32 Gfx10Lib::HwlInitGlobalParams(
754 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
755 {
756 BOOL_32 valid = TRUE;
757 GB_ADDR_CONFIG_gfx10 gbAddrConfig;
758
759 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
760
761 // These values are copied from CModel code
762 switch (gbAddrConfig.bits.NUM_PIPES)
763 {
764 case ADDR_CONFIG_1_PIPE:
765 m_pipes = 1;
766 m_pipesLog2 = 0;
767 break;
768 case ADDR_CONFIG_2_PIPE:
769 m_pipes = 2;
770 m_pipesLog2 = 1;
771 break;
772 case ADDR_CONFIG_4_PIPE:
773 m_pipes = 4;
774 m_pipesLog2 = 2;
775 break;
776 case ADDR_CONFIG_8_PIPE:
777 m_pipes = 8;
778 m_pipesLog2 = 3;
779 break;
780 case ADDR_CONFIG_16_PIPE:
781 m_pipes = 16;
782 m_pipesLog2 = 4;
783 break;
784 case ADDR_CONFIG_32_PIPE:
785 m_pipes = 32;
786 m_pipesLog2 = 5;
787 break;
788 case ADDR_CONFIG_64_PIPE:
789 m_pipes = 64;
790 m_pipesLog2 = 6;
791 break;
792 default:
793 ADDR_ASSERT_ALWAYS();
794 valid = FALSE;
795 break;
796 }
797
798 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
799 {
800 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
801 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
802 m_pipeInterleaveLog2 = 8;
803 break;
804 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
805 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
806 m_pipeInterleaveLog2 = 9;
807 break;
808 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
809 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
810 m_pipeInterleaveLog2 = 10;
811 break;
812 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
813 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
814 m_pipeInterleaveLog2 = 11;
815 break;
816 default:
817 ADDR_ASSERT_ALWAYS();
818 valid = FALSE;
819 break;
820 }
821
822 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
823 // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
824 // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
825 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
826
827 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
828 {
829 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
830 m_maxCompFrag = 1;
831 m_maxCompFragLog2 = 0;
832 break;
833 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
834 m_maxCompFrag = 2;
835 m_maxCompFragLog2 = 1;
836 break;
837 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
838 m_maxCompFrag = 4;
839 m_maxCompFragLog2 = 2;
840 break;
841 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
842 m_maxCompFrag = 8;
843 m_maxCompFragLog2 = 3;
844 break;
845 default:
846 ADDR_ASSERT_ALWAYS();
847 valid = FALSE;
848 break;
849 }
850
851 {
852 // Skip unaligned case
853 m_xmaskBaseIndex += MaxNumOfAA;
854
855 m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfAA;
856 m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
857
858 if (m_settings.supportRbPlus)
859 {
860 m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
861 m_numSaLog2 = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
862
863 ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
864
865 ADDR_C_ASSERT(sizeof(HTILE_RBPLUS_PATIDX) / sizeof(HTILE_RBPLUS_PATIDX[0]) ==
866 sizeof(CMASK_64K_RBPLUS_PATIDX) / sizeof(CMASK_64K_RBPLUS_PATIDX[0]));
867
868 if (m_numPkrLog2 >= 2)
869 {
870 m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
871 m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
872 }
873 }
874 else
875 {
876 const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
877 static_cast<UINT_32>(ADDR_CONFIG_1_PIPE) +
878 1;
879
880 ADDR_C_ASSERT(sizeof(HTILE_PATIDX) / sizeof(HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);
881
882 ADDR_C_ASSERT(sizeof(HTILE_PATIDX) / sizeof(HTILE_PATIDX[0]) ==
883 sizeof(CMASK_64K_PATIDX) / sizeof(CMASK_64K_PATIDX[0]));
884 }
885 }
886
887 if (m_settings.supportRbPlus)
888 {
889 // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
890 // corresponding SW_64KB_* mode
891 m_blockVarSizeLog2 = m_pipesLog2 + 14;
892 }
893
894 if (valid)
895 {
896 InitEquationTable();
897 }
898
899 return valid;
900 }
901
902 /**
903 ************************************************************************************************************************
904 * Gfx10Lib::HwlConvertChipFamily
905 *
906 * @brief
907 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
908 * @return
909 * ChipFamily
910 ************************************************************************************************************************
911 */
912 ChipFamily Gfx10Lib::HwlConvertChipFamily(
913 UINT_32 chipFamily, ///< [in] chip family defined in atiih.h
914 UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h
915 {
916 ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
917
918 m_settings.dccUnsup3DSwDis = 1;
919
920 switch (chipFamily)
921 {
922 case FAMILY_NV:
923 m_settings.isDcn2 = 1;
924
925 if (ASICREV_IS_SIENNA_M(chipRevision))
926 {
927 m_settings.supportRbPlus = 1;
928 m_settings.dccUnsup3DSwDis = 0;
929 }
930 break;
931 default:
932 ADDR_ASSERT(!"Unknown chip family");
933 break;
934 }
935
936 m_settings.dsMipmapHtileFix = 1;
937
938 if (ASICREV_IS_NAVI10_P(chipRevision))
939 {
940 m_settings.dsMipmapHtileFix = 0;
941 }
942
943 m_configFlags.use32bppFor422Fmt = TRUE;
944
945 return family;
946 }
947
948 /**
949 ************************************************************************************************************************
950 * Gfx10Lib::GetBlk256SizeLog2
951 *
952 * @brief
953 * Get block 256 size
954 *
955 * @return
956 * N/A
957 ************************************************************************************************************************
958 */
959 void Gfx10Lib::GetBlk256SizeLog2(
960 AddrResourceType resourceType, ///< [in] Resource type
961 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
962 UINT_32 elemLog2, ///< [in] element size log2
963 UINT_32 numSamplesLog2, ///< [in] number of samples
964 Dim3d* pBlock ///< [out] block size
965 ) const
966 {
967 if (IsThin(resourceType, swizzleMode))
968 {
969 UINT_32 blockBits = 8 - elemLog2;
970
971 if (IsZOrderSwizzle(swizzleMode))
972 {
973 blockBits -= numSamplesLog2;
974 }
975
976 pBlock->w = (blockBits >> 1) + (blockBits & 1);
977 pBlock->h = (blockBits >> 1);
978 pBlock->d = 0;
979 }
980 else
981 {
982 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
983
984 UINT_32 blockBits = 8 - elemLog2;
985
986 pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
987 pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
988 pBlock->h = (blockBits / 3);
989 }
990 }
991
992 /**
993 ************************************************************************************************************************
994 * Gfx10Lib::GetCompressedBlockSizeLog2
995 *
996 * @brief
997 * Get compress block size
998 *
999 * @return
1000 * N/A
1001 ************************************************************************************************************************
1002 */
1003 void Gfx10Lib::GetCompressedBlockSizeLog2(
1004 Gfx10DataType dataType, ///< [in] Data type
1005 AddrResourceType resourceType, ///< [in] Resource type
1006 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1007 UINT_32 elemLog2, ///< [in] element size log2
1008 UINT_32 numSamplesLog2, ///< [in] number of samples
1009 Dim3d* pBlock ///< [out] block size
1010 ) const
1011 {
1012 if (dataType == Gfx10DataColor)
1013 {
1014 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1015 }
1016 else
1017 {
1018 ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1019 pBlock->w = 3;
1020 pBlock->h = 3;
1021 pBlock->d = 0;
1022 }
1023 }
1024
1025 /**
1026 ************************************************************************************************************************
1027 * Gfx10Lib::GetMetaOverlapLog2
1028 *
1029 * @brief
1030 * Get meta block overlap
1031 *
1032 * @return
1033 * N/A
1034 ************************************************************************************************************************
1035 */
1036 INT_32 Gfx10Lib::GetMetaOverlapLog2(
1037 Gfx10DataType dataType, ///< [in] Data type
1038 AddrResourceType resourceType, ///< [in] Resource type
1039 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1040 UINT_32 elemLog2, ///< [in] element size log2
1041 UINT_32 numSamplesLog2 ///< [in] number of samples
1042 ) const
1043 {
1044 Dim3d compBlock;
1045 Dim3d microBlock;
1046
1047 GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1048 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, &microBlock);
1049
1050 const INT_32 compSizeLog2 = compBlock.w + compBlock.h + compBlock.d;
1051 const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1052 const INT_32 maxSizeLog2 = Max(compSizeLog2, blk256SizeLog2);
1053 const INT_32 numPipesLog2 = GetEffectiveNumPipes();
1054 INT_32 overlap = numPipesLog2 - maxSizeLog2;
1055
1056 if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1057 {
1058 overlap++;
1059 }
1060
1061 // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1062 if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1063 {
1064 overlap--;
1065 }
1066 overlap = Max(overlap, 0);
1067 return overlap;
1068 }
1069
1070 /**
1071 ************************************************************************************************************************
1072 * Gfx10Lib::Get3DMetaOverlapLog2
1073 *
1074 * @brief
1075 * Get 3d meta block overlap
1076 *
1077 * @return
1078 * N/A
1079 ************************************************************************************************************************
1080 */
1081 INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1082 AddrResourceType resourceType, ///< [in] Resource type
1083 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1084 UINT_32 elemLog2 ///< [in] element size log2
1085 ) const
1086 {
1087 Dim3d microBlock;
1088 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, &microBlock);
1089
1090 INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1091
1092 if (m_settings.supportRbPlus)
1093 {
1094 overlap++;
1095 }
1096
1097 if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1098 {
1099 overlap = 0;
1100 }
1101 return overlap;
1102 }
1103
1104 /**
1105 ************************************************************************************************************************
1106 * Gfx10Lib::GetPipeRotateAmount
1107 *
1108 * @brief
1109 * Get pipe rotate amount
1110 *
1111 * @return
1112 * Pipe rotate amount
1113 ************************************************************************************************************************
1114 */
1115
1116 INT_32 Gfx10Lib::GetPipeRotateAmount(
1117 AddrResourceType resourceType, ///< [in] Resource type
1118 AddrSwizzleMode swizzleMode ///< [in] Swizzle mode
1119 ) const
1120 {
1121 INT_32 amount = 0;
1122
1123 if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1124 {
1125 amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1126 1 : m_pipesLog2 - (m_numSaLog2 + 1);
1127 }
1128
1129 return amount;
1130 }
1131
1132 /**
1133 ************************************************************************************************************************
1134 * Gfx10Lib::GetMetaBlkSize
1135 *
1136 * @brief
1137 * Get metadata block size
1138 *
1139 * @return
1140 * Meta block size
1141 ************************************************************************************************************************
1142 */
1143 UINT_32 Gfx10Lib::GetMetaBlkSize(
1144 Gfx10DataType dataType, ///< [in] Data type
1145 AddrResourceType resourceType, ///< [in] Resource type
1146 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1147 UINT_32 elemLog2, ///< [in] element size log2
1148 UINT_32 numSamplesLog2, ///< [in] number of samples
1149 BOOL_32 pipeAlign, ///< [in] pipe align
1150 Dim3d* pBlock ///< [out] block size
1151 ) const
1152 {
1153 INT_32 metablkSizeLog2;
1154 const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType);
1155 const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType);
1156 const INT_32 compBlkSizeLog2 = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1157 const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1158 numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1159 const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode);
1160 INT_32 numPipesLog2 = m_pipesLog2;
1161
1162 if (IsThin(resourceType, swizzleMode))
1163 {
1164 if ((pipeAlign == FALSE) ||
1165 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1166 (IsDisplaySwizzle(resourceType, swizzleMode) == TRUE))
1167 {
1168 if (pipeAlign)
1169 {
1170 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1171 metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1172 }
1173 else
1174 {
1175 metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1176 }
1177 }
1178 else
1179 {
1180 if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1181 {
1182 numPipesLog2++;
1183 }
1184
1185 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1186
1187 if (numPipesLog2 >= 4)
1188 {
1189 INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1190
1191 // In 16Bpe 8xaa, we have an extra overlap bit
1192 if ((pipeRotateLog2 > 0) &&
1193 (elemLog2 == 4) &&
1194 (numSamplesLog2 == 3) &&
1195 (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1196 {
1197 overlapLog2++;
1198 }
1199
1200 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1201 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1202
1203 if (m_settings.supportRbPlus &&
1204 IsRtOptSwizzle(swizzleMode) &&
1205 (numPipesLog2 == 6) &&
1206 (numSamplesLog2 == 3) &&
1207 (m_maxCompFragLog2 == 3) &&
1208 (metablkSizeLog2 < 15))
1209 {
1210 metablkSizeLog2 = 15;
1211 }
1212 }
1213 else
1214 {
1215 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1216 }
1217
1218 if (dataType == Gfx10DataDepthStencil)
1219 {
1220 // For htile surfaces, pad meta block size to 2K * num_pipes
1221 metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1222 }
1223
1224 const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1225
1226 if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1227 {
1228 const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1229
1230 metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1231 }
1232 }
1233
1234 const INT_32 metablkBitsLog2 =
1235 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1236 pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1237 pBlock->h = 1 << (metablkBitsLog2 >> 1);
1238 pBlock->d = 1;
1239 }
1240 else
1241 {
1242 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1243
1244 if (pipeAlign)
1245 {
1246 if (m_settings.supportRbPlus &&
1247 (m_pipesLog2 == m_numSaLog2 + 1) &&
1248 (m_pipesLog2 > 1) &&
1249 IsRbAligned(resourceType, swizzleMode))
1250 {
1251 numPipesLog2++;
1252 }
1253
1254 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1255
1256 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1257 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1258 metablkSizeLog2 = Max(metablkSizeLog2, 12);
1259 }
1260 else
1261 {
1262 metablkSizeLog2 = 12;
1263 }
1264
1265 const INT_32 metablkBitsLog2 =
1266 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1267 pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1268 pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1269 pBlock->d = 1 << (metablkBitsLog2 / 3);
1270 }
1271
1272 return (1 << static_cast<UINT_32>(metablkSizeLog2));
1273 }
1274
1275 /**
1276 ************************************************************************************************************************
1277 * Gfx10Lib::ConvertSwizzlePatternToEquation
1278 *
1279 * @brief
1280 * Convert swizzle pattern to equation.
*
* The pattern is first expanded into one ADDR_BIT_SETTING per address bit; each address bit i of the block is
* then described in pEquation by a primary term addr[i] and up to two extra terms xor1[i] / xor2[i]. Throughout
* this function channel 0 is paired with x, channel 1 with y and channel 2 with z, and x indices are offset by
* elemLog2.
*
* @note
* In the two XOR branches an xor1/xor2 slot is treated as free when its .value is 0 and those slots are never
* cleared here, so pEquation is expected to arrive zero-initialized (InitEquationTable passes `equation = {}`).
* Only the non-XOR branch clears xor1/xor2 itself.
1281 *
1282 * @return
1283 * N/A
1284 ************************************************************************************************************************
1285 */
1286 VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
1287 UINT_32 elemLog2, ///< [in] element bytes log2
1288 AddrResourceType rsrcType, ///< [in] resource type
1289 AddrSwizzleMode swMode, ///< [in] swizzle mode
1290 const ADDR_SW_PATINFO* pPatInfo, ///< [in] swizzle pattern infor
1291 ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern
1292 const
1293 {
// Expanded pattern; indexed by address bit below.
1294 ADDR_BIT_SETTING fullSwizzlePattern[20];
1295 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1296
1297 const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern;
1298 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
1299
1300 pEquation->numBits = blockSizeLog2;
1301 pEquation->stackedDepthSlices = FALSE;
1302
// Address bits below the element size map to channel 0 with the same index.
1303 for (UINT_32 i = 0; i < elemLog2; i++)
1304 {
1305 pEquation->addr[i].channel = 0;
1306 pEquation->addr[i].valid = 1;
1307 pEquation->addr[i].index = i;
1308 }
1309
// Case 1: non-XOR swizzle. Every remaining address bit is asserted to be a single coordinate bit and becomes
// the primary term; xor1/xor2 are explicitly cleared.
1310 if (IsXor(swMode) == FALSE)
1311 {
1312 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1313 {
1314 ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1315
1316 if (pSwizzle[i].x != 0)
1317 {
1318 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1319
1320 pEquation->addr[i].channel = 0;
1321 pEquation->addr[i].valid = 1;
1322 pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2;
1323 }
1324 else if (pSwizzle[i].y != 0)
1325 {
1326 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1327
1328 pEquation->addr[i].channel = 1;
1329 pEquation->addr[i].valid = 1;
1330 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1331 }
1332 else
1333 {
1334 ADDR_ASSERT(pSwizzle[i].z != 0);
1335 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1336
1337 pEquation->addr[i].channel = 2;
1338 pEquation->addr[i].valid = 1;
1339 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1340 }
1341
1342 pEquation->xor1[i].value = 0;
1343 pEquation->xor2[i].value = 0;
1344 }
1345 }
// Case 2: XOR swizzle on a thin resource. Only x and y can become primary terms; a z term goes to xor2.
1346 else if (IsThin(rsrcType, swMode))
1347 {
1348 Dim3d dim;
1349 ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1350
1351 const UINT_32 blkXLog2 = Log2(dim.w);
1352 const UINT_32 blkYLog2 = Log2(dim.h);
1353 const UINT_32 blkXMask = dim.w - 1;
1354 const UINT_32 blkYMask = dim.h - 1;
1355
// swizzle[]   : per address bit, the x/y terms that are still unresolved
// xMask/yMask : coordinate bits already used as a primary (addr) term
// bMask       : address bits that are fully resolved
1356 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1357 UINT_32 xMask = 0;
1358 UINT_32 yMask = 0;
1359 UINT_32 bMask = (1 << elemLog2) - 1;
1360
// Pass 1: single-coordinate entries become primary terms right away. For multi-term entries, x/y bits that lie
// outside the block dimensions are moved to xor1/xor2 and the in-block remainder is kept in swizzle[].
1361 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1362 {
1363 if (IsPow2(pSwizzle[i].value))
1364 {
1365 if (pSwizzle[i].x != 0)
1366 {
1367 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1368 xMask |= pSwizzle[i].x;
1369
1370 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1371
1372 ADDR_ASSERT(xLog2 < blkXLog2);
1373
1374 pEquation->addr[i].channel = 0;
1375 pEquation->addr[i].valid = 1;
1376 pEquation->addr[i].index = xLog2 + elemLog2;
1377 }
1378 else
1379 {
1380 ADDR_ASSERT(pSwizzle[i].y != 0);
1381 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1382 yMask |= pSwizzle[i].y;
1383
1384 pEquation->addr[i].channel = 1;
1385 pEquation->addr[i].valid = 1;
1386 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1387
1388 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1389 }
1390
1391 swizzle[i].value = 0;
1392 bMask |= 1 << i;
1393 }
1394 else
1395 {
1396 if (pSwizzle[i].z != 0)
1397 {
1398 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1399
1400 pEquation->xor2[i].channel = 2;
1401 pEquation->xor2[i].valid = 1;
1402 pEquation->xor2[i].index = Log2(pSwizzle[i].z);
1403 }
1404
1405 swizzle[i].x = pSwizzle[i].x;
1406 swizzle[i].y = pSwizzle[i].y;
1407 swizzle[i].z = swizzle[i].s = 0;
1408
1409 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1410
1411 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1412
1413 if (xHi != 0)
1414 {
1415 ADDR_ASSERT(IsPow2(xHi));
1416 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1417
1418 pEquation->xor1[i].channel = 0;
1419 pEquation->xor1[i].valid = 1;
1420 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1421
1422 swizzle[i].x &= blkXMask;
1423 }
1424
1425 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1426
1427 if (yHi != 0)
1428 {
1429 ADDR_ASSERT(IsPow2(yHi));
1430
// xor1 is free unless the out-of-block x bit above already took it.
1431 if (xHi == 0)
1432 {
1433 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1434 pEquation->xor1[i].channel = 1;
1435 pEquation->xor1[i].valid = 1;
1436 pEquation->xor1[i].index = Log2(yHi);
1437 }
1438 else
1439 {
1440 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1441 pEquation->xor2[i].channel = 1;
1442 pEquation->xor2[i].valid = 1;
1443 pEquation->xor2[i].index = Log2(yHi);
1444 }
1445
1446 swizzle[i].y &= blkYMask;
1447 }
1448
1449 if (swizzle[i].value == 0)
1450 {
1451 bMask |= 1 << i;
1452 }
1453 }
1454 }
1455
1456 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1457 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1458
1459 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1460
// Pass 2: sweep the unresolved bits until all are resolved. An entry that has shrunk to a single coordinate bit
// becomes the primary term; otherwise coordinate bits that are already primary terms elsewhere (xMask/yMask) are
// moved into a free xor slot and removed from the entry.
// NOTE(review): the loop only terminates if every sweep makes progress; presumably the pattern tables guarantee
// this -- confirm.
1461 while (bMask != blockMask)
1462 {
1463 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1464 {
1465 if ((bMask & (1 << i)) == 0)
1466 {
1467 if (IsPow2(swizzle[i].value))
1468 {
1469 if (swizzle[i].x != 0)
1470 {
1471 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1472 xMask |= swizzle[i].x;
1473
1474 const UINT_32 xLog2 = Log2(swizzle[i].x);
1475
1476 ADDR_ASSERT(xLog2 < blkXLog2);
1477
1478 pEquation->addr[i].channel = 0;
1479 pEquation->addr[i].valid = 1;
1480 pEquation->addr[i].index = xLog2 + elemLog2;
1481 }
1482 else
1483 {
1484 ADDR_ASSERT(swizzle[i].y != 0);
1485 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1486 yMask |= swizzle[i].y;
1487
1488 pEquation->addr[i].channel = 1;
1489 pEquation->addr[i].valid = 1;
1490 pEquation->addr[i].index = Log2(swizzle[i].y);
1491
1492 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1493 }
1494
1495 swizzle[i].value = 0;
1496 bMask |= 1 << i;
1497 }
1498 else
1499 {
1500 const UINT_32 x = swizzle[i].x & xMask;
1501 const UINT_32 y = swizzle[i].y & yMask;
1502
1503 if (x != 0)
1504 {
1505 ADDR_ASSERT(IsPow2(x));
1506
1507 if (pEquation->xor1[i].value == 0)
1508 {
1509 pEquation->xor1[i].channel = 0;
1510 pEquation->xor1[i].valid = 1;
1511 pEquation->xor1[i].index = Log2(x) + elemLog2;
1512 }
1513 else
1514 {
1515 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1516 pEquation->xor2[i].channel = 0;
1517 pEquation->xor2[i].valid = 1;
1518 pEquation->xor2[i].index = Log2(x) + elemLog2;
1519 }
1520 }
1521
1522 if (y != 0)
1523 {
1524 ADDR_ASSERT(IsPow2(y));
1525
1526 if (pEquation->xor1[i].value == 0)
1527 {
1528 pEquation->xor1[i].channel = 1;
1529 pEquation->xor1[i].valid = 1;
1530 pEquation->xor1[i].index = Log2(y);
1531 }
1532 else
1533 {
1534 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1535 pEquation->xor2[i].channel = 1;
1536 pEquation->xor2[i].valid = 1;
1537 pEquation->xor2[i].index = Log2(y);
1538 }
1539 }
1540
1541 swizzle[i].x &= ~x;
1542 swizzle[i].y &= ~y;
1543 }
1544 }
1545 }
1546 }
1547
// Every in-block x and y bit must have ended up as exactly one primary term.
1548 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1549 }
// Case 3: XOR swizzle on a non-thin resource. Same two-pass scheme as case 2, with z handled like x and y.
// Block dimensions come from the Block4K_Log2_3d table when blockSizeLog2 is 12, otherwise Block64K_Log2_3d.
1550 else
1551 {
1552 const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
1553 const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
1554 const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
1555 const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1556 const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1557 const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1558
1559 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1560 UINT_32 xMask = 0;
1561 UINT_32 yMask = 0;
1562 UINT_32 zMask = 0;
1563 UINT_32 bMask = (1 << elemLog2) - 1;
1564
// Pass 1: direct single-coordinate terms, plus out-of-block x/y/z bits moved into xor1/xor2.
1565 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1566 {
1567 if (IsPow2(pSwizzle[i].value))
1568 {
1569 if (pSwizzle[i].x != 0)
1570 {
1571 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1572 xMask |= pSwizzle[i].x;
1573
1574 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1575
1576 ADDR_ASSERT(xLog2 < blkXLog2);
1577
1578 pEquation->addr[i].channel = 0;
1579 pEquation->addr[i].valid = 1;
1580 pEquation->addr[i].index = xLog2 + elemLog2;
1581 }
1582 else if (pSwizzle[i].y != 0)
1583 {
1584 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1585 yMask |= pSwizzle[i].y;
1586
1587 pEquation->addr[i].channel = 1;
1588 pEquation->addr[i].valid = 1;
1589 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1590
1591 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1592 }
1593 else
1594 {
1595 ADDR_ASSERT(pSwizzle[i].z != 0);
1596 ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1597 zMask |= pSwizzle[i].z;
1598
1599 pEquation->addr[i].channel = 2;
1600 pEquation->addr[i].valid = 1;
1601 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1602
1603 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1604 }
1605
1606 swizzle[i].value = 0;
1607 bMask |= 1 << i;
1608 }
1609 else
1610 {
1611 swizzle[i].x = pSwizzle[i].x;
1612 swizzle[i].y = pSwizzle[i].y;
1613 swizzle[i].z = pSwizzle[i].z;
1614 swizzle[i].s = 0;
1615
1616 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1617
1618 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1619 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1620 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1621
// Only two xor slots exist, so at most two of the three coordinates may have an out-of-block bit.
1622 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1623
1624 if (xHi != 0)
1625 {
1626 ADDR_ASSERT(IsPow2(xHi));
1627 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1628
1629 pEquation->xor1[i].channel = 0;
1630 pEquation->xor1[i].valid = 1;
1631 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1632
1633 swizzle[i].x &= blkXMask;
1634 }
1635
1636 if (yHi != 0)
1637 {
1638 ADDR_ASSERT(IsPow2(yHi));
1639
1640 if (pEquation->xor1[i].value == 0)
1641 {
1642 pEquation->xor1[i].channel = 1;
1643 pEquation->xor1[i].valid = 1;
1644 pEquation->xor1[i].index = Log2(yHi);
1645 }
1646 else
1647 {
1648 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1649 pEquation->xor2[i].channel = 1;
1650 pEquation->xor2[i].valid = 1;
1651 pEquation->xor2[i].index = Log2(yHi);
1652 }
1653
1654 swizzle[i].y &= blkYMask;
1655 }
1656
1657 if (zHi != 0)
1658 {
1659 ADDR_ASSERT(IsPow2(zHi));
1660
1661 if (pEquation->xor1[i].value == 0)
1662 {
1663 pEquation->xor1[i].channel = 2;
1664 pEquation->xor1[i].valid = 1;
1665 pEquation->xor1[i].index = Log2(zHi);
1666 }
1667 else
1668 {
1669 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1670 pEquation->xor2[i].channel = 2;
1671 pEquation->xor2[i].valid = 1;
1672 pEquation->xor2[i].index = Log2(zHi);
1673 }
1674
1675 swizzle[i].z &= blkZMask;
1676 }
1677
1678 if (swizzle[i].value == 0)
1679 {
1680 bMask |= 1 << i;
1681 }
1682 }
1683 }
1684
1685 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1686 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1687
1688 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1689
// Pass 2: same iterative resolution as the thin case, extended with z.
// NOTE(review): termination again depends on every sweep making progress -- confirm against the pattern tables.
1690 while (bMask != blockMask)
1691 {
1692 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1693 {
1694 if ((bMask & (1 << i)) == 0)
1695 {
1696 if (IsPow2(swizzle[i].value))
1697 {
1698 if (swizzle[i].x != 0)
1699 {
1700 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1701 xMask |= swizzle[i].x;
1702
1703 const UINT_32 xLog2 = Log2(swizzle[i].x);
1704
1705 ADDR_ASSERT(xLog2 < blkXLog2);
1706
1707 pEquation->addr[i].channel = 0;
1708 pEquation->addr[i].valid = 1;
1709 pEquation->addr[i].index = xLog2 + elemLog2;
1710 }
1711 else if (swizzle[i].y != 0)
1712 {
1713 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1714 yMask |= swizzle[i].y;
1715
1716 pEquation->addr[i].channel = 1;
1717 pEquation->addr[i].valid = 1;
1718 pEquation->addr[i].index = Log2(swizzle[i].y);
1719
1720 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1721 }
1722 else
1723 {
1724 ADDR_ASSERT(swizzle[i].z != 0);
1725 ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1726 zMask |= swizzle[i].z;
1727
1728 pEquation->addr[i].channel = 2;
1729 pEquation->addr[i].valid = 1;
1730 pEquation->addr[i].index = Log2(swizzle[i].z);
1731
1732 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1733 }
1734
1735 swizzle[i].value = 0;
1736 bMask |= 1 << i;
1737 }
1738 else
1739 {
1740 const UINT_32 x = swizzle[i].x & xMask;
1741 const UINT_32 y = swizzle[i].y & yMask;
1742 const UINT_32 z = swizzle[i].z & zMask;
1743
1744 if (x != 0)
1745 {
1746 ADDR_ASSERT(IsPow2(x));
1747
1748 if (pEquation->xor1[i].value == 0)
1749 {
1750 pEquation->xor1[i].channel = 0;
1751 pEquation->xor1[i].valid = 1;
1752 pEquation->xor1[i].index = Log2(x) + elemLog2;
1753 }
1754 else
1755 {
1756 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1757 pEquation->xor2[i].channel = 0;
1758 pEquation->xor2[i].valid = 1;
1759 pEquation->xor2[i].index = Log2(x) + elemLog2;
1760 }
1761 }
1762
1763 if (y != 0)
1764 {
1765 ADDR_ASSERT(IsPow2(y));
1766
1767 if (pEquation->xor1[i].value == 0)
1768 {
1769 pEquation->xor1[i].channel = 1;
1770 pEquation->xor1[i].valid = 1;
1771 pEquation->xor1[i].index = Log2(y);
1772 }
1773 else
1774 {
1775 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1776 pEquation->xor2[i].channel = 1;
1777 pEquation->xor2[i].valid = 1;
1778 pEquation->xor2[i].index = Log2(y);
1779 }
1780 }
1781
1782 if (z != 0)
1783 {
1784 ADDR_ASSERT(IsPow2(z));
1785
1786 if (pEquation->xor1[i].value == 0)
1787 {
1788 pEquation->xor1[i].channel = 2;
1789 pEquation->xor1[i].valid = 1;
1790 pEquation->xor1[i].index = Log2(z);
1791 }
1792 else
1793 {
1794 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1795 pEquation->xor2[i].channel = 2;
1796 pEquation->xor2[i].valid = 1;
1797 pEquation->xor2[i].index = Log2(z);
1798 }
1799 }
1800
1801 swizzle[i].x &= ~x;
1802 swizzle[i].y &= ~y;
1803 swizzle[i].z &= ~z;
1804 }
1805 }
1806 }
1807 }
1808
// Every in-block x, y and z bit must have ended up as exactly one primary term.
1809 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1810 }
1811 }
1812
1813 /**
1814 ************************************************************************************************************************
1815 * Gfx10Lib::InitEquationTable
1816 *
1817 * @brief
1818 * Initialize Equation table.
1819 *
1820 * @return
1821 * N/A
1822 ************************************************************************************************************************
1823 */
1824 VOID Gfx10Lib::InitEquationTable()
1825 {
1826 memset(m_equationTable, 0, sizeof(m_equationTable));
1827
1828 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1829 {
1830 const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1831
1832 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1833 {
1834 const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1835
1836 for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1837 {
1838 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
1839 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
1840
1841 if (pPatInfo != NULL)
1842 {
1843 ADDR_ASSERT(IsValidSwMode(swMode));
1844
1845 if (pPatInfo->maxItemCount <= 3)
1846 {
1847 ADDR_EQUATION equation = {};
1848
1849 ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
1850
1851 equationIndex = m_numEquations;
1852 ADDR_ASSERT(equationIndex < EquationTableSize);
1853
1854 m_equationTable[equationIndex] = equation;
1855
1856 m_numEquations++;
1857 }
1858 else
1859 {
1860 // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
1861 ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
1862 ADDR_ASSERT(rsrcTypeIdx == 1);
1863 ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
1864 ADDR_ASSERT(m_settings.supportRbPlus == 1);
1865 }
1866 }
1867
1868 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
1869 }
1870 }
1871 }
1872 }
1873
1874 /**
1875 ************************************************************************************************************************
1876 * Gfx10Lib::HwlGetEquationIndex
1877 *
1878 * @brief
1879 * Interface function stub of GetEquationIndex
1880 *
1881 * @return
1882 * ADDR_E_RETURNCODE
1883 ************************************************************************************************************************
1884 */
1885 UINT_32 Gfx10Lib::HwlGetEquationIndex(
1886 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
1887 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
1888 ) const
1889 {
1890 UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
1891
1892 if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
1893 (pIn->resourceType == ADDR_RSRC_TEX_3D))
1894 {
1895 const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
1896 const UINT_32 swModeIdx = static_cast<UINT_32>(pIn->swizzleMode);
1897 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
1898
1899 equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
1900 }
1901
1902 if (pOut->pMipInfo != NULL)
1903 {
1904 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
1905 {
1906 pOut->pMipInfo[i].equationIndex = equationIdx;
1907 }
1908 }
1909
1910 return equationIdx;
1911 }
1912
1913 /**
1914 ************************************************************************************************************************
1915 * Gfx10Lib::IsValidDisplaySwizzleMode
1916 *
1917 * @brief
1918 * Check if a swizzle mode is supported by display engine
1919 *
1920 * @return
1921 * TRUE is swizzle mode is supported by display engine
1922 ************************************************************************************************************************
1923 */
1924 BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
1925 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
1926 ) const
1927 {
1928 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
1929
1930 BOOL_32 support = FALSE;
1931
1932 if (m_settings.isDcn2)
1933 {
1934 switch (pIn->swizzleMode)
1935 {
1936 case ADDR_SW_4KB_D:
1937 case ADDR_SW_4KB_D_X:
1938 case ADDR_SW_64KB_D:
1939 case ADDR_SW_64KB_D_T:
1940 case ADDR_SW_64KB_D_X:
1941 support = (pIn->bpp == 64);
1942 break;
1943
1944 case ADDR_SW_LINEAR:
1945 case ADDR_SW_4KB_S:
1946 case ADDR_SW_4KB_S_X:
1947 case ADDR_SW_64KB_S:
1948 case ADDR_SW_64KB_S_T:
1949 case ADDR_SW_64KB_S_X:
1950 case ADDR_SW_64KB_R_X:
1951 support = (pIn->bpp <= 64);
1952 break;
1953
1954 default:
1955 break;
1956 }
1957 }
1958 else
1959 {
1960 ADDR_NOT_IMPLEMENTED();
1961 }
1962
1963 return support;
1964 }
1965
1966 /**
1967 ************************************************************************************************************************
1968 * Gfx10Lib::GetMaxNumMipsInTail
1969 *
1970 * @brief
1971 * Return max number of mips in tails
1972 *
1973 * @return
1974 * Max number of mips in tails
1975 ************************************************************************************************************************
1976 */
1977 UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
1978 UINT_32 blockSizeLog2, ///< block size log2
1979 BOOL_32 isThin ///< is thin or thick
1980 ) const
1981 {
1982 UINT_32 effectiveLog2 = blockSizeLog2;
1983
1984 if (isThin == FALSE)
1985 {
1986 effectiveLog2 -= (blockSizeLog2 - 8) / 3;
1987 }
1988
1989 return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
1990 }
1991
1992 /**
1993 ************************************************************************************************************************
1994 * Gfx10Lib::HwlComputePipeBankXor
1995 *
1996 * @brief
1997 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
1998 *
1999 * @return
2000 * PipeBankXor value
2001 ************************************************************************************************************************
2002 */
2003 ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
2004 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2005 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2006 ) const
2007 {
2008 if (IsNonPrtXor(pIn->swizzleMode))
2009 {
2010 const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2011 const UINT_32 pipeBits = GetPipeXorBits(blockBits);
2012 const UINT_32 bankBits = GetBankXorBits(blockBits);
2013
2014 UINT_32 pipeXor = 0;
2015 UINT_32 bankXor = 0;
2016
2017 if (bankBits != 0)
2018 {
2019 if (blockBits == 16)
2020 {
2021 const UINT_32 XorPatternLen = 8;
2022 static const UINT_32 XorBank1b[XorPatternLen] = {0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80};
2023 static const UINT_32 XorBank2b[XorPatternLen] = {0x00, 0x80, 0x40, 0xC0, 0x80, 0x00, 0xC0, 0x40};
2024 static const UINT_32 XorBank3b[XorPatternLen] = {0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0};
2025
2026 const UINT_32 index = pIn->surfIndex % XorPatternLen;
2027
2028 if (bankBits == 1)
2029 {
2030 bankXor = XorBank1b[index];
2031 }
2032 else if (bankBits == 2)
2033 {
2034 bankXor = XorBank2b[index];
2035 }
2036 else
2037 {
2038 bankXor = XorBank3b[index];
2039
2040 if (bankBits == 4)
2041 {
2042 bankXor >>= (2 - pipeBits);
2043 }
2044 }
2045 }
2046 }
2047
2048 pOut->pipeBankXor = bankXor | pipeXor;
2049 }
2050 else
2051 {
2052 pOut->pipeBankXor = 0;
2053 }
2054
2055 return ADDR_OK;
2056 }
2057
2058 /**
2059 ************************************************************************************************************************
2060 * Gfx10Lib::HwlComputeSlicePipeBankXor
2061 *
2062 * @brief
2063 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2064 *
2065 * @return
2066 * PipeBankXor value
2067 ************************************************************************************************************************
2068 */
2069 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
2070 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2071 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2072 ) const
2073 {
2074 if (IsNonPrtXor(pIn->swizzleMode))
2075 {
2076 const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2077 const UINT_32 pipeBits = GetPipeXorBits(blockBits);
2078 const UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
2079
2080 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;
2081 }
2082 else
2083 {
2084 pOut->pipeBankXor = 0;
2085 }
2086
2087 return ADDR_OK;
2088 }
2089
2090 /**
2091 ************************************************************************************************************************
2092 * Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2093 *
2094 * @brief
2095 * Compute sub resource offset to support swizzle pattern
2096 *
2097 * @return
2098 * Offset
2099 ************************************************************************************************************************
2100 */
2101 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2102 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure
2103 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure
2104 ) const
2105 {
2106 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2107
2108 pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
2109
2110 return ADDR_OK;
2111 }
2112
2113 /**
2114 ************************************************************************************************************************
2115 * Gfx10Lib::ValidateNonSwModeParams
2116 *
2117 * @brief
2118 * Validate compute surface info params except swizzle mode
2119 *
2120 * @return
2121 * TRUE if parameters are valid, FALSE otherwise
2122 ************************************************************************************************************************
2123 */
2124 BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
2125 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2126 {
2127 BOOL_32 valid = TRUE;
2128
2129 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2130 {
2131 ADDR_ASSERT_ALWAYS();
2132 valid = FALSE;
2133 }
2134
2135 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
2136 {
2137 ADDR_ASSERT_ALWAYS();
2138 valid = FALSE;
2139 }
2140
2141 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2142 const AddrResourceType rsrcType = pIn->resourceType;
2143 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
2144 const BOOL_32 msaa = (pIn->numFrags > 1);
2145 const BOOL_32 display = flags.display;
2146 const BOOL_32 tex3d = IsTex3d(rsrcType);
2147 const BOOL_32 tex2d = IsTex2d(rsrcType);
2148 const BOOL_32 tex1d = IsTex1d(rsrcType);
2149 const BOOL_32 stereo = flags.qbStereo;
2150
2151 // Resource type check
2152 if (tex1d)
2153 {
2154 if (msaa || display || stereo)
2155 {
2156 ADDR_ASSERT_ALWAYS();
2157 valid = FALSE;
2158 }
2159 }
2160 else if (tex2d)
2161 {
2162 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2163 {
2164 ADDR_ASSERT_ALWAYS();
2165 valid = FALSE;
2166 }
2167 }
2168 else if (tex3d)
2169 {
2170 if (msaa || display || stereo)
2171 {
2172 ADDR_ASSERT_ALWAYS();
2173 valid = FALSE;
2174 }
2175 }
2176 else
2177 {
2178 ADDR_ASSERT_ALWAYS();
2179 valid = FALSE;
2180 }
2181
2182 return valid;
2183 }
2184
2185 /**
2186 ************************************************************************************************************************
2187 * Gfx10Lib::ValidateSwModeParams
2188 *
2189 * @brief
2190 * Validate compute surface info related to swizzle mode
2191 *
2192 * @return
2193 * TRUE if parameters are valid, FALSE otherwise
2194 ************************************************************************************************************************
2195 */
2196 BOOL_32 Gfx10Lib::ValidateSwModeParams(
2197 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2198 {
2199 BOOL_32 valid = TRUE;
2200
2201 if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
2202 {
2203 ADDR_ASSERT_ALWAYS();
2204 valid = FALSE;
2205 }
2206
2207 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2208 const AddrResourceType rsrcType = pIn->resourceType;
2209 const AddrSwizzleMode swizzle = pIn->swizzleMode;
2210 const BOOL_32 msaa = (pIn->numFrags > 1);
2211 const BOOL_32 zbuffer = flags.depth || flags.stencil;
2212 const BOOL_32 color = flags.color;
2213 const BOOL_32 display = flags.display;
2214 const BOOL_32 tex3d = IsTex3d(rsrcType);
2215 const BOOL_32 tex2d = IsTex2d(rsrcType);
2216 const BOOL_32 tex1d = IsTex1d(rsrcType);
2217 const BOOL_32 thin3d = flags.view3dAs2dArray;
2218 const BOOL_32 linear = IsLinear(swizzle);
2219 const BOOL_32 blk256B = IsBlock256b(swizzle);
2220 const BOOL_32 blkVar = IsBlockVariable(swizzle);
2221 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
2222 const BOOL_32 prt = flags.prt;
2223 const BOOL_32 fmask = flags.fmask;
2224
2225 // Misc check
2226 if ((pIn->numFrags > 1) &&
2227 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2228 {
2229 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2230 ADDR_ASSERT_ALWAYS();
2231 valid = FALSE;
2232 }
2233
2234 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2235 {
2236 ADDR_ASSERT_ALWAYS();
2237 valid = FALSE;
2238 }
2239
2240 if ((pIn->bpp == 96) && (linear == FALSE))
2241 {
2242 ADDR_ASSERT_ALWAYS();
2243 valid = FALSE;
2244 }
2245
2246 const UINT_32 swizzleMask = 1 << swizzle;
2247
2248 // Resource type check
2249 if (tex1d)
2250 {
2251 if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
2252 {
2253 ADDR_ASSERT_ALWAYS();
2254 valid = FALSE;
2255 }
2256 }
2257 else if (tex2d)
2258 {
2259 if (((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0) ||
2260 (prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||
2261 (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))
2262 {
2263 ADDR_ASSERT_ALWAYS();
2264 valid = FALSE;
2265 }
2266 }
2267 else if (tex3d)
2268 {
2269 if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
2270 (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
2271 (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0)))
2272 {
2273 ADDR_ASSERT_ALWAYS();
2274 valid = FALSE;
2275 }
2276 }
2277
2278 // Swizzle type check
2279 if (linear)
2280 {
2281 if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2282 {
2283 ADDR_ASSERT_ALWAYS();
2284 valid = FALSE;
2285 }
2286 }
2287 else if (IsZOrderSwizzle(swizzle))
2288 {
2289 if ((pIn->bpp > 64) ||
2290 (msaa && (color || (pIn->bpp > 32))) ||
2291 ElemLib::IsBlockCompressed(pIn->format) ||
2292 ElemLib::IsMacroPixelPacked(pIn->format))
2293 {
2294 ADDR_ASSERT_ALWAYS();
2295 valid = FALSE;
2296 }
2297 }
2298 else if (IsStandardSwizzle(rsrcType, swizzle))
2299 {
2300 if (zbuffer || msaa)
2301 {
2302 ADDR_ASSERT_ALWAYS();
2303 valid = FALSE;
2304 }
2305 }
2306 else if (IsDisplaySwizzle(rsrcType, swizzle))
2307 {
2308 if (zbuffer || msaa)
2309 {
2310 ADDR_ASSERT_ALWAYS();
2311 valid = FALSE;
2312 }
2313 }
2314 else if (IsRtOptSwizzle(swizzle))
2315 {
2316 if (zbuffer)
2317 {
2318 ADDR_ASSERT_ALWAYS();
2319 valid = FALSE;
2320 }
2321 }
2322 else
2323 {
2324 ADDR_ASSERT_ALWAYS();
2325 valid = FALSE;
2326 }
2327
2328 // Block type check
2329 if (blk256B)
2330 {
2331 if (zbuffer || tex3d || msaa)
2332 {
2333 ADDR_ASSERT_ALWAYS();
2334 valid = FALSE;
2335 }
2336 }
2337 else if (blkVar)
2338 {
2339 if (m_blockVarSizeLog2 == 0)
2340 {
2341 ADDR_ASSERT_ALWAYS();
2342 valid = FALSE;
2343 }
2344 }
2345
2346 return valid;
2347 }
2348
2349 /**
2350 ************************************************************************************************************************
2351 * Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2352 *
2353 * @brief
2354 * Compute surface info sanity check
2355 *
2356 * @return
2357 * Offset
2358 ************************************************************************************************************************
2359 */
2360 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2361 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
2362 ) const
2363 {
2364 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2365 }
2366
2367 /**
2368 ************************************************************************************************************************
2369 * Gfx10Lib::HwlGetPreferredSurfaceSetting
2370 *
2371 * @brief
2372 * Internal function to get suggested surface information for cliet to use
2373 *
2374 * @return
2375 * ADDR_E_RETURNCODE
2376 ************************************************************************************************************************
2377 */
2378 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2379 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure
2380 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure
2381 ) const
2382 {
2383 ADDR_E_RETURNCODE returnCode = ADDR_OK;
2384
2385 if (pIn->flags.fmask)
2386 {
2387 const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;
2388 const BOOL_32 forbidVarBlockType = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));
2389
2390 if (forbid64KbBlockType && forbidVarBlockType)
2391 {
2392 // Invalid combination...
2393 ADDR_ASSERT_ALWAYS();
2394 returnCode = ADDR_INVALIDPARAMS;
2395 }
2396 else
2397 {
2398 pOut->resourceType = ADDR_RSRC_TEX_2D;
2399 pOut->validBlockSet.value = 0;
2400 pOut->validBlockSet.macroThin64KB = forbid64KbBlockType ? 0 : 1;
2401 pOut->validBlockSet.var = forbidVarBlockType ? 0 : 1;
2402 pOut->validSwModeSet.value = 0;
2403 pOut->validSwModeSet.sw64KB_Z_X = forbid64KbBlockType ? 0 : 1;
2404 pOut->validSwModeSet.swVar_Z_X = forbidVarBlockType ? 0 : 1;
2405 pOut->canXor = TRUE;
2406 pOut->validSwTypeSet.value = AddrSwSetZ;
2407 pOut->clientPreferredSwSet = pOut->validSwTypeSet;
2408
2409 BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);
2410
2411 if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
2412 {
2413 const UINT_8 maxFmaskSwizzleModeType = 2;
2414 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2415 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2416 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
2417 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2418 const UINT_32 width = Max(pIn->width, 1u);
2419 const UINT_32 height = Max(pIn->height, 1u);
2420 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);
2421
2422 AddrSwizzleMode swMode[maxFmaskSwizzleModeType] = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};
2423 Dim3d blkDim[maxFmaskSwizzleModeType] = {{0}, {0}};
2424 Dim3d padDim[maxFmaskSwizzleModeType] = {{0}, {0}};
2425 UINT_64 padSize[maxFmaskSwizzleModeType] = {0};
2426
2427 for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)
2428 {
2429 ComputeBlockDimensionForSurf(&blkDim[i].w,
2430 &blkDim[i].h,
2431 &blkDim[i].d,
2432 fmaskBpp,
2433 1,
2434 pOut->resourceType,
2435 swMode[i]);
2436
2437 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2438 padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2439 }
2440
2441 if (GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0]))
2442 {
2443 if ((padSize[1] * ratioHi) <= (padSize[0] * ratioLow))
2444 {
2445 use64KbBlockType = FALSE;
2446 }
2447 }
2448 else
2449 {
2450 if ((padSize[1] * ratioLow) < (padSize[0] * ratioHi))
2451 {
2452 use64KbBlockType = FALSE;
2453 }
2454 }
2455 }
2456 else if (forbidVarBlockType)
2457 {
2458 use64KbBlockType = TRUE;
2459 }
2460
2461 if (use64KbBlockType)
2462 {
2463 pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2464 }
2465 else
2466 {
2467 pOut->swizzleMode = ADDR_SW_VAR_Z_X;
2468 }
2469 }
2470 }
2471 else
2472 {
2473 UINT_32 bpp = pIn->bpp;
2474 UINT_32 width = Max(pIn->width, 1u);
2475 UINT_32 height = Max(pIn->height, 1u);
2476
2477 // Set format to INVALID will skip this conversion
2478 if (pIn->format != ADDR_FMT_INVALID)
2479 {
2480 ElemMode elemMode = ADDR_UNCOMPRESSED;
2481 UINT_32 expandX, expandY;
2482
2483 // Get compression/expansion factors and element mode which indicates compression/expansion
2484 bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2485 &elemMode,
2486 &expandX,
2487 &expandY);
2488
2489 UINT_32 basePitch = 0;
2490 GetElemLib()->AdjustSurfaceInfo(elemMode,
2491 expandX,
2492 expandY,
2493 &bpp,
2494 &basePitch,
2495 &width,
2496 &height);
2497 }
2498
2499 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2500 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2501 const UINT_32 numSamples = Max(pIn->numSamples, 1u);
2502 const UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2503 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
2504
2505 // Pre sanity check on non swizzle mode parameters
2506 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2507 localIn.flags = pIn->flags;
2508 localIn.resourceType = pIn->resourceType;
2509 localIn.format = pIn->format;
2510 localIn.bpp = bpp;
2511 localIn.width = width;
2512 localIn.height = height;
2513 localIn.numSlices = numSlices;
2514 localIn.numMipLevels = numMipLevels;
2515 localIn.numSamples = numSamples;
2516 localIn.numFrags = numFrags;
2517
2518 if (ValidateNonSwModeParams(&localIn))
2519 {
2520 // Forbid swizzle mode(s) by client setting
2521 ADDR2_SWMODE_SET allowedSwModeSet = {};
2522 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2523 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx10Blk256BSwModeMask;
2524 allowedSwModeSet.value |=
2525 pIn->forbiddenBlock.macroThin4KB ? 0 :
2526 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
2527 allowedSwModeSet.value |=
2528 pIn->forbiddenBlock.macroThick4KB ? 0 :
2529 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
2530 allowedSwModeSet.value |=
2531 pIn->forbiddenBlock.macroThin64KB ? 0 :
2532 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
2533 allowedSwModeSet.value |=
2534 pIn->forbiddenBlock.macroThick64KB ? 0 :
2535 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
2536 allowedSwModeSet.value |=
2537 pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
2538
2539 if (pIn->preferredSwSet.value != 0)
2540 {
2541 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2542 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2543 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2544 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2545 }
2546
2547 if (pIn->noXor)
2548 {
2549 allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2550 }
2551
2552 if (pIn->maxAlign > 0)
2553 {
2554 if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
2555 {
2556 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2557 }
2558
2559 if (pIn->maxAlign < Size64K)
2560 {
2561 allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2562 }
2563
2564 if (pIn->maxAlign < Size4K)
2565 {
2566 allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2567 }
2568
2569 if (pIn->maxAlign < Size256)
2570 {
2571 allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2572 }
2573 }
2574
2575 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2576 switch (pIn->resourceType)
2577 {
2578 case ADDR_RSRC_TEX_1D:
2579 allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2580 break;
2581
2582 case ADDR_RSRC_TEX_2D:
2583 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2584 break;
2585
2586 case ADDR_RSRC_TEX_3D:
2587 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2588
2589 if (pIn->flags.view3dAs2dArray)
2590 {
2591 allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask;
2592 }
2593 break;
2594
2595 default:
2596 ADDR_ASSERT_ALWAYS();
2597 allowedSwModeSet.value = 0;
2598 break;
2599 }
2600
2601 if (ElemLib::IsBlockCompressed(pIn->format) ||
2602 ElemLib::IsMacroPixelPacked(pIn->format) ||
2603 (bpp > 64) ||
2604 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2605 {
2606 allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2607 }
2608
2609 if (pIn->format == ADDR_FMT_32_32_32)
2610 {
2611 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2612 }
2613
2614 if (msaa)
2615 {
2616 allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
2617 }
2618
2619 if (pIn->flags.depth || pIn->flags.stencil)
2620 {
2621 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2622 }
2623
2624 if (pIn->flags.display)
2625 {
2626 if (m_settings.isDcn2)
2627 {
2628 allowedSwModeSet.value &= (bpp == 64) ? Dcn2Bpp64SwModeMask : Dcn2NonBpp64SwModeMask;
2629 }
2630 else
2631 {
2632 ADDR_NOT_IMPLEMENTED();
2633 }
2634 }
2635
2636 if (allowedSwModeSet.value != 0)
2637 {
2638 #if DEBUG
2639 // Post sanity check, at least AddrLib should accept the output generated by its own
2640 UINT_32 validateSwModeSet = allowedSwModeSet.value;
2641
2642 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
2643 {
2644 if (validateSwModeSet & 1)
2645 {
2646 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
2647 ADDR_ASSERT(ValidateSwModeParams(&localIn));
2648 }
2649
2650 validateSwModeSet >>= 1;
2651 }
2652 #endif
2653
2654 pOut->resourceType = pIn->resourceType;
2655 pOut->validSwModeSet = allowedSwModeSet;
2656 pOut->canXor = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
2657 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2658 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
2659
2660 pOut->clientPreferredSwSet = pIn->preferredSwSet;
2661
2662 if (pOut->clientPreferredSwSet.value == 0)
2663 {
2664 pOut->clientPreferredSwSet.value = AddrSwSetAll;
2665 }
2666
2667 // Apply optional restrictions
2668 if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)
2669 {
2670 if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)
2671 {
2672 // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
2673 // the GL2 in VAR mode, so it should be avoided.
2674 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2675 }
2676 else
2677 {
2678 // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
2679 // But we have to suffer from low performance because there is no other choice...
2680 ADDR_ASSERT_ALWAYS();
2681 }
2682 }
2683
2684 if (pIn->flags.needEquation)
2685 {
2686 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
2687 }
2688
2689 if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
2690 {
2691 pOut->swizzleMode = ADDR_SW_LINEAR;
2692 }
2693 else
2694 {
2695 // Always ignore linear swizzle mode if there is other choice.
2696 allowedSwModeSet.swLinear = 0;
2697
2698 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2699
2700 // Determine block size if there is 2 or more block type candidates
2701 if (IsPow2(allowedBlockSet.value) == FALSE)
2702 {
2703 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = { ADDR_SW_LINEAR };
2704
2705 if (m_blockVarSizeLog2 != 0)
2706 {
2707 swMode[AddrBlockVar] = ADDR_SW_VAR_R_X;
2708 }
2709
2710 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
2711 {
2712 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
2713 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_R_X;
2714 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
2715 }
2716 else
2717 {
2718 swMode[AddrBlockMicro] = ADDR_SW_256B_S;
2719 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_S;
2720 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
2721 }
2722
2723 Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
2724 Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
2725 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
2726
2727 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2728 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2729 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
2730 UINT_32 minSizeBlk = AddrBlockMicro;
2731 UINT_64 minSize = 0;
2732
2733 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
2734 {
2735 if (allowedBlockSet.value & (1 << i))
2736 {
2737 ComputeBlockDimensionForSurf(&blkDim[i].w,
2738 &blkDim[i].h,
2739 &blkDim[i].d,
2740 bpp,
2741 numFrags,
2742 pOut->resourceType,
2743 swMode[i]);
2744
2745 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2746 padSize[i] = PowTwoAlign(padSize[i] * numFrags, sizeAlignInElement);
2747
2748 if (minSize == 0)
2749 {
2750 minSize = padSize[i];
2751 minSizeBlk = i;
2752 }
2753 else
2754 {
2755 // Due to the fact that VAR block size = 16KB * m_pipes, it is possible that VAR
2756 // block size < 64KB. And ratio[Hi/Low] logic implicitly requires iterating from
2757 // smaller block type to bigger block type. So we have to correct comparing logic
2758 // according to the size of existing "minimun block" and size of coming/comparing
2759 // block. The new logic can also be useful to any future change about AddrBlockType.
2760 if (GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk]))
2761 {
2762 if ((padSize[i] * ratioHi) <= (minSize * ratioLow))
2763 {
2764 minSize = padSize[i];
2765 minSizeBlk = i;
2766 }
2767 }
2768 else
2769 {
2770 if ((padSize[i] * ratioLow) < (minSize * ratioHi))
2771 {
2772 minSize = padSize[i];
2773 minSizeBlk = i;
2774 }
2775 }
2776 }
2777 }
2778 }
2779
2780 if ((allowedBlockSet.micro == TRUE) &&
2781 (width <= blkDim[AddrBlockMicro].w) &&
2782 (height <= blkDim[AddrBlockMicro].h))
2783 {
2784 minSizeBlk = AddrBlockMicro;
2785 }
2786
2787 if (minSizeBlk == AddrBlockMicro)
2788 {
2789 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2790 allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
2791 }
2792 else if (minSizeBlk == AddrBlockThick4KB)
2793 {
2794 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2795 allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;
2796 }
2797 else if (minSizeBlk == AddrBlockThin4KB)
2798 {
2799 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2800 allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
2801 }
2802 else if (minSizeBlk == AddrBlockThick64KB)
2803 {
2804 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2805 allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;
2806 }
2807 else if (minSizeBlk == AddrBlockThin64KB)
2808 {
2809 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
2810 Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;
2811 }
2812 else
2813 {
2814 ADDR_ASSERT(minSizeBlk == AddrBlockVar);
2815 allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;
2816 }
2817 }
2818
2819 // Block type should be determined.
2820 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
2821
2822 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
2823
2824 // Determine swizzle type if there is 2 or more swizzle type candidates
2825 if (IsPow2(allowedSwSet.value) == FALSE)
2826 {
2827 if (ElemLib::IsBlockCompressed(pIn->format))
2828 {
2829 if (allowedSwSet.sw_D)
2830 {
2831 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2832 }
2833 else if (allowedSwSet.sw_S)
2834 {
2835 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2836 }
2837 else
2838 {
2839 ADDR_ASSERT(allowedSwSet.sw_R);
2840 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2841 }
2842 }
2843 else if (ElemLib::IsMacroPixelPacked(pIn->format))
2844 {
2845 if (allowedSwSet.sw_S)
2846 {
2847 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2848 }
2849 else if (allowedSwSet.sw_D)
2850 {
2851 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2852 }
2853 else
2854 {
2855 ADDR_ASSERT(allowedSwSet.sw_R);
2856 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2857 }
2858 }
2859 else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
2860 {
2861 if (pIn->flags.color &&
2862 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&
2863 allowedSwSet.sw_D)
2864 {
2865 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2866 }
2867 else if (allowedSwSet.sw_S)
2868 {
2869 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2870 }
2871 else if (allowedSwSet.sw_R)
2872 {
2873 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2874 }
2875 else
2876 {
2877 ADDR_ASSERT(allowedSwSet.sw_Z);
2878 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2879 }
2880 }
2881 else
2882 {
2883 if (allowedSwSet.sw_R)
2884 {
2885 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2886 }
2887 else if (allowedSwSet.sw_D)
2888 {
2889 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2890 }
2891 else if (allowedSwSet.sw_S)
2892 {
2893 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2894 }
2895 else
2896 {
2897 ADDR_ASSERT(allowedSwSet.sw_Z);
2898 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2899 }
2900 }
2901 }
2902
2903 // Swizzle type should be determined.
2904 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
2905
2906 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
2907 // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
2908 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
2909 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
2910 }
2911 }
2912 else
2913 {
2914 // Invalid combination...
2915 ADDR_ASSERT_ALWAYS();
2916 returnCode = ADDR_INVALIDPARAMS;
2917 }
2918 }
2919 else
2920 {
2921 // Invalid combination...
2922 ADDR_ASSERT_ALWAYS();
2923 returnCode = ADDR_INVALIDPARAMS;
2924 }
2925 }
2926
2927 return returnCode;
2928 }
2929
2930 /**
2931 ************************************************************************************************************************
2932 * Gfx10Lib::ComputeStereoInfo
2933 *
2934 * @brief
2935 * Compute height alignment and right eye pipeBankXor for stereo surface
2936 *
2937 * @return
2938 * Error code
2939 *
2940 ************************************************************************************************************************
2941 */
2942 ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
2943 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< Compute surface info
2944 UINT_32 blkHeight, ///< Block height
2945 UINT_32* pAlignY, ///< Stereo requested additional alignment in Y
2946 UINT_32* pRightXor ///< Right eye xor
2947 ) const
2948 {
2949 ADDR_E_RETURNCODE ret = ADDR_OK;
2950
2951 *pAlignY = 1;
2952 *pRightXor = 0;
2953
2954 if (IsNonPrtXor(pIn->swizzleMode))
2955 {
2956 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
2957 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
2958 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
2959 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
2960 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
2961
2962 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
2963 {
2964 UINT_32 yMax = 0;
2965 UINT_32 yPos = 0;
2966
2967 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
2968 {
2969 if (m_equationTable[eqIndex].xor1[i].value == 0)
2970 {
2971 break;
2972 }
2973
2974 ADDR_ASSERT(m_equationTable[eqIndex].xor1[i].valid == 1);
2975
2976 if ((m_equationTable[eqIndex].xor1[i].channel == 1) &&
2977 (m_equationTable[eqIndex].xor1[i].index > yMax))
2978 {
2979 yMax = m_equationTable[eqIndex].xor1[i].index;
2980 yPos = i;
2981 }
2982 }
2983
2984 const UINT_32 additionalAlign = 1 << yMax;
2985
2986 if (additionalAlign >= blkHeight)
2987 {
2988 *pAlignY *= (additionalAlign / blkHeight);
2989
2990 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
2991
2992 if ((alignedHeight >> yMax) & 1)
2993 {
2994 *pRightXor = 1 << (yPos - m_pipeInterleaveLog2);
2995 }
2996 }
2997 }
2998 else
2999 {
3000 ret = ADDR_INVALIDPARAMS;
3001 }
3002 }
3003
3004 return ret;
3005 }
3006
3007 /**
3008 ************************************************************************************************************************
3009 * Gfx10Lib::HwlComputeSurfaceInfoTiled
3010 *
3011 * @brief
3012 * Internal function to calculate alignment for tiled surface
3013 *
3014 * @return
3015 * ADDR_E_RETURNCODE
3016 ************************************************************************************************************************
3017 */
3018 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
3019 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3020 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3021 ) const
3022 {
3023 ADDR_E_RETURNCODE ret;
3024
3025 if (IsBlock256b(pIn->swizzleMode))
3026 {
3027 ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3028 }
3029 else
3030 {
3031 ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3032 }
3033
3034 return ret;
3035 }
3036
3037 /**
3038 ************************************************************************************************************************
3039 * Gfx10Lib::ComputeSurfaceInfoMicroTiled
3040 *
3041 * @brief
3042 * Internal function to calculate alignment for micro tiled surface
3043 *
3044 * @return
3045 * ADDR_E_RETURNCODE
3046 ************************************************************************************************************************
3047 */
3048 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
3049 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3050 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3051 ) const
3052 {
3053 ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3054 &pOut->blockHeight,
3055 &pOut->blockSlices,
3056 pIn->bpp,
3057 pIn->numFrags,
3058 pIn->resourceType,
3059 pIn->swizzleMode);
3060
3061 if (ret == ADDR_OK)
3062 {
3063 pOut->mipChainPitch = 0;
3064 pOut->mipChainHeight = 0;
3065 pOut->mipChainSlice = 0;
3066 pOut->epitchIsHeight = FALSE;
3067 pOut->mipChainInTail = FALSE;
3068 pOut->firstMipIdInTail = pIn->numMipLevels;
3069
3070 const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3071
3072 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3073 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3074 pOut->numSlices = pIn->numSlices;
3075 pOut->baseAlign = blockSize;
3076
3077 if (pIn->numMipLevels > 1)
3078 {
3079 const UINT_32 mip0Width = pIn->width;
3080 const UINT_32 mip0Height = pIn->height;
3081 UINT_64 mipSliceSize = 0;
3082
3083 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3084 {
3085 UINT_32 mipWidth, mipHeight;
3086
3087 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3088
3089 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pOut->blockWidth);
3090 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3091
3092 if (pOut->pMipInfo != NULL)
3093 {
3094 pOut->pMipInfo[i].pitch = mipActualWidth;
3095 pOut->pMipInfo[i].height = mipActualHeight;
3096 pOut->pMipInfo[i].depth = 1;
3097 pOut->pMipInfo[i].offset = mipSliceSize;
3098 pOut->pMipInfo[i].mipTailOffset = 0;
3099 pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3100 }
3101
3102 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3103 }
3104
3105 pOut->sliceSize = mipSliceSize;
3106 pOut->surfSize = mipSliceSize * pOut->numSlices;
3107 }
3108 else
3109 {
3110 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3111 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3112
3113 if (pOut->pMipInfo != NULL)
3114 {
3115 pOut->pMipInfo[0].pitch = pOut->pitch;
3116 pOut->pMipInfo[0].height = pOut->height;
3117 pOut->pMipInfo[0].depth = 1;
3118 pOut->pMipInfo[0].offset = 0;
3119 pOut->pMipInfo[0].mipTailOffset = 0;
3120 pOut->pMipInfo[0].macroBlockOffset = 0;
3121 }
3122 }
3123
3124 }
3125
3126 return ret;
3127 }
3128
3129 /**
3130 ************************************************************************************************************************
3131 * Gfx10Lib::ComputeSurfaceInfoMacroTiled
3132 *
3133 * @brief
3134 * Internal function to calculate alignment for macro tiled surface
3135 *
3136 * @return
3137 * ADDR_E_RETURNCODE
3138 ************************************************************************************************************************
3139 */
3140 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
3141 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3142 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3143 ) const
3144 {
// Computes block dimensions, aligned pitch/height/slice count, slice and surface sizes and, when
// pOut->pMipInfo is non-NULL, the per-mip layout (offsets, dimensions, mip tail coordinates).
// ComputeBlockDimensionForSurf receives pointers to pOut->blockWidth/blockHeight/blockSlices; the code
// below reads those three fields as the alignment units.
3145 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3146 &pOut->blockHeight,
3147 &pOut->blockSlices,
3148 pIn->bpp,
3149 pIn->numFrags,
3150 pIn->resourceType,
3151 pIn->swizzleMode);
3152
3153 if (returnCode == ADDR_OK)
3154 {
3155 UINT_32 heightAlign = pOut->blockHeight;
3156
// Stereo surfaces: ComputeStereoInfo yields an extra height alignment factor (alignY) and the
// right-eye swizzle XOR.
// NOTE(review): pOut->pStereoInfo is dereferenced without a NULL check - presumably callers always
// supply it when flags.qbStereo is set; confirm against the caller.
3157 if (pIn->flags.qbStereo)
3158 {
3159 UINT_32 rightXor = 0;
3160 UINT_32 alignY = 1;
3161
3162 returnCode = ComputeStereoInfo(pIn, heightAlign, &alignY, &rightXor);
3163
3164 if (returnCode == ADDR_OK)
3165 {
3166 pOut->pStereoInfo->rightSwizzle = rightXor;
3167
3168 heightAlign *= alignY;
3169 }
3170 }
3171
3172 if (returnCode == ADDR_OK)
3173 {
3174 // Mip chain dimension and epitch have no meaning in GFX10, set to default value
3175 pOut->mipChainPitch = 0;
3176 pOut->mipChainHeight = 0;
3177 pOut->mipChainSlice = 0;
3178 pOut->epitchIsHeight = FALSE;
3179 pOut->mipChainInTail = FALSE;
3180 pOut->firstMipIdInTail = pIn->numMipLevels;
3181
3182 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3183 const UINT_32 blockSize = 1 << blockSizeLog2;
3184
// Base level dimensions are rounded up to whole blocks; the base address alignment is one block.
3185 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3186 pOut->height = PowTwoAlign(pIn->height, heightAlign);
3187 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3188 pOut->baseAlign = blockSize;
3189
3190 if (pIn->numMipLevels > 1)
3191 {
3192 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
3193 pIn->swizzleMode,
3194 pOut->blockWidth,
3195 pOut->blockHeight,
3196 pOut->blockSlices);
3197 const UINT_32 mip0Width = pIn->width;
3198 const UINT_32 mip0Height = pIn->height;
3199 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
// Thin surfaces treat every slice independently (depth 1); otherwise the slice count is the mip 0 depth.
3200 const UINT_32 mip0Depth = isThin ? 1 : pIn->numSlices;
3201 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(blockSizeLog2, isThin);
// index = log2(bytes per element); selects the row of the Block256_2d/Block256_3d tables.
3202 const UINT_32 index = Log2(pIn->bpp >> 3);
3203 UINT_32 firstMipInTail = pIn->numMipLevels;
3204 UINT_64 mipChainSliceSize = 0;
// Only entries [0, firstMipInTail) of the two arrays below are written (first loop) and read (offset loop).
3205 UINT_64 mipSize[MaxMipLevels];
3206 UINT_64 mipSliceSize[MaxMipLevels];
3207
3208 Dim3d fixedTailMaxDim = tailMaxDim;
3209
// With the dsMipmapHtileFix setting, Z-order swizzles with 1- or 2-byte elements (index <= 1) use a
// tail dimension reduced by the ratio of their 256B block dims to those of 4-byte elements (index 2).
// The reduced dims are only used for the in-tail test; reported tail pitch/height use tailMaxDim.
3210 if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3211 {
3212 fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3213 fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3214 }
3215
// Walk mips from level 0 downward. The first level accepted by IsInMipTail ends the walk: it and all
// remaining levels share the tail, which adds blockSize / blockSlices bytes to the per-slice size.
3216 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3217 {
3218 UINT_32 mipWidth, mipHeight, mipDepth;
3219
3220 GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3221
3222 if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3223 {
3224 firstMipInTail = i;
3225 mipChainSliceSize += blockSize / pOut->blockSlices;
3226 break;
3227 }
3228 else
3229 {
3230 const UINT_32 pitch = PowTwoAlign(mipWidth, pOut->blockWidth);
3231 const UINT_32 height = PowTwoAlign(mipHeight, pOut->blockHeight);
3232 const UINT_32 depth = PowTwoAlign(mipDepth, pOut->blockSlices);
3233 const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3234
// mipSize: bytes of the whole level (all aligned depth slices);
// mipSliceSize: bytes of one block-deep layer (blockSlices slices) of the level.
3235 mipSize[i] = sliceSize * depth;
3236 mipSliceSize[i] = sliceSize * pOut->blockSlices;
3237 mipChainSliceSize += sliceSize;
3238
3239 if (pOut->pMipInfo != NULL)
3240 {
3241 pOut->pMipInfo[i].pitch = pitch;
3242 pOut->pMipInfo[i].height = height;
3243 pOut->pMipInfo[i].depth = depth;
3244 }
3245 }
3246 }
3247
3248 pOut->sliceSize = mipChainSliceSize;
3249 pOut->surfSize = mipChainSliceSize * pOut->numSlices;
3250 pOut->mipChainInTail = (firstMipInTail == 0) ? TRUE : FALSE;
3251 pOut->firstMipIdInTail = firstMipInTail;
3252
3253 if (pOut->pMipInfo != NULL)
3254 {
3255 UINT_64 offset = 0;
3256 UINT_64 macroBlkOffset = 0;
3257 UINT_32 tailMaxDepth = 0;
3258
// If a mip tail exists it occupies the start of the chain: non-tail offsets begin after the tail's
// footprint (one block per blockSlices of the first tail level's depth).
3259 if (firstMipInTail != pIn->numMipLevels)
3260 {
3261 UINT_32 mipWidth, mipHeight;
3262
3263 GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3264 &mipWidth, &mipHeight, &tailMaxDepth);
3265
3266 offset = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3267 macroBlkOffset = blockSize;
3268 }
3269
// Assign offsets from the smallest non-tail level up to level 0, so level 0 is placed last.
3270 for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3271 {
3272 pOut->pMipInfo[i].offset = offset;
3273 pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3274 pOut->pMipInfo[i].mipTailOffset = 0;
3275
3276 offset += mipSize[i];
3277 macroBlkOffset += mipSliceSize[i];
3278 }
3279
// Dimensions reported for the first tail level; they are halved (down to one 256B block) per level below.
3280 UINT_32 pitch = tailMaxDim.w;
3281 UINT_32 height = tailMaxDim.h;
3282 UINT_32 depth = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3283
// From here on tailMaxDepth counts 256B-block layers in depth (1 for thin surfaces).
3284 tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3285
3286 for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3287 {
// m is the tail slot, counted down from maxMipsInTail - 1 for the first tail level.
// Slots 0..6 sit 256 bytes apart (m << 8); slots above 6 sit at 16 << m (2KB for m = 7, doubling after).
3288 const UINT_32 m = maxMipsInTail - 1 - (i - firstMipInTail);
3289 const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3290
3291 pOut->pMipInfo[i].offset = mipOffset * tailMaxDepth;
3292 pOut->pMipInfo[i].mipTailOffset = mipOffset;
3293 pOut->pMipInfo[i].macroBlockOffset = 0;
3294
3295 pOut->pMipInfo[i].pitch = pitch;
3296 pOut->pMipInfo[i].height = height;
3297 pOut->pMipInfo[i].depth = depth;
3298
// De-interleave mipOffset bits from bit 8 upward: bits 9, 11, 13, ... form mipX and
// bits 8, 10, 12, ... form mipY (six bits each).
3299 UINT_32 mipX = ((mipOffset >> 9) & 1) |
3300 ((mipOffset >> 10) & 2) |
3301 ((mipOffset >> 11) & 4) |
3302 ((mipOffset >> 12) & 8) |
3303 ((mipOffset >> 13) & 16) |
3304 ((mipOffset >> 14) & 32);
3305 UINT_32 mipY = ((mipOffset >> 8) & 1) |
3306 ((mipOffset >> 9) & 2) |
3307 ((mipOffset >> 10) & 4) |
3308 ((mipOffset >> 11) & 8) |
3309 ((mipOffset >> 12) & 16) |
3310 ((mipOffset >> 13) & 32);
3311
// For odd log2 block sizes X and Y swap roles; if the element-size index is also odd, the low bit of
// the swapped X is moved into the low bit of Y.
3312 if (blockSizeLog2 & 1)
3313 {
3314 const UINT_32 temp = mipX;
3315 mipX = mipY;
3316 mipY = temp;
3317
3318 if (index & 1)
3319 {
3320 mipY = (mipY << 1) | (mipX & 1);
3321 mipX = mipX >> 1;
3322 }
3323 }
3324
// Convert the 256B-block coordinates to element coordinates and shrink the dims for the next level.
3325 if (isThin)
3326 {
3327 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3328 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3329 pOut->pMipInfo[i].mipTailCoordZ = 0;
3330
3331 pitch = Max(pitch >> 1, Block256_2d[index].w);
3332 height = Max(height >> 1, Block256_2d[index].h);
3333 depth = 1;
3334 }
3335 else
3336 {
3337 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3338 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3339 pOut->pMipInfo[i].mipTailCoordZ = 0;
3340
3341 pitch = Max(pitch >> 1, Block256_3d[index].w);
3342 height = Max(height >> 1, Block256_3d[index].h);
3343 depth = PowTwoAlign(Max(depth >> 1, 1u), Block256_3d[index].d);
3344 }
3345 }
3346 }
3347 }
3348 else
3349 {
// Single mip level: the slice size additionally scales with the fragment count.
3350 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
3351 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3352
3353 if (pOut->pMipInfo != NULL)
3354 {
3355 pOut->pMipInfo[0].pitch = pOut->pitch;
3356 pOut->pMipInfo[0].height = pOut->height;
3357 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
3358 pOut->pMipInfo[0].offset = 0;
3359 pOut->pMipInfo[0].mipTailOffset = 0;
3360 pOut->pMipInfo[0].macroBlockOffset = 0;
3361 pOut->pMipInfo[0].mipTailCoordX = 0;
3362 pOut->pMipInfo[0].mipTailCoordY = 0;
3363 pOut->pMipInfo[0].mipTailCoordZ = 0;
3364 }
3365 }
3366 }
3367 }
3368
3369 return returnCode;
3370 }
3371
3372 /**
3373 ************************************************************************************************************************
3374 * Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3375 *
3376 * @brief
3377 * Internal function to calculate address from coord for tiled swizzle surface
3378 *
3379 * @return
3380 * ADDR_E_RETURNCODE
3381 ************************************************************************************************************************
3382 */
3383 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
3384 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3385 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3386 ) const
3387 {
3388 ADDR_E_RETURNCODE ret;
3389
3390 if (IsBlock256b(pIn->swizzleMode))
3391 {
3392 ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3393 }
3394 else
3395 {
3396 ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3397 }
3398
3399 return ret;
3400 }
3401
3402 /**
3403 ************************************************************************************************************************
3404 * Gfx10Lib::ComputeOffsetFromEquation
3405 *
3406 * @brief
3407 * Compute offset from equation
3408 *
3409 * @return
3410 * Offset
3411 ************************************************************************************************************************
3412 */
3413 UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
3414 const ADDR_EQUATION* pEq, ///< Equation
3415 UINT_32 x, ///< x coord in bytes
3416 UINT_32 y, ///< y coord in pixel
3417 UINT_32 z ///< z coord in slice
3418 ) const
3419 {
3420 UINT_32 offset = 0;
3421
3422 for (UINT_32 i = 0; i < pEq->numBits; i++)
3423 {
3424 UINT_32 v = 0;
3425
3426 if (pEq->addr[i].valid)
3427 {
3428 if (pEq->addr[i].channel == 0)
3429 {
3430 v ^= (x >> pEq->addr[i].index) & 1;
3431 }
3432 else if (pEq->addr[i].channel == 1)
3433 {
3434 v ^= (y >> pEq->addr[i].index) & 1;
3435 }
3436 else
3437 {
3438 ADDR_ASSERT(pEq->addr[i].channel == 2);
3439 v ^= (z >> pEq->addr[i].index) & 1;
3440 }
3441 }
3442
3443 if (pEq->xor1[i].valid)
3444 {
3445 if (pEq->xor1[i].channel == 0)
3446 {
3447 v ^= (x >> pEq->xor1[i].index) & 1;
3448 }
3449 else if (pEq->xor1[i].channel == 1)
3450 {
3451 v ^= (y >> pEq->xor1[i].index) & 1;
3452 }
3453 else
3454 {
3455 ADDR_ASSERT(pEq->xor1[i].channel == 2);
3456 v ^= (z >> pEq->xor1[i].index) & 1;
3457 }
3458 }
3459
3460 if (pEq->xor2[i].valid)
3461 {
3462 if (pEq->xor2[i].channel == 0)
3463 {
3464 v ^= (x >> pEq->xor2[i].index) & 1;
3465 }
3466 else if (pEq->xor2[i].channel == 1)
3467 {
3468 v ^= (y >> pEq->xor2[i].index) & 1;
3469 }
3470 else
3471 {
3472 ADDR_ASSERT(pEq->xor2[i].channel == 2);
3473 v ^= (z >> pEq->xor2[i].index) & 1;
3474 }
3475 }
3476
3477 offset |= (v << i);
3478 }
3479
3480 return offset;
3481 }
3482
3483 /**
3484 ************************************************************************************************************************
3485 * Gfx10Lib::ComputeOffsetFromSwizzlePattern
3486 *
3487 * @brief
3488 * Compute offset from swizzle pattern
3489 *
3490 * @return
3491 * Offset
3492 ************************************************************************************************************************
3493 */
3494 UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern(
3495 const UINT_64* pPattern, ///< Swizzle pattern
3496 UINT_32 numBits, ///< Number of bits in pattern
3497 UINT_32 x, ///< x coord in pixel
3498 UINT_32 y, ///< y coord in pixel
3499 UINT_32 z, ///< z coord in slice
3500 UINT_32 s ///< sample id
3501 ) const
3502 {
3503 UINT_32 offset = 0;
3504 const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
3505
3506 for (UINT_32 i = 0; i < numBits; i++)
3507 {
3508 UINT_32 v = 0;
3509
3510 if (pSwizzlePattern[i].x != 0)
3511 {
3512 UINT_16 mask = pSwizzlePattern[i].x;
3513 UINT_32 xBits = x;
3514
3515 while (mask != 0)
3516 {
3517 if (mask & 1)
3518 {
3519 v ^= xBits & 1;
3520 }
3521
3522 xBits >>= 1;
3523 mask >>= 1;
3524 }
3525 }
3526
3527 if (pSwizzlePattern[i].y != 0)
3528 {
3529 UINT_16 mask = pSwizzlePattern[i].y;
3530 UINT_32 yBits = y;
3531
3532 while (mask != 0)
3533 {
3534 if (mask & 1)
3535 {
3536 v ^= yBits & 1;
3537 }
3538
3539 yBits >>= 1;
3540 mask >>= 1;
3541 }
3542 }
3543
3544 if (pSwizzlePattern[i].z != 0)
3545 {
3546 UINT_16 mask = pSwizzlePattern[i].z;
3547 UINT_32 zBits = z;
3548
3549 while (mask != 0)
3550 {
3551 if (mask & 1)
3552 {
3553 v ^= zBits & 1;
3554 }
3555
3556 zBits >>= 1;
3557 mask >>= 1;
3558 }
3559 }
3560
3561 if (pSwizzlePattern[i].s != 0)
3562 {
3563 UINT_16 mask = pSwizzlePattern[i].s;
3564 UINT_32 sBits = s;
3565
3566 while (mask != 0)
3567 {
3568 if (mask & 1)
3569 {
3570 v ^= sBits & 1;
3571 }
3572
3573 sBits >>= 1;
3574 mask >>= 1;
3575 }
3576 }
3577
3578 offset |= (v << i);
3579 }
3580
3581 return offset;
3582 }
3583
3584 /**
3585 ************************************************************************************************************************
3586 * Gfx10Lib::GetSwizzlePatternInfo
3587 *
3588 * @brief
3589 * Get swizzle pattern
3590 *
3591 * @return
3592 * Swizzle pattern information
3593 ************************************************************************************************************************
3594 */
3595 const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
3596 AddrSwizzleMode swizzleMode, ///< Swizzle mode
3597 AddrResourceType resourceType, ///< Resource type
3598 UINT_32 elemLog2, ///< Element size in bytes log2
3599 UINT_32 numFrag ///< Number of fragment
3600 ) const
3601 {
3602 const UINT_32 index = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
3603 const ADDR_SW_PATINFO* patInfo = NULL;
3604 const UINT_32 swizzleMask = 1 << swizzleMode;
3605
3606 if (IsLinear(swizzleMode) == FALSE)
3607 {
3608 if (IsBlockVariable(swizzleMode))
3609 {
3610 if (m_blockVarSizeLog2 != 0)
3611 {
3612 ADDR_ASSERT(m_settings.supportRbPlus);
3613
3614 if (IsRtOptSwizzle(swizzleMode))
3615 {
3616 if (numFrag == 1)
3617 {
3618 patInfo = SW_VAR_R_X_1xaa_RBPLUS_PATINFO;
3619 }
3620 else if (numFrag == 2)
3621 {
3622 patInfo = SW_VAR_R_X_2xaa_RBPLUS_PATINFO;
3623 }
3624 else if (numFrag == 4)
3625 {
3626 patInfo = SW_VAR_R_X_4xaa_RBPLUS_PATINFO;
3627 }
3628 else
3629 {
3630 ADDR_ASSERT(numFrag == 8);
3631 patInfo = SW_VAR_R_X_8xaa_RBPLUS_PATINFO;
3632 }
3633 }
3634 else if (IsZOrderSwizzle(swizzleMode))
3635 {
3636 if (numFrag == 1)
3637 {
3638 patInfo = SW_VAR_Z_X_1xaa_RBPLUS_PATINFO;
3639 }
3640 else if (numFrag == 2)
3641 {
3642 patInfo = SW_VAR_Z_X_2xaa_RBPLUS_PATINFO;
3643 }
3644 else if (numFrag == 4)
3645 {
3646 patInfo = SW_VAR_Z_X_4xaa_RBPLUS_PATINFO;
3647 }
3648 else
3649 {
3650 ADDR_ASSERT(numFrag == 8);
3651 patInfo = SW_VAR_Z_X_8xaa_RBPLUS_PATINFO;
3652 }
3653 }
3654 }
3655 }
3656 else if (resourceType == ADDR_RSRC_TEX_3D)
3657 {
3658 ADDR_ASSERT(numFrag == 1);
3659
3660 if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0)
3661 {
3662 if (IsRtOptSwizzle(swizzleMode))
3663 {
3664 patInfo = m_settings.supportRbPlus ? SW_64K_R_X_1xaa_RBPLUS_PATINFO : SW_64K_R_X_1xaa_PATINFO;
3665 }
3666 else if (IsZOrderSwizzle(swizzleMode))
3667 {
3668 patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_1xaa_RBPLUS_PATINFO : SW_64K_Z_X_1xaa_PATINFO;
3669 }
3670 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3671 {
3672 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
3673 patInfo = m_settings.supportRbPlus ? SW_64K_D3_X_RBPLUS_PATINFO : SW_64K_D3_X_PATINFO;
3674 }
3675 else
3676 {
3677 ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
3678
3679 if (IsBlock4kb(swizzleMode))
3680 {
3681 if (swizzleMode == ADDR_SW_4KB_S)
3682 {
3683 patInfo = m_settings.supportRbPlus ? SW_4K_S3_RBPLUS_PATINFO : SW_4K_S3_PATINFO;
3684 }
3685 else
3686 {
3687 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
3688 patInfo = m_settings.supportRbPlus ? SW_4K_S3_X_RBPLUS_PATINFO : SW_4K_S3_X_PATINFO;
3689 }
3690 }
3691 else
3692 {
3693 if (swizzleMode == ADDR_SW_64KB_S)
3694 {
3695 patInfo = m_settings.supportRbPlus ? SW_64K_S3_RBPLUS_PATINFO : SW_64K_S3_PATINFO;
3696 }
3697 else if (swizzleMode == ADDR_SW_64KB_S_X)
3698 {
3699 patInfo = m_settings.supportRbPlus ? SW_64K_S3_X_RBPLUS_PATINFO : SW_64K_S3_X_PATINFO;
3700 }
3701 else
3702 {
3703 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
3704 patInfo = m_settings.supportRbPlus ? SW_64K_S3_T_RBPLUS_PATINFO : SW_64K_S3_T_PATINFO;
3705 }
3706 }
3707 }
3708 }
3709 }
3710 else
3711 {
3712 if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0)
3713 {
3714 if (IsBlock256b(swizzleMode))
3715 {
3716 if (swizzleMode == ADDR_SW_256B_S)
3717 {
3718 patInfo = m_settings.supportRbPlus ? SW_256_S_RBPLUS_PATINFO : SW_256_S_PATINFO;
3719 }
3720 else
3721 {
3722 ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
3723 patInfo = m_settings.supportRbPlus ? SW_256_D_RBPLUS_PATINFO : SW_256_D_PATINFO;
3724 }
3725 }
3726 else if (IsBlock4kb(swizzleMode))
3727 {
3728 if (IsStandardSwizzle(resourceType, swizzleMode))
3729 {
3730 if (swizzleMode == ADDR_SW_4KB_S)
3731 {
3732 patInfo = m_settings.supportRbPlus ? SW_4K_S_RBPLUS_PATINFO : SW_4K_S_PATINFO;
3733 }
3734 else
3735 {
3736 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
3737 patInfo = m_settings.supportRbPlus ? SW_4K_S_X_RBPLUS_PATINFO : SW_4K_S_X_PATINFO;
3738 }
3739 }
3740 else
3741 {
3742 if (swizzleMode == ADDR_SW_4KB_D)
3743 {
3744 patInfo = m_settings.supportRbPlus ? SW_4K_D_RBPLUS_PATINFO : SW_4K_D_PATINFO;
3745 }
3746 else
3747 {
3748 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
3749 patInfo = m_settings.supportRbPlus ? SW_4K_D_X_RBPLUS_PATINFO : SW_4K_D_X_PATINFO;
3750 }
3751 }
3752 }
3753 else
3754 {
3755 if (IsRtOptSwizzle(swizzleMode))
3756 {
3757 if (numFrag == 1)
3758 {
3759 patInfo = m_settings.supportRbPlus ? SW_64K_R_X_1xaa_RBPLUS_PATINFO : SW_64K_R_X_1xaa_PATINFO;
3760 }
3761 else if (numFrag == 2)
3762 {
3763 patInfo = m_settings.supportRbPlus ? SW_64K_R_X_2xaa_RBPLUS_PATINFO : SW_64K_R_X_2xaa_PATINFO;
3764 }
3765 else if (numFrag == 4)
3766 {
3767 patInfo = m_settings.supportRbPlus ? SW_64K_R_X_4xaa_RBPLUS_PATINFO : SW_64K_R_X_4xaa_PATINFO;
3768 }
3769 else
3770 {
3771 ADDR_ASSERT(numFrag == 8);
3772 patInfo = m_settings.supportRbPlus ? SW_64K_R_X_8xaa_RBPLUS_PATINFO : SW_64K_R_X_8xaa_PATINFO;
3773 }
3774 }
3775 else if (IsZOrderSwizzle(swizzleMode))
3776 {
3777 if (numFrag == 1)
3778 {
3779 patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_1xaa_RBPLUS_PATINFO : SW_64K_Z_X_1xaa_PATINFO;
3780 }
3781 else if (numFrag == 2)
3782 {
3783 patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_2xaa_RBPLUS_PATINFO : SW_64K_Z_X_2xaa_PATINFO;
3784 }
3785 else if (numFrag == 4)
3786 {
3787 patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_4xaa_RBPLUS_PATINFO : SW_64K_Z_X_4xaa_PATINFO;
3788 }
3789 else
3790 {
3791 ADDR_ASSERT(numFrag == 8);
3792 patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_8xaa_RBPLUS_PATINFO : SW_64K_Z_X_8xaa_PATINFO;
3793 }
3794 }
3795 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3796 {
3797 if (swizzleMode == ADDR_SW_64KB_D)
3798 {
3799 patInfo = m_settings.supportRbPlus ? SW_64K_D_RBPLUS_PATINFO : SW_64K_D_PATINFO;
3800 }
3801 else if (swizzleMode == ADDR_SW_64KB_D_X)
3802 {
3803 patInfo = m_settings.supportRbPlus ? SW_64K_D_X_RBPLUS_PATINFO : SW_64K_D_X_PATINFO;
3804 }
3805 else
3806 {
3807 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
3808 patInfo = m_settings.supportRbPlus ? SW_64K_D_T_RBPLUS_PATINFO : SW_64K_D_T_PATINFO;
3809 }
3810 }
3811 else
3812 {
3813 if (swizzleMode == ADDR_SW_64KB_S)
3814 {
3815 patInfo = m_settings.supportRbPlus ? SW_64K_S_RBPLUS_PATINFO : SW_64K_S_PATINFO;
3816 }
3817 else if (swizzleMode == ADDR_SW_64KB_S_X)
3818 {
3819 patInfo = m_settings.supportRbPlus ? SW_64K_S_X_RBPLUS_PATINFO : SW_64K_S_X_PATINFO;
3820 }
3821 else
3822 {
3823 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
3824 patInfo = m_settings.supportRbPlus ? SW_64K_S_T_RBPLUS_PATINFO : SW_64K_S_T_PATINFO;
3825 }
3826 }
3827 }
3828 }
3829 }
3830 }
3831
3832 return (patInfo != NULL) ? &patInfo[index] : NULL;
3833 }
3834
3835 /**
3836 ************************************************************************************************************************
3837 * Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
3838 *
3839 * @brief
3840 * Internal function to calculate address from coord for micro tiled swizzle surface
3841 *
3842 * @return
3843 * ADDR_E_RETURNCODE
3844 ************************************************************************************************************************
3845 */
3846 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
3847 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3848 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3849 ) const
3850 {
3851 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3852 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
3853 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
3854
3855 localIn.swizzleMode = pIn->swizzleMode;
3856 localIn.flags = pIn->flags;
3857 localIn.resourceType = pIn->resourceType;
3858 localIn.bpp = pIn->bpp;
3859 localIn.width = Max(pIn->unalignedWidth, 1u);
3860 localIn.height = Max(pIn->unalignedHeight, 1u);
3861 localIn.numSlices = Max(pIn->numSlices, 1u);
3862 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
3863 localIn.numSamples = Max(pIn->numSamples, 1u);
3864 localIn.numFrags = Max(pIn->numFrags, 1u);
3865 localOut.pMipInfo = mipInfo;
3866
3867 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
3868
3869 if (ret == ADDR_OK)
3870 {
3871 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3872 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
3873 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3874 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
3875
3876 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3877 {
3878 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
3879 const UINT_32 yb = pIn->y / localOut.blockHeight;
3880 const UINT_32 xb = pIn->x / localOut.blockWidth;
3881 const UINT_32 blockIndex = yb * pb + xb;
3882 const UINT_32 blockSize = 256;
3883 const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
3884 pIn->x << elemLog2,
3885 pIn->y,
3886 0);
3887 pOut->addr = localOut.sliceSize * pIn->slice +
3888 mipInfo[pIn->mipId].macroBlockOffset +
3889 (blockIndex * blockSize) +
3890 blk256Offset;
3891 }
3892 else
3893 {
3894 ret = ADDR_INVALIDPARAMS;
3895 }
3896 }
3897
3898 return ret;
3899 }
3900
3901 /**
3902 ************************************************************************************************************************
3903 * Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
3904 *
3905 * @brief
3906 * Internal function to calculate address from coord for macro tiled swizzle surface
3907 *
3908 * @return
3909 * ADDR_E_RETURNCODE
3910 ************************************************************************************************************************
3911 */
3912 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
3913 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3914 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3915 ) const
3916 {
3917 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3918 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
3919 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
3920
3921 localIn.swizzleMode = pIn->swizzleMode;
3922 localIn.flags = pIn->flags;
3923 localIn.resourceType = pIn->resourceType;
3924 localIn.bpp = pIn->bpp;
3925 localIn.width = Max(pIn->unalignedWidth, 1u);
3926 localIn.height = Max(pIn->unalignedHeight, 1u);
3927 localIn.numSlices = Max(pIn->numSlices, 1u);
3928 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
3929 localIn.numSamples = Max(pIn->numSamples, 1u);
3930 localIn.numFrags = Max(pIn->numFrags, 1u);
3931 localOut.pMipInfo = mipInfo;
3932
3933 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
3934
3935 if (ret == ADDR_OK)
3936 {
3937 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3938 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3939 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
3940 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
3941 const UINT_32 bankMask = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
3942 const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
3943 (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
3944
3945 if (localIn.numFrags > 1)
3946 {
3947 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
3948 pIn->resourceType,
3949 elemLog2,
3950 localIn.numFrags);
3951
3952 if (pPatInfo != NULL)
3953 {
3954 const UINT_32 pb = localOut.pitch / localOut.blockWidth;
3955 const UINT_32 yb = pIn->y / localOut.blockHeight;
3956 const UINT_32 xb = pIn->x / localOut.blockWidth;
3957 const UINT_64 blkIdx = yb * pb + xb;
3958
3959 ADDR_BIT_SETTING fullSwizzlePattern[20];
3960 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
3961
3962 const UINT_32 blkOffset =
3963 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
3964 blkSizeLog2,
3965 pIn->x,
3966 pIn->y,
3967 pIn->slice,
3968 pIn->sample);
3969
3970 pOut->addr = (localOut.sliceSize * pIn->slice) +
3971 (blkIdx << blkSizeLog2) +
3972 (blkOffset ^ pipeBankXor);
3973 }
3974 else
3975 {
3976 ret = ADDR_INVALIDPARAMS;
3977 }
3978 }
3979 else
3980 {
3981 const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
3982 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3983 const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
3984
3985 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3986 {
3987 const BOOL_32 inTail = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
3988 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
3989 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
3990 const UINT_32 sliceId = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
3991 const UINT_32 x = inTail ? (pIn->x + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
3992 const UINT_32 y = inTail ? (pIn->y + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
3993 const UINT_32 z = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
3994 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
3995 const UINT_32 yb = pIn->y / localOut.blockHeight;
3996 const UINT_32 xb = pIn->x / localOut.blockWidth;
3997 const UINT_64 blkIdx = yb * pb + xb;
3998 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
3999 x << elemLog2,
4000 y,
4001 z);
4002 pOut->addr = sliceSize * sliceId +
4003 mipInfo[pIn->mipId].macroBlockOffset +
4004 (blkIdx << blkSizeLog2) +
4005 (blkOffset ^ pipeBankXor);
4006 }
4007 else
4008 {
4009 ret = ADDR_INVALIDPARAMS;
4010 }
4011 }
4012 }
4013
4014 return ret;
4015 }
4016
4017 /**
4018 ************************************************************************************************************************
4019 * Gfx10Lib::HwlComputeMaxBaseAlignments
4020 *
4021 * @brief
4022 * Gets maximum alignments
4023 * @return
4024 * maximum alignments
4025 ************************************************************************************************************************
4026 */
4027 UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
4028 {
4029 return m_blockVarSizeLog2 ? Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K;
4030 }
4031
4032 /**
4033 ************************************************************************************************************************
4034 * Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4035 *
4036 * @brief
4037 * Gets maximum alignments for metadata
4038 * @return
4039 * maximum alignments for metadata
4040 ************************************************************************************************************************
4041 */
4042 UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
4043 {
4044 Dim3d metaBlk;
4045
4046 const AddrSwizzleMode ValidSwizzleModeForXmask[] =
4047 {
4048 ADDR_SW_64KB_Z_X,
4049 m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X,
4050 };
4051
4052 UINT_32 maxBaseAlignHtile = 0;
4053 UINT_32 maxBaseAlignCmask = 0;
4054
4055 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++)
4056 {
4057 for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4058 {
4059 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4060 {
4061 // Max base alignment for Htile
4062 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil,
4063 ADDR_RSRC_TEX_2D,
4064 ValidSwizzleModeForXmask[swIdx],
4065 bppLog2,
4066 numFragLog2,
4067 TRUE,
4068 &metaBlk);
4069
4070 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4071 }
4072 }
4073
4074 // Max base alignment for Cmask
4075 const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask,
4076 ADDR_RSRC_TEX_2D,
4077 ValidSwizzleModeForXmask[swIdx],
4078 0,
4079 0,
4080 TRUE,
4081 &metaBlk);
4082
4083 maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask);
4084 }
4085
4086 // Max base alignment for 2D Dcc
4087 const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4088 {
4089 ADDR_SW_64KB_S_X,
4090 ADDR_SW_64KB_D_X,
4091 ADDR_SW_64KB_R_X,
4092 m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4093 };
4094
4095 UINT_32 maxBaseAlignDcc2D = 0;
4096
4097 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4098 {
4099 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4100 {
4101 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4102 {
4103 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,
4104 ADDR_RSRC_TEX_2D,
4105 ValidSwizzleModeForDcc2D[swIdx],
4106 bppLog2,
4107 numFragLog2,
4108 TRUE,
4109 &metaBlk);
4110
4111 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4112 }
4113 }
4114 }
4115
4116 // Max base alignment for 3D Dcc
4117 const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4118 {
4119 ADDR_SW_64KB_Z_X,
4120 ADDR_SW_64KB_S_X,
4121 ADDR_SW_64KB_D_X,
4122 ADDR_SW_64KB_R_X,
4123 m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4124 };
4125
4126 UINT_32 maxBaseAlignDcc3D = 0;
4127
4128 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4129 {
4130 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4131 {
4132 const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,
4133 ADDR_RSRC_TEX_3D,
4134 ValidSwizzleModeForDcc3D[swIdx],
4135 bppLog2,
4136 0,
4137 TRUE,
4138 &metaBlk);
4139
4140 maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4141 }
4142 }
4143
4144 return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4145 }
4146
4147 /**
4148 ************************************************************************************************************************
4149 * Gfx10Lib::GetMetaElementSizeLog2
4150 *
4151 * @brief
4152 * Gets meta data element size log2
4153 * @return
4154 * Meta data element size log2
4155 ************************************************************************************************************************
4156 */
4157 INT_32 Gfx10Lib::GetMetaElementSizeLog2(
4158 Gfx10DataType dataType) ///< Data surface type
4159 {
4160 INT_32 elemSizeLog2 = 0;
4161
4162 if (dataType == Gfx10DataColor)
4163 {
4164 elemSizeLog2 = 0;
4165 }
4166 else if (dataType == Gfx10DataDepthStencil)
4167 {
4168 elemSizeLog2 = 2;
4169 }
4170 else
4171 {
4172 ADDR_ASSERT(dataType == Gfx10DataFmask);
4173 elemSizeLog2 = -1;
4174 }
4175
4176 return elemSizeLog2;
4177 }
4178
4179 /**
4180 ************************************************************************************************************************
4181 * Gfx10Lib::GetMetaCacheSizeLog2
4182 *
4183 * @brief
4184 * Gets meta data cache line size log2
4185 * @return
4186 * Meta data cache line size log2
4187 ************************************************************************************************************************
4188 */
4189 INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
4190 Gfx10DataType dataType) ///< Data surface type
4191 {
4192 INT_32 cacheSizeLog2 = 0;
4193
4194 if (dataType == Gfx10DataColor)
4195 {
4196 cacheSizeLog2 = 6;
4197 }
4198 else if (dataType == Gfx10DataDepthStencil)
4199 {
4200 cacheSizeLog2 = 8;
4201 }
4202 else
4203 {
4204 ADDR_ASSERT(dataType == Gfx10DataFmask);
4205 cacheSizeLog2 = 8;
4206 }
4207 return cacheSizeLog2;
4208 }
4209
4210 /**
4211 ************************************************************************************************************************
4212 * Gfx10Lib::HwlComputeSurfaceInfoLinear
4213 *
4214 * @brief
4215 * Internal function to calculate alignment for linear surface
4216 *
4217 * @return
4218 * ADDR_E_RETURNCODE
4219 ************************************************************************************************************************
4220 */
4221 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
4222 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4223 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4224 ) const
4225 {
4226 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4227
4228 if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4229 {
4230 returnCode = ADDR_INVALIDPARAMS;
4231 }
4232 else
4233 {
4234 const UINT_32 elementBytes = pIn->bpp >> 3;
4235 const UINT_32 pitchAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4236 const UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4237 UINT_32 pitch = PowTwoAlign(pIn->width, pitchAlign);
4238 UINT_32 actualHeight = pIn->height;
4239 UINT_64 sliceSize = 0;
4240
4241 if (pIn->numMipLevels > 1)
4242 {
4243 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4244 {
4245 UINT_32 mipWidth, mipHeight;
4246
4247 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4248
4249 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4250
4251 if (pOut->pMipInfo != NULL)
4252 {
4253 pOut->pMipInfo[i].pitch = mipActualWidth;
4254 pOut->pMipInfo[i].height = mipHeight;
4255 pOut->pMipInfo[i].depth = mipDepth;
4256 pOut->pMipInfo[i].offset = sliceSize;
4257 pOut->pMipInfo[i].mipTailOffset = 0;
4258 pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4259 }
4260
4261 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4262 }
4263 }
4264 else
4265 {
4266 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4267
4268 if (returnCode == ADDR_OK)
4269 {
4270 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4271
4272 if (pOut->pMipInfo != NULL)
4273 {
4274 pOut->pMipInfo[0].pitch = pitch;
4275 pOut->pMipInfo[0].height = actualHeight;
4276 pOut->pMipInfo[0].depth = mipDepth;
4277 pOut->pMipInfo[0].offset = 0;
4278 pOut->pMipInfo[0].mipTailOffset = 0;
4279 pOut->pMipInfo[0].macroBlockOffset = 0;
4280 }
4281 }
4282 }
4283
4284 if (returnCode == ADDR_OK)
4285 {
4286 pOut->pitch = pitch;
4287 pOut->height = actualHeight;
4288 pOut->numSlices = pIn->numSlices;
4289 pOut->sliceSize = sliceSize;
4290 pOut->surfSize = sliceSize * pOut->numSlices;
4291 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4292 pOut->blockWidth = pitchAlign;
4293 pOut->blockHeight = 1;
4294 pOut->blockSlices = 1;
4295
4296 // Following members are useless on GFX10
4297 pOut->mipChainPitch = 0;
4298 pOut->mipChainHeight = 0;
4299 pOut->mipChainSlice = 0;
4300 pOut->epitchIsHeight = FALSE;
4301
4302 // Post calculation validate
4303 ADDR_ASSERT(pOut->sliceSize > 0);
4304 }
4305 }
4306
4307 return returnCode;
4308 }
4309
4310 } // V2
4311 } // Addr