amd/addrlib/gfx10: forbid DCC for swizzle modes which the hardware does not support
[mesa.git] / src / amd / addrlib / src / gfx10 / gfx10addrlib.cpp
1 /*
2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx10addrlib.cpp
30 * @brief Contains the implementation for the Gfx10Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx10addrlib.h"
35 #include "gfx10_gb_reg.h"
36 #include "gfx10SwizzlePattern.h"
37
38 #include "amdgpu_asic_addr.h"
39
40 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
41 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
42
43 namespace Addr
44 {
45 /**
46 ************************************************************************************************************************
47 * Gfx10HwlInit
48 *
49 * @brief
50 * Creates an Gfx10Lib object.
51 *
52 * @return
53 * Returns an Gfx10Lib object pointer.
54 ************************************************************************************************************************
55 */
56 Addr::Lib* Gfx10HwlInit(const Client* pClient)
57 {
58 return V2::Gfx10Lib::CreateObj(pClient);
59 }
60
61 namespace V2
62 {
63
64 ////////////////////////////////////////////////////////////////////////////////////////////////////
65 // Static Const Member
66 ////////////////////////////////////////////////////////////////////////////////////////////////////
67
68 const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
69 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt
70 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
71 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
72 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D
73 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
74
75 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
76 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
77 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D
78 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
79
80 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
81 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
82 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D
83 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
84
85 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
86 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
87 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
88 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
89
90 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
91 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T
92 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T
93 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
94
95 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
96 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_X
97 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_X
98 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
99
100 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X
101 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X
102 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X
103 {0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1}, // ADDR_SW_64KB_R_X
104
105 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
106 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
107 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
108 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
109 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
110 };
111
112 const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
113
114 const Dim3d Gfx10Lib::Block64K_3d[] = {{64, 32, 32}, {32 , 32, 32}, {32, 32, 16}, {32, 16, 16}, {16, 16, 16}};
115 const Dim3d Gfx10Lib::Block4K_3d[] = {{16, 16, 16}, {8, 16, 16}, {8, 16, 8}, {8, 8, 8}, {4, 8, 8}};
116
117 const Dim2d Gfx10Lib::Block64K_2d[] = {{256, 256}, {256 , 128}, {128, 128}, {128, 64}, {64, 64}};
118 const Dim2d Gfx10Lib::Block4K_2d[] = {{64, 64}, {64, 32}, {32, 32}, {32, 16}, {16, 16}};
119
120 const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
121 const Dim3d Gfx10Lib::Block4K_Log2_3d[] = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
122
123 const Dim2d Gfx10Lib::Block64K_Log2_2d[] = {{8, 8}, {8, 7}, {7, 7}, {7, 6}, {6, 6}};
124 const Dim2d Gfx10Lib::Block4K_Log2_2d[] = {{6, 6}, {6, 5}, {5, 5}, {5, 4}, {4, 4}};
125
126 /**
127 ************************************************************************************************************************
128 * Gfx10Lib::Gfx10Lib
129 *
130 * @brief
131 * Constructor
132 *
133 ************************************************************************************************************************
134 */
135 Gfx10Lib::Gfx10Lib(const Client* pClient)
136 :
137 Lib(pClient),
138 m_numEquations(0)
139 {
140 m_class = AI_ADDRLIB;
141 memset(&m_settings, 0, sizeof(m_settings));
142 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
143 }
144
145 /**
146 ************************************************************************************************************************
147 * Gfx10Lib::~Gfx10Lib
148 *
149 * @brief
150 * Destructor
151 ************************************************************************************************************************
152 */
153 Gfx10Lib::~Gfx10Lib()
154 {
155 }
156
157 /**
158 ************************************************************************************************************************
159 * Gfx10Lib::HwlComputeHtileInfo
160 *
161 * @brief
162 * Interface function stub of AddrComputeHtilenfo
163 *
164 * @return
165 * ADDR_E_RETURNCODE
166 ************************************************************************************************************************
167 */
168 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
169 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
170 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
171 ) const
172 {
173 ADDR_E_RETURNCODE ret = ADDR_OK;
174
175 if ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) ||
176 (pIn->hTileFlags.pipeAligned != TRUE))
177 {
178 ret = ADDR_INVALIDPARAMS;
179 }
180 else
181 {
182 Dim3d metaBlk = {0};
183 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
184 ADDR_RSRC_TEX_2D,
185 ADDR_SW_64KB_Z_X,
186 0,
187 0,
188 TRUE,
189 &metaBlk);
190
191 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
192 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
193 pOut->baseAlign = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
194 pOut->metaBlkWidth = metaBlk.w;
195 pOut->metaBlkHeight = metaBlk.h;
196
197 if (pIn->numMipLevels > 1)
198 {
199 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
200
201 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
202
203 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
204 {
205 UINT_32 mipWidth, mipHeight;
206
207 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
208
209 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
210 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
211
212 const UINT_32 pitchInM = mipWidth / metaBlk.w;
213 const UINT_32 heightInM = mipHeight / metaBlk.h;
214 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
215
216 if (pOut->pMipInfo != NULL)
217 {
218 pOut->pMipInfo[i].inMiptail = FALSE;
219 pOut->pMipInfo[i].offset = offset;
220 pOut->pMipInfo[i].sliceSize = mipSliceSize;
221 }
222
223 offset += mipSliceSize;
224 }
225
226 pOut->sliceSize = offset;
227 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
228 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
229
230 if (pOut->pMipInfo != NULL)
231 {
232 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
233 {
234 pOut->pMipInfo[i].inMiptail = TRUE;
235 pOut->pMipInfo[i].offset = 0;
236 pOut->pMipInfo[i].sliceSize = 0;
237 }
238
239 if (pIn->firstMipIdInTail != pIn->numMipLevels)
240 {
241 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
242 }
243 }
244 }
245 else
246 {
247 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
248 const UINT_32 heightInM = pOut->height / metaBlk.h;
249
250 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
251 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
252 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
253
254 if (pOut->pMipInfo != NULL)
255 {
256 pOut->pMipInfo[0].inMiptail = FALSE;
257 pOut->pMipInfo[0].offset = 0;
258 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
259 }
260 }
261 }
262
263 return ret;
264 }
265
266 /**
267 ************************************************************************************************************************
268 * Gfx10Lib::HwlComputeCmaskInfo
269 *
270 * @brief
271 * Interface function stub of AddrComputeCmaskInfo
272 *
273 * @return
274 * ADDR_E_RETURNCODE
275 ************************************************************************************************************************
276 */
277 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
278 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
279 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
280 ) const
281 {
282 ADDR_E_RETURNCODE ret = ADDR_OK;
283
284 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
285 (pIn->cMaskFlags.pipeAligned != TRUE))
286 {
287 ret = ADDR_INVALIDPARAMS;
288 }
289 else
290 {
291 Dim3d metaBlk = {0};
292 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,
293 ADDR_RSRC_TEX_2D,
294 ADDR_SW_64KB_Z_X,
295 0,
296 0,
297 TRUE,
298 &metaBlk);
299
300 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
301 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
302 pOut->baseAlign = metaBlkSize;
303 pOut->metaBlkWidth = metaBlk.w;
304 pOut->metaBlkHeight = metaBlk.h;
305
306 if (pIn->numMipLevels > 1)
307 {
308 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
309
310 UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;
311
312 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
313 {
314 UINT_32 mipWidth, mipHeight;
315
316 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
317
318 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
319 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
320
321 const UINT_32 pitchInM = mipWidth / metaBlk.w;
322 const UINT_32 heightInM = mipHeight / metaBlk.h;
323
324 if (pOut->pMipInfo != NULL)
325 {
326 pOut->pMipInfo[i].inMiptail = FALSE;
327 pOut->pMipInfo[i].offset = metaBlkPerSlice * metaBlkSize;
328 pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;
329 }
330
331 metaBlkPerSlice += pitchInM * heightInM;
332 }
333
334 pOut->metaBlkNumPerSlice = metaBlkPerSlice;
335
336 if (pOut->pMipInfo != NULL)
337 {
338 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
339 {
340 pOut->pMipInfo[i].inMiptail = TRUE;
341 pOut->pMipInfo[i].offset = 0;
342 pOut->pMipInfo[i].sliceSize = 0;
343 }
344
345 if (pIn->firstMipIdInTail != pIn->numMipLevels)
346 {
347 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
348 }
349 }
350 }
351 else
352 {
353 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
354 const UINT_32 heightInM = pOut->height / metaBlk.h;
355
356 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
357
358 if (pOut->pMipInfo != NULL)
359 {
360 pOut->pMipInfo[0].inMiptail = FALSE;
361 pOut->pMipInfo[0].offset = 0;
362 pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
363 }
364 }
365
366 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
367 pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;
368 }
369
370 return ret;
371 }
372
373 /**
374 ************************************************************************************************************************
375 * Gfx10Lib::HwlComputeDccInfo
376 *
377 * @brief
378 * Interface function to compute DCC key info
379 *
380 * @return
381 * ADDR_E_RETURNCODE
382 ************************************************************************************************************************
383 */
384 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
385 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
386 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
387 ) const
388 {
389 ADDR_E_RETURNCODE ret = ADDR_OK;
390
391 if (pIn->swizzleMode != ADDR_SW_64KB_Z_X && pIn->swizzleMode != ADDR_SW_64KB_R_X)
392 {
393 // Hardware does not support DCC for this swizzle mode.
394 ret = ADDR_INVALIDPARAMS;
395 }
396 else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
397 {
398 // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
399 ret = ADDR_INVALIDPARAMS;
400 }
401 else
402 {
403 // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
404 ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));
405
406 Dim3d metaBlk = {0};
407 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
408 const UINT_32 numFragLog2 = Log2(pIn->numFrags);
409 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
410 pIn->resourceType,
411 pIn->swizzleMode,
412 elemLog2,
413 numFragLog2,
414 pIn->dccKeyFlags.pipeAligned,
415 &metaBlk);
416 const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
417
418 pOut->compressBlkWidth = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
419 pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
420 pOut->compressBlkDepth = isThick ? Block256_3d[elemLog2].d : 1;
421
422 pOut->dccRamBaseAlign = metaBlkSize;
423 pOut->metaBlkWidth = metaBlk.w;
424 pOut->metaBlkHeight = metaBlk.h;
425 pOut->metaBlkDepth = metaBlk.d;
426
427 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
428 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
429 pOut->depth = PowTwoAlign(pIn->numSlices, metaBlk.d);
430
431 if (pIn->numMipLevels > 1)
432 {
433 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
434
435 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
436
437 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
438 {
439 UINT_32 mipWidth, mipHeight;
440
441 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
442
443 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
444 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
445
446 const UINT_32 pitchInM = mipWidth / metaBlk.w;
447 const UINT_32 heightInM = mipHeight / metaBlk.h;
448 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
449
450 if (pOut->pMipInfo != NULL)
451 {
452 pOut->pMipInfo[i].inMiptail = FALSE;
453 pOut->pMipInfo[i].offset = offset;
454 pOut->pMipInfo[i].sliceSize = mipSliceSize;
455 }
456
457 offset += mipSliceSize;
458 }
459
460 pOut->dccRamSliceSize = offset;
461 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
462 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
463
464 if (pOut->pMipInfo != NULL)
465 {
466 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
467 {
468 pOut->pMipInfo[i].inMiptail = TRUE;
469 pOut->pMipInfo[i].offset = 0;
470 pOut->pMipInfo[i].sliceSize = 0;
471 }
472
473 if (pIn->firstMipIdInTail != pIn->numMipLevels)
474 {
475 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
476 }
477 }
478 }
479 else
480 {
481 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
482 const UINT_32 heightInM = pOut->height / metaBlk.h;
483
484 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
485 pOut->dccRamSliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
486 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
487
488 if (pOut->pMipInfo != NULL)
489 {
490 pOut->pMipInfo[0].inMiptail = FALSE;
491 pOut->pMipInfo[0].offset = 0;
492 pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
493 }
494 }
495 }
496
497 return ret;
498 }
499
500 /**
501 ************************************************************************************************************************
502 * Gfx10Lib::HwlComputeCmaskAddrFromCoord
503 *
504 * @brief
505 * Interface function stub of AddrComputeCmaskAddrFromCoord
506 *
507 * @return
508 * ADDR_E_RETURNCODE
509 ************************************************************************************************************************
510 */
511 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
512 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
513 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
514 {
515 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
516 input.size = sizeof(input);
517 input.cMaskFlags = pIn->cMaskFlags;
518 input.colorFlags = pIn->colorFlags;
519 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
520 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
521 input.numSlices = Max(pIn->numSlices, 1u);
522 input.swizzleMode = pIn->swizzleMode;
523 input.resourceType = pIn->resourceType;
524
525 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
526 output.size = sizeof(output);
527
528 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
529
530 if (returnCode == ADDR_OK)
531 {
532 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
533 const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3);
534 const UINT_32 numPipeLog2 = m_pipesLog2;
535 const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
536 const UINT_32 fmaskBppType = 4;
537 const UINT_32 numPipeType = 8;
538 const UINT_32 index = ((m_pipeInterleaveLog2 - 8) * (fmaskBppType * numPipeType)) +
539 ((numPipeLog2 + 1) * fmaskBppType) +
540 fmaskElemLog2;
541
542 const UINT_64* pPattern = CMASK_64K[index];
543 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
544 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
545 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(pPattern,
546 blkSizeLog2 + 1, // +1 for nibble offset
547 pIn->x,
548 pIn->y,
549 pIn->slice,
550 0);
551 const UINT_32 xb = pIn->x / output.metaBlkWidth;
552 const UINT_32 yb = pIn->y / output.metaBlkHeight;
553 const UINT_32 pb = output.pitch / output.metaBlkWidth;
554 const UINT_32 blkIndex = (yb * pb) + xb;
555 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
556
557 pOut->addr = (output.sliceSize * pIn->slice) +
558 (blkIndex * (1 << blkSizeLog2)) +
559 ((blkOffset >> 1) ^ pipeXor);
560 pOut->bitPosition = (blkOffset & 1) << 2;
561 }
562
563 return returnCode;
564 }
565
566 /**
567 ************************************************************************************************************************
568 * Gfx10Lib::HwlComputeHtileAddrFromCoord
569 *
570 * @brief
571 * Interface function stub of AddrComputeHtileAddrFromCoord
572 *
573 * @return
574 * ADDR_E_RETURNCODE
575 ************************************************************************************************************************
576 */
577 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
578 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
579 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
580 {
581 ADDR_E_RETURNCODE returnCode = ADDR_OK;
582
583 if (pIn->numMipLevels > 1)
584 {
585 returnCode = ADDR_NOTIMPLEMENTED;
586 }
587 else
588 {
589 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
590 input.size = sizeof(input);
591 input.hTileFlags = pIn->hTileFlags;
592 input.depthFlags = pIn->depthflags;
593 input.swizzleMode = pIn->swizzleMode;
594 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
595 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
596 input.numSlices = Max(pIn->numSlices, 1u);
597 input.numMipLevels = 1;
598
599 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
600 output.size = sizeof(output);
601
602 returnCode = ComputeHtileInfo(&input, &output);
603
604 if (returnCode == ADDR_OK)
605 {
606 const UINT_32 numSampleLog2 = Log2(pIn->numSamples);
607 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
608 const UINT_32 index = m_htileBaseIndex + numSampleLog2;
609 const UINT_64* pPattern = HTILE_64K[index];
610 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
611 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
612 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(pPattern,
613 blkSizeLog2 + 1, // +1 for nibble offset
614 pIn->x,
615 pIn->y,
616 pIn->slice,
617 0);
618 const UINT_32 xb = pIn->x / output.metaBlkWidth;
619 const UINT_32 yb = pIn->y / output.metaBlkHeight;
620 const UINT_32 pb = output.pitch / output.metaBlkWidth;
621 const UINT_32 blkIndex = (yb * pb) + xb;
622 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
623
624 pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
625 (blkIndex * (1 << blkSizeLog2)) +
626 ((blkOffset >> 1) ^ pipeXor);
627 }
628 }
629
630 return returnCode;
631 }
632
633 /**
634 ************************************************************************************************************************
635 * Gfx10Lib::HwlComputeHtileCoordFromAddr
636 *
637 * @brief
638 * Interface function stub of AddrComputeHtileCoordFromAddr
639 *
640 * @return
641 * ADDR_E_RETURNCODE
642 ************************************************************************************************************************
643 */
644 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
645 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
646 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
647 {
648 ADDR_NOT_IMPLEMENTED();
649
650 return ADDR_OK;
651 }
652
653 /**
654 ************************************************************************************************************************
655 * Gfx10Lib::HwlComputeDccAddrFromCoord
656 *
657 * @brief
658 * Interface function stub of AddrComputeDccAddrFromCoord
659 *
660 * @return
661 * ADDR_E_RETURNCODE
662 ************************************************************************************************************************
663 */
664 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccAddrFromCoord(
665 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
666 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
667 {
668 ADDR_E_RETURNCODE returnCode = ADDR_OK;
669
670 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
671 (pIn->swizzleMode != ADDR_SW_64KB_R_X) ||
672 (pIn->dccKeyFlags.linear == TRUE) ||
673 (pIn->numFrags > 1) ||
674 (pIn->numMipLevels > 1) ||
675 (pIn->mipId > 0))
676 {
677 returnCode = ADDR_NOTSUPPORTED;
678 }
679 else
680 {
681 ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
682 input.size = sizeof(input);
683 input.dccKeyFlags = pIn->dccKeyFlags;
684 input.colorFlags = pIn->colorFlags;
685 input.swizzleMode = pIn->swizzleMode;
686 input.resourceType = pIn->resourceType;
687 input.bpp = pIn->bpp;
688 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
689 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
690 input.numSlices = Max(pIn->numSlices, 1u);
691 input.numFrags = Max(pIn->numFrags, 1u);
692 input.numMipLevels = Max(pIn->numMipLevels, 1u);
693
694 ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
695 output.size = sizeof(output);
696
697 returnCode = ComputeDccInfo(&input, &output);
698
699 if (returnCode == ADDR_OK)
700 {
701 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
702 const UINT_32 numPipeLog2 = m_pipesLog2;
703 const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
704 const UINT_32 alignPipeType = 7;
705 const UINT_32 unalignPipeType = 3;
706 const UINT_32 numPipeType = alignPipeType + unalignPipeType;
707 UINT_32 index = ((m_pipeInterleaveLog2 - 8) * (MaxNumOfBpp * numPipeType)) + elemLog2;
708
709 if (pIn->dccKeyFlags.pipeAligned)
710 {
711 index += (numPipeLog2 + unalignPipeType) * MaxNumOfBpp;
712 }
713 else
714 {
715 index += Min(numPipeLog2, 2u) * MaxNumOfBpp;
716 }
717
718 const UINT_64* pPattern = DCC_64K_R_X[index];
719 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) + elemLog2 - 8;
720 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
721 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(pPattern,
722 blkSizeLog2 + 1, // +1 for nibble offset
723 pIn->x,
724 pIn->y,
725 pIn->slice,
726 0);
727 const UINT_32 xb = pIn->x / output.metaBlkWidth;
728 const UINT_32 yb = pIn->y / output.metaBlkHeight;
729 const UINT_32 pb = output.pitch / output.metaBlkWidth;
730 const UINT_32 blkIndex = (yb * pb) + xb;
731 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
732
733 pOut->addr = (static_cast<UINT_64>(output.dccRamSliceSize) * pIn->slice) +
734 (blkIndex * (1 << blkSizeLog2)) +
735 ((blkOffset >> 1) ^ pipeXor);
736 }
737 }
738
739 return returnCode;
740 }
741
742 /**
743 ************************************************************************************************************************
744 * Gfx10Lib::HwlInitGlobalParams
745 *
746 * @brief
747 * Initializes global parameters
748 *
749 * @return
750 * TRUE if all settings are valid
751 *
752 ************************************************************************************************************************
753 */
754 BOOL_32 Gfx10Lib::HwlInitGlobalParams(
755 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
756 {
757 BOOL_32 valid = TRUE;
758 GB_ADDR_CONFIG gbAddrConfig;
759
760 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
761
762 // These values are copied from CModel code
763 switch (gbAddrConfig.bits.NUM_PIPES)
764 {
765 case ADDR_CONFIG_1_PIPE:
766 m_pipes = 1;
767 m_pipesLog2 = 0;
768 break;
769 case ADDR_CONFIG_2_PIPE:
770 m_pipes = 2;
771 m_pipesLog2 = 1;
772 break;
773 case ADDR_CONFIG_4_PIPE:
774 m_pipes = 4;
775 m_pipesLog2 = 2;
776 break;
777 case ADDR_CONFIG_8_PIPE:
778 m_pipes = 8;
779 m_pipesLog2 = 3;
780 break;
781 case ADDR_CONFIG_16_PIPE:
782 m_pipes = 16;
783 m_pipesLog2 = 4;
784 break;
785 case ADDR_CONFIG_32_PIPE:
786 m_pipes = 32;
787 m_pipesLog2 = 5;
788 break;
789 case ADDR_CONFIG_64_PIPE:
790 m_pipes = 64;
791 m_pipesLog2 = 6;
792 break;
793 default:
794 ADDR_ASSERT_ALWAYS();
795 valid = FALSE;
796 break;
797 }
798
799 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
800 {
801 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
802 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
803 m_pipeInterleaveLog2 = 8;
804 break;
805 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
806 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
807 m_pipeInterleaveLog2 = 9;
808 break;
809 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
810 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
811 m_pipeInterleaveLog2 = 10;
812 break;
813 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
814 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
815 m_pipeInterleaveLog2 = 11;
816 break;
817 default:
818 ADDR_ASSERT_ALWAYS();
819 valid = FALSE;
820 break;
821 }
822
823 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
824 // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
825 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
826
827 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
828 {
829 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
830 m_maxCompFrag = 1;
831 m_maxCompFragLog2 = 0;
832 break;
833 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
834 m_maxCompFrag = 2;
835 m_maxCompFragLog2 = 1;
836 break;
837 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
838 m_maxCompFrag = 4;
839 m_maxCompFragLog2 = 2;
840 break;
841 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
842 m_maxCompFrag = 8;
843 m_maxCompFragLog2 = 3;
844 break;
845 default:
846 ADDR_ASSERT_ALWAYS();
847 valid = FALSE;
848 break;
849 }
850
851 if (m_settings.supportRbPlus)
852 {
853 m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
854 m_numSaLog2 = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
855
856 ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
857
858 const UINT_32 maxPipeInterleaveType = 3;
859
860 m_colorBaseIndex = sizeof(SW_64K_R_X_1xaa_RBPLUS) /
861 sizeof(SW_64K_R_X_1xaa_RBPLUS[0]) /
862 maxPipeInterleaveType *
863 (m_pipeInterleaveLog2 - 8);
864 m_htileBaseIndex = sizeof(HTILE_64K_RBPLUS) /
865 sizeof(HTILE_64K_RBPLUS[0]) /
866 maxPipeInterleaveType *
867 (m_pipeInterleaveLog2 - 8);
868
869 // Skip unaligned case
870 m_htileBaseIndex += MaxNumOfAA;
871
872 if (m_numPkrLog2 < 2)
873 {
874 m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
875 m_htileBaseIndex += m_pipesLog2 * MaxNumOfAA;
876 }
877 else
878 {
879 m_colorBaseIndex += (2 * m_numPkrLog2 - 2 + m_pipesLog2) * MaxNumOfBpp;
880
881 const UINT_32 htilePipePerPkr = 4;
882
883 m_htileBaseIndex += (m_numPkrLog2 - 1) * htilePipePerPkr * MaxNumOfAA +
884 (m_pipesLog2 + 1 - m_numPkrLog2) * MaxNumOfAA;
885 }
886 }
887 else
888 {
889 const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
890 static_cast<UINT_32>(ADDR_CONFIG_1_PIPE) +
891 1;
892
893 m_colorBaseIndex = (m_pipeInterleaveLog2 - 8) * (MaxNumOfBpp * numPipeType) +
894 (m_pipesLog2 * MaxNumOfBpp);
895
896 m_htileBaseIndex = (m_pipeInterleaveLog2 - 8) * (MaxNumOfAA * (numPipeType + 1)) +
897 (m_pipesLog2 + 1) * MaxNumOfAA;
898 }
899
900 if (valid)
901 {
902 InitEquationTable();
903 }
904
905 return valid;
906 }
907
908 /**
909 ************************************************************************************************************************
910 * Gfx10Lib::HwlConvertChipFamily
911 *
912 * @brief
913 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
914 * @return
915 * ChipFamily
916 ************************************************************************************************************************
917 */
918 ChipFamily Gfx10Lib::HwlConvertChipFamily(
919 UINT_32 chipFamily, ///< [in] chip family defined in atiih.h
920 UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h
921 {
922 ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
923
924 m_settings.dccUnsup3DSwDis = 1;
925
926 switch (chipFamily)
927 {
928 case FAMILY_NV:
929 m_settings.isDcn2 = 1;
930 break;
931 default:
932 ADDR_ASSERT(!"Unknown chip family");
933 break;
934 }
935
936 m_settings.dsMipmapHtileFix = 1;
937
938 if (ASICREV_IS_NAVI10_P(chipRevision))
939 {
940 m_settings.dsMipmapHtileFix = 0;
941 }
942
943 m_configFlags.use32bppFor422Fmt = TRUE;
944
945 return family;
946 }
947
948 /**
949 ************************************************************************************************************************
950 * Gfx10Lib::GetBlk256SizeLog2
951 *
952 * @brief
953 * Get block 256 size
954 *
955 * @return
956 * N/A
957 ************************************************************************************************************************
958 */
959 void Gfx10Lib::GetBlk256SizeLog2(
960 AddrResourceType resourceType, ///< [in] Resource type
961 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
962 UINT_32 elemLog2, ///< [in] element size log2
963 UINT_32 numSamplesLog2, ///< [in] number of samples
964 Dim3d* pBlock ///< [out] block size
965 ) const
966 {
967 if (IsThin(resourceType, swizzleMode))
968 {
969 UINT_32 blockBits = 8 - elemLog2;
970
971 if (IsZOrderSwizzle(swizzleMode))
972 {
973 blockBits -= numSamplesLog2;
974 }
975
976 pBlock->w = (blockBits >> 1) + (blockBits & 1);
977 pBlock->h = (blockBits >> 1);
978 pBlock->d = 0;
979 }
980 else
981 {
982 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
983
984 UINT_32 blockBits = 8 - elemLog2;
985
986 pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
987 pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
988 pBlock->h = (blockBits / 3);
989 }
990 }
991
992 /**
993 ************************************************************************************************************************
994 * Gfx10Lib::GetCompressedBlockSizeLog2
995 *
996 * @brief
997 * Get compress block size
998 *
999 * @return
1000 * N/A
1001 ************************************************************************************************************************
1002 */
1003 void Gfx10Lib::GetCompressedBlockSizeLog2(
1004 Gfx10DataType dataType, ///< [in] Data type
1005 AddrResourceType resourceType, ///< [in] Resource type
1006 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1007 UINT_32 elemLog2, ///< [in] element size log2
1008 UINT_32 numSamplesLog2, ///< [in] number of samples
1009 Dim3d* pBlock ///< [out] block size
1010 ) const
1011 {
1012 if (dataType == Gfx10DataColor)
1013 {
1014 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1015 }
1016 else
1017 {
1018 ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1019 pBlock->w = 3;
1020 pBlock->h = 3;
1021 pBlock->d = 0;
1022 }
1023 }
1024
1025 /**
1026 ************************************************************************************************************************
1027 * Gfx10Lib::GetMetaOverlapLog2
1028 *
1029 * @brief
1030 * Get meta block overlap
1031 *
1032 * @return
1033 * N/A
1034 ************************************************************************************************************************
1035 */
1036 INT_32 Gfx10Lib::GetMetaOverlapLog2(
1037 Gfx10DataType dataType, ///< [in] Data type
1038 AddrResourceType resourceType, ///< [in] Resource type
1039 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1040 UINT_32 elemLog2, ///< [in] element size log2
1041 UINT_32 numSamplesLog2 ///< [in] number of samples
1042 ) const
1043 {
1044 Dim3d compBlock;
1045 Dim3d microBlock;
1046
1047 GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1048 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, &microBlock);
1049
1050 const INT_32 compSizeLog2 = compBlock.w + compBlock.h + compBlock.d;
1051 const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1052 const INT_32 maxSizeLog2 = Max(compSizeLog2, blk256SizeLog2);
1053 const INT_32 numPipesLog2 = GetEffectiveNumPipes();
1054 INT_32 overlap = numPipesLog2 - maxSizeLog2;
1055
1056 if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1057 {
1058 overlap++;
1059 }
1060
1061 // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1062 if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1063 {
1064 overlap--;
1065 }
1066 overlap = Max(overlap, 0);
1067 return overlap;
1068 }
1069
1070 /**
1071 ************************************************************************************************************************
1072 * Gfx10Lib::Get3DMetaOverlapLog2
1073 *
1074 * @brief
1075 * Get 3d meta block overlap
1076 *
1077 * @return
1078 * N/A
1079 ************************************************************************************************************************
1080 */
1081 INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1082 AddrResourceType resourceType, ///< [in] Resource type
1083 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1084 UINT_32 elemLog2 ///< [in] element size log2
1085 ) const
1086 {
1087 Dim3d microBlock;
1088 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, &microBlock);
1089
1090 INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1091
1092 if (m_settings.supportRbPlus)
1093 {
1094 overlap++;
1095 }
1096
1097 if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1098 {
1099 overlap = 0;
1100 }
1101 return overlap;
1102 }
1103
1104 /**
1105 ************************************************************************************************************************
1106 * Gfx10Lib::GetPipeRotateAmount
1107 *
1108 * @brief
1109 * Get pipe rotate amount
1110 *
1111 * @return
1112 * Pipe rotate amount
1113 ************************************************************************************************************************
1114 */
1115
1116 INT_32 Gfx10Lib::GetPipeRotateAmount(
1117 AddrResourceType resourceType, ///< [in] Resource type
1118 AddrSwizzleMode swizzleMode ///< [in] Swizzle mode
1119 ) const
1120 {
1121 INT_32 amount = 0;
1122
1123 if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1124 {
1125 amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1126 1 : m_pipesLog2 - (m_numSaLog2 + 1);
1127 }
1128
1129 return amount;
1130 }
1131
1132 /**
1133 ************************************************************************************************************************
1134 * Gfx10Lib::GetMetaBlkSize
1135 *
1136 * @brief
1137 * Get metadata block size
1138 *
1139 * @return
1140 * Meta block size
1141 ************************************************************************************************************************
1142 */
1143 UINT_32 Gfx10Lib::GetMetaBlkSize(
1144 Gfx10DataType dataType, ///< [in] Data type
1145 AddrResourceType resourceType, ///< [in] Resource type
1146 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1147 UINT_32 elemLog2, ///< [in] element size log2
1148 UINT_32 numSamplesLog2, ///< [in] number of samples
1149 BOOL_32 pipeAlign, ///< [in] pipe align
1150 Dim3d* pBlock ///< [out] block size
1151 ) const
1152 {
1153 INT_32 metablkSizeLog2;
1154 const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType);
1155 const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType);
1156 const INT_32 compBlkSizeLog2 = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1157 const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1158 numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1159 const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode);
1160 INT_32 numPipesLog2 = m_pipesLog2;
1161
1162 if (IsThin(resourceType, swizzleMode))
1163 {
1164 if ((pipeAlign == FALSE) ||
1165 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1166 (IsDisplaySwizzle(resourceType, swizzleMode) == TRUE))
1167 {
1168 if (pipeAlign)
1169 {
1170 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1171 metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1172 }
1173 else
1174 {
1175 metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1176 }
1177 }
1178 else
1179 {
1180 if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1181 {
1182 numPipesLog2++;
1183 }
1184
1185 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1186
1187 if (numPipesLog2 >= 4)
1188 {
1189 INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1190
1191 // In 16Bpe 8xaa, we have an extra overlap bit
1192 if ((pipeRotateLog2 > 0) &&
1193 (elemLog2 == 4) &&
1194 (numSamplesLog2 == 3) &&
1195 (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1196 {
1197 overlapLog2++;
1198 }
1199
1200 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1201 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1202
1203 if (m_settings.supportRbPlus &&
1204 IsRtOptSwizzle(swizzleMode) &&
1205 (numPipesLog2 == 6) &&
1206 (numSamplesLog2 == 3) &&
1207 (m_maxCompFragLog2 == 3) &&
1208 (metablkSizeLog2 < 15))
1209 {
1210 metablkSizeLog2 = 15;
1211 }
1212 }
1213 else
1214 {
1215 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1216 }
1217
1218 if (dataType == Gfx10DataDepthStencil)
1219 {
1220 // For htile surfaces, pad meta block size to 2K * num_pipes
1221 metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1222 }
1223
1224 const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1225
1226 if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1227 {
1228 const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1229
1230 metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1231 }
1232 }
1233
1234 const INT_32 metablkBitsLog2 =
1235 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1236 pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1237 pBlock->h = 1 << (metablkBitsLog2 >> 1);
1238 pBlock->d = 1;
1239 }
1240 else
1241 {
1242 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1243
1244 if (pipeAlign)
1245 {
1246 if (m_settings.supportRbPlus &&
1247 (m_pipesLog2 == m_numSaLog2 + 1) &&
1248 (m_pipesLog2 > 1) &&
1249 IsRbAligned(resourceType, swizzleMode))
1250 {
1251 numPipesLog2++;
1252 }
1253
1254 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1255
1256 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1257 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1258 metablkSizeLog2 = Max(metablkSizeLog2, 12);
1259 }
1260 else
1261 {
1262 metablkSizeLog2 = 12;
1263 }
1264
1265 const INT_32 metablkBitsLog2 =
1266 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1267 pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1268 pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1269 pBlock->d = 1 << (metablkBitsLog2 / 3);
1270 }
1271
1272 return (1 << static_cast<UINT_32>(metablkSizeLog2));
1273 }
1274
1275 /**
1276 ************************************************************************************************************************
1277 * Gfx10Lib::ConvertSwizzlePatternToEquation
1278 *
1279 * @brief
1280 * Convert swizzle pattern to equation.
1281 *
1282 * @return
1283 * N/A
1284 ************************************************************************************************************************
1285 */
1286 VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
1287 UINT_32 elemLog2, ///< [in] element bytes log2
1288 AddrResourceType rsrcType, ///< [in] resource type
1289 AddrSwizzleMode swMode, ///< [in] swizzle mode
1290 const UINT_64* pPattern, ///< [in] swizzle pattern
1291 ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern
1292 const
1293 {
// Overview:
//   The pattern is read as one ADDR_BIT_SETTING per address bit; each entry carries x/y/z/s
//   coordinate-bit masks plus a combined `value` view of them. For every address bit i the code
//   fills pEquation->addr[i] (primary term) and, for XOR modes, pEquation->xor1[i]/xor2[i].
//   Channel 0/1/2 is used for terms taken from x/y/z respectively, and x indices are offset by
//   elemLog2. Three cases are handled: non-XOR modes, thin XOR modes and equation-compatible
//   thick XOR modes; any other combination leaves only numBits/stackedDepthSlices and the low
//   elemLog2 address bits written.
//   NOTE(review): xor1/xor2 entries are tested against zero before being written, so the caller
//   presumably passes a zero-initialized equation - confirm.
1294 const ADDR_BIT_SETTING* pSwizzle = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
1295 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
1296
1297 pEquation->numBits = blockSizeLog2;
1298 pEquation->stackedDepthSlices = FALSE;
1299
// The lowest elemLog2 address bits are always channel 0 with index equal to the bit position.
1300 for (UINT_32 i = 0; i < elemLog2; i++)
1301 {
1302 pEquation->addr[i].channel = 0;
1303 pEquation->addr[i].valid = 1;
1304 pEquation->addr[i].index = i;
1305 }
1306
// Case 1: non-XOR swizzle mode. Every pattern entry must be a single coordinate bit, which
// becomes the primary address term; both XOR terms are cleared.
1307 if (IsXor(swMode) == FALSE)
1308 {
1309 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1310 {
1311 ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1312
1313 if (pSwizzle[i].x != 0)
1314 {
1315 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1316
1317 pEquation->addr[i].channel = 0;
1318 pEquation->addr[i].valid = 1;
1319 pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2;
1320 }
1321 else if (pSwizzle[i].y != 0)
1322 {
1323 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1324
1325 pEquation->addr[i].channel = 1;
1326 pEquation->addr[i].valid = 1;
1327 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1328 }
1329 else
1330 {
1331 ADDR_ASSERT(pSwizzle[i].z != 0);
1332 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1333
1334 pEquation->addr[i].channel = 2;
1335 pEquation->addr[i].valid = 1;
1336 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1337 }
1338
1339 pEquation->xor1[i].value = 0;
1340 pEquation->xor2[i].value = 0;
1341 }
1342 }
// Case 2: thin XOR swizzle mode (x/y only for the primary terms; z may appear as an XOR term).
1343 else if (IsThin(rsrcType, swMode))
1344 {
// Block extents come from the 4K table when blockSizeLog2 is 12, otherwise from the 64K table.
1345 const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_2d[elemLog2].w : Block64K_Log2_2d[elemLog2].w;
1346 const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_2d[elemLog2].h : Block64K_Log2_2d[elemLog2].h;
1347 const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1348 const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1349
// swizzle[] : working copy holding the coordinate bits of each address bit not yet classified
// xMask/yMask: coordinate bits already claimed as the primary term of some address bit
// bMask      : address bits that have no unclassified coordinate bits left
1350 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1351 UINT_32 xMask = 0;
1352 UINT_32 yMask = 0;
1353 UINT_32 bMask = (1 << elemLog2) - 1;
1354
// Pass 1: classify everything that can be decided from the entry alone.
1355 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1356 {
1357 if (IsPow2(pSwizzle[i].value))
1358 {
// Single coordinate bit: it is the primary term of address bit i.
1359 if (pSwizzle[i].x != 0)
1360 {
1361 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1362 xMask |= pSwizzle[i].x;
1363
1364 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1365
1366 ADDR_ASSERT(xLog2 < blkXLog2);
1367
1368 pEquation->addr[i].channel = 0;
1369 pEquation->addr[i].valid = 1;
1370 pEquation->addr[i].index = xLog2 + elemLog2;
1371 }
1372 else
1373 {
1374 ADDR_ASSERT(pSwizzle[i].y != 0);
1375 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1376 yMask |= pSwizzle[i].y;
1377
1378 pEquation->addr[i].channel = 1;
1379 pEquation->addr[i].valid = 1;
1380 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1381
1382 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1383 }
1384
1385 swizzle[i].value = 0;
1386 bMask |= 1 << i;
1387 }
1388 else
1389 {
// Several coordinate bits: a z bit goes straight to xor2, and x/y bits outside the block
// extents (xHi/yHi) go to xor1/xor2. Whatever x/y bits remain inside the block extents are
// kept in swizzle[i] for pass 2.
1390 if (pSwizzle[i].z != 0)
1391 {
1392 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1393
1394 pEquation->xor2[i].channel = 2;
1395 pEquation->xor2[i].valid = 1;
1396 pEquation->xor2[i].index = Log2(pSwizzle[i].z);
1397 }
1398
1399 swizzle[i].x = pSwizzle[i].x;
1400 swizzle[i].y = pSwizzle[i].y;
1401 swizzle[i].z = swizzle[i].s = 0;
1402
1403 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1404
1405 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1406
1407 if (xHi != 0)
1408 {
1409 ADDR_ASSERT(IsPow2(xHi));
1410 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1411
1412 pEquation->xor1[i].channel = 0;
1413 pEquation->xor1[i].valid = 1;
1414 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1415
1416 swizzle[i].x &= blkXMask;
1417 }
1418
1419 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1420
1421 if (yHi != 0)
1422 {
1423 ADDR_ASSERT(IsPow2(yHi));
1424
// The high y bit takes xor1 unless a high x bit already used it, in which case it takes xor2.
1425 if (xHi == 0)
1426 {
1427 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1428 pEquation->xor1[i].channel = 1;
1429 pEquation->xor1[i].valid = 1;
1430 pEquation->xor1[i].index = Log2(yHi);
1431 }
1432 else
1433 {
1434 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1435 pEquation->xor2[i].channel = 1;
1436 pEquation->xor2[i].valid = 1;
1437 pEquation->xor2[i].index = Log2(yHi);
1438 }
1439
1440 swizzle[i].y &= blkYMask;
1441 }
1442
1443 if (swizzle[i].value == 0)
1444 {
1445 bMask |= 1 << i;
1446 }
1447 }
1448 }
1449
1450 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1451 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1452
// All address bits below the pipe interleave are expected to be resolved by pass 1.
1453 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1454
// Pass 2: repeat over the unresolved address bits until none remain. A bit whose remainder is a
// single coordinate bit takes it as its primary term (and claims it in xMask/yMask); otherwise
// any remaining coordinate bits that are already claimed elsewhere are moved into xor1/xor2 and
// removed from the remainder, which may reduce it to a single bit on a later iteration.
// NOTE(review): termination relies on every sweep making progress for the supplied patterns.
1455 while (bMask != blockMask)
1456 {
1457 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1458 {
1459 if ((bMask & (1 << i)) == 0)
1460 {
1461 if (IsPow2(swizzle[i].value))
1462 {
1463 if (swizzle[i].x != 0)
1464 {
1465 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1466 xMask |= swizzle[i].x;
1467
1468 const UINT_32 xLog2 = Log2(swizzle[i].x);
1469
1470 ADDR_ASSERT(xLog2 < blkXLog2);
1471
1472 pEquation->addr[i].channel = 0;
1473 pEquation->addr[i].valid = 1;
1474 pEquation->addr[i].index = xLog2 + elemLog2;
1475 }
1476 else
1477 {
1478 ADDR_ASSERT(swizzle[i].y != 0);
1479 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1480 yMask |= swizzle[i].y;
1481
1482 pEquation->addr[i].channel = 1;
1483 pEquation->addr[i].valid = 1;
1484 pEquation->addr[i].index = Log2(swizzle[i].y);
1485
1486 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1487 }
1488
1489 swizzle[i].value = 0;
1490 bMask |= 1 << i;
1491 }
1492 else
1493 {
// x / y: the part of the remainder that is already the primary term of another address bit.
1494 const UINT_32 x = swizzle[i].x & xMask;
1495 const UINT_32 y = swizzle[i].y & yMask;
1496
1497 if (x != 0)
1498 {
1499 ADDR_ASSERT(IsPow2(x));
1500
// Use xor1 if it is still free, otherwise xor2 (which must then be free).
1501 if (pEquation->xor1[i].value == 0)
1502 {
1503 pEquation->xor1[i].channel = 0;
1504 pEquation->xor1[i].valid = 1;
1505 pEquation->xor1[i].index = Log2(x) + elemLog2;
1506 }
1507 else
1508 {
1509 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1510 pEquation->xor2[i].channel = 0;
1511 pEquation->xor2[i].valid = 1;
1512 pEquation->xor2[i].index = Log2(x) + elemLog2;
1513 }
1514 }
1515
1516 if (y != 0)
1517 {
1518 ADDR_ASSERT(IsPow2(y));
1519
1520 if (pEquation->xor1[i].value == 0)
1521 {
1522 pEquation->xor1[i].channel = 1;
1523 pEquation->xor1[i].valid = 1;
1524 pEquation->xor1[i].index = Log2(y);
1525 }
1526 else
1527 {
1528 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1529 pEquation->xor2[i].channel = 1;
1530 pEquation->xor2[i].valid = 1;
1531 pEquation->xor2[i].index = Log2(y);
1532 }
1533 }
1534
1535 swizzle[i].x &= ~x;
1536 swizzle[i].y &= ~y;
1537 }
1538 }
1539 }
1540 }
1541
// Every x/y bit inside the block extents must have ended up as a primary term.
1542 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1543 }
// Case 3: equation-compatible thick XOR swizzle mode. Same two-pass scheme as the thin case,
// extended with a z coordinate (zMask / blkZMask) and using the 3D block extent tables.
1544 else if (IsEquationCompatibleThick(rsrcType, swMode))
1545 {
1546 const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
1547 const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
1548 const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
1549 const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1550 const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1551 const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1552
1553 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1554 UINT_32 xMask = 0;
1555 UINT_32 yMask = 0;
1556 UINT_32 zMask = 0;
1557 UINT_32 bMask = (1 << elemLog2) - 1;
1558
// Pass 1: single-bit entries become primary terms; multi-bit entries shed their out-of-block
// x/y/z bits into xor1/xor2 and keep the in-block remainder in swizzle[i].
1559 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1560 {
1561 if (IsPow2(pSwizzle[i].value))
1562 {
1563 if (pSwizzle[i].x != 0)
1564 {
1565 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1566 xMask |= pSwizzle[i].x;
1567
1568 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1569
1570 ADDR_ASSERT(xLog2 < blkXLog2);
1571
1572 pEquation->addr[i].channel = 0;
1573 pEquation->addr[i].valid = 1;
1574 pEquation->addr[i].index = xLog2 + elemLog2;
1575 }
1576 else if (pSwizzle[i].y != 0)
1577 {
1578 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1579 yMask |= pSwizzle[i].y;
1580
1581 pEquation->addr[i].channel = 1;
1582 pEquation->addr[i].valid = 1;
1583 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1584
1585 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1586 }
1587 else
1588 {
1589 ADDR_ASSERT(pSwizzle[i].z != 0);
1590 ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1591 zMask |= pSwizzle[i].z;
1592
1593 pEquation->addr[i].channel = 2;
1594 pEquation->addr[i].valid = 1;
1595 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1596
1597 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1598 }
1599
1600 swizzle[i].value = 0;
1601 bMask |= 1 << i;
1602 }
1603 else
1604 {
1605 swizzle[i].x = pSwizzle[i].x;
1606 swizzle[i].y = pSwizzle[i].y;
1607 swizzle[i].z = pSwizzle[i].z;
1608 swizzle[i].s = 0;
1609
1610 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1611
1612 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1613 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1614 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1615
// Only two XOR slots exist, so at most two of the three coordinates may have out-of-block bits.
1616 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1617
1618 if (xHi != 0)
1619 {
1620 ADDR_ASSERT(IsPow2(xHi));
1621 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1622
1623 pEquation->xor1[i].channel = 0;
1624 pEquation->xor1[i].valid = 1;
1625 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1626
1627 swizzle[i].x &= blkXMask;
1628 }
1629
1630 if (yHi != 0)
1631 {
1632 ADDR_ASSERT(IsPow2(yHi));
1633
1634 if (pEquation->xor1[i].value == 0)
1635 {
1636 pEquation->xor1[i].channel = 1;
1637 pEquation->xor1[i].valid = 1;
1638 pEquation->xor1[i].index = Log2(yHi);
1639 }
1640 else
1641 {
1642 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1643 pEquation->xor2[i].channel = 1;
1644 pEquation->xor2[i].valid = 1;
1645 pEquation->xor2[i].index = Log2(yHi);
1646 }
1647
1648 swizzle[i].y &= blkYMask;
1649 }
1650
1651 if (zHi != 0)
1652 {
1653 ADDR_ASSERT(IsPow2(zHi));
1654
1655 if (pEquation->xor1[i].value == 0)
1656 {
1657 pEquation->xor1[i].channel = 2;
1658 pEquation->xor1[i].valid = 1;
1659 pEquation->xor1[i].index = Log2(zHi);
1660 }
1661 else
1662 {
1663 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1664 pEquation->xor2[i].channel = 2;
1665 pEquation->xor2[i].valid = 1;
1666 pEquation->xor2[i].index = Log2(zHi);
1667 }
1668
1669 swizzle[i].z &= blkZMask;
1670 }
1671
1672 if (swizzle[i].value == 0)
1673 {
1674 bMask |= 1 << i;
1675 }
1676 }
1677 }
1678
1679 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1680 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1681
1682 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1683
// Pass 2: same iterative resolution as in the thin case, with z handled alongside x and y.
1684 while (bMask != blockMask)
1685 {
1686 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1687 {
1688 if ((bMask & (1 << i)) == 0)
1689 {
1690 if (IsPow2(swizzle[i].value))
1691 {
1692 if (swizzle[i].x != 0)
1693 {
1694 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1695 xMask |= swizzle[i].x;
1696
1697 const UINT_32 xLog2 = Log2(swizzle[i].x);
1698
1699 ADDR_ASSERT(xLog2 < blkXLog2);
1700
1701 pEquation->addr[i].channel = 0;
1702 pEquation->addr[i].valid = 1;
1703 pEquation->addr[i].index = xLog2 + elemLog2;
1704 }
1705 else if (swizzle[i].y != 0)
1706 {
1707 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1708 yMask |= swizzle[i].y;
1709
1710 pEquation->addr[i].channel = 1;
1711 pEquation->addr[i].valid = 1;
1712 pEquation->addr[i].index = Log2(swizzle[i].y);
1713
1714 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1715 }
1716 else
1717 {
1718 ADDR_ASSERT(swizzle[i].z != 0);
1719 ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1720 zMask |= swizzle[i].z;
1721
1722 pEquation->addr[i].channel = 2;
1723 pEquation->addr[i].valid = 1;
1724 pEquation->addr[i].index = Log2(swizzle[i].z);
1725
1726 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1727 }
1728
1729 swizzle[i].value = 0;
1730 bMask |= 1 << i;
1731 }
1732 else
1733 {
1734 const UINT_32 x = swizzle[i].x & xMask;
1735 const UINT_32 y = swizzle[i].y & yMask;
1736 const UINT_32 z = swizzle[i].z & zMask;
1737
1738 if (x != 0)
1739 {
1740 ADDR_ASSERT(IsPow2(x));
1741
1742 if (pEquation->xor1[i].value == 0)
1743 {
1744 pEquation->xor1[i].channel = 0;
1745 pEquation->xor1[i].valid = 1;
1746 pEquation->xor1[i].index = Log2(x) + elemLog2;
1747 }
1748 else
1749 {
1750 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1751 pEquation->xor2[i].channel = 0;
1752 pEquation->xor2[i].valid = 1;
1753 pEquation->xor2[i].index = Log2(x) + elemLog2;
1754 }
1755 }
1756
1757 if (y != 0)
1758 {
1759 ADDR_ASSERT(IsPow2(y));
1760
1761 if (pEquation->xor1[i].value == 0)
1762 {
1763 pEquation->xor1[i].channel = 1;
1764 pEquation->xor1[i].valid = 1;
1765 pEquation->xor1[i].index = Log2(y);
1766 }
1767 else
1768 {
1769 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1770 pEquation->xor2[i].channel = 1;
1771 pEquation->xor2[i].valid = 1;
1772 pEquation->xor2[i].index = Log2(y);
1773 }
1774 }
1775
1776 if (z != 0)
1777 {
1778 ADDR_ASSERT(IsPow2(z));
1779
1780 if (pEquation->xor1[i].value == 0)
1781 {
1782 pEquation->xor1[i].channel = 2;
1783 pEquation->xor1[i].valid = 1;
1784 pEquation->xor1[i].index = Log2(z);
1785 }
1786 else
1787 {
1788 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1789 pEquation->xor2[i].channel = 2;
1790 pEquation->xor2[i].valid = 1;
1791 pEquation->xor2[i].index = Log2(z);
1792 }
1793 }
1794
1795 swizzle[i].x &= ~x;
1796 swizzle[i].y &= ~y;
1797 swizzle[i].z &= ~z;
1798 }
1799 }
1800 }
1801 }
1802
// Every x/y/z bit inside the block extents must have ended up as a primary term.
1803 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1804 }
1805 }
1806
1807 /**
1808 ************************************************************************************************************************
1809 * Gfx10Lib::InitEquationTable
1810 *
1811 * @brief
1812 * Initialize Equation table.
1813 *
1814 * @return
1815 * N/A
1816 ************************************************************************************************************************
1817 */
1818 VOID Gfx10Lib::InitEquationTable()
1819 {
1820 memset(m_equationTable, 0, sizeof(m_equationTable));
1821
1822 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1823 {
1824 const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1825
1826 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
1827 {
1828 const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1829
1830 for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1831 {
1832 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
1833 const UINT_64* pPattern = GetSwizzlePattern(swMode, rsrcType, elemLog2, 1);
1834
1835 if (pPattern != NULL)
1836 {
1837 ADDR_EQUATION equation = {};
1838
1839 ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPattern, &equation);
1840
1841 equationIndex = m_numEquations;
1842 ADDR_ASSERT(equationIndex < EquationTableSize);
1843
1844 m_equationTable[equationIndex] = equation;
1845
1846 m_numEquations++;
1847 }
1848
1849 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
1850 }
1851 }
1852 }
1853 }
1854
1855 /**
1856 ************************************************************************************************************************
1857 * Gfx10Lib::HwlGetEquationIndex
1858 *
1859 * @brief
1860 * Interface function stub of GetEquationIndex
1861 *
1862 * @return
1863 * Equation index, or ADDR_INVALID_EQUATION_INDEX when no equation applies
1864 ************************************************************************************************************************
1865 */
1866 UINT_32 Gfx10Lib::HwlGetEquationIndex(
1867 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
1868 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
1869 ) const
1870 {
1871 UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
1872
1873 if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
1874 (pIn->resourceType == ADDR_RSRC_TEX_3D))
1875 {
1876 const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
1877 const UINT_32 swModeIdx = static_cast<UINT_32>(pIn->swizzleMode);
1878 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
1879
1880 equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
1881 }
1882
1883 if (pOut->pMipInfo != NULL)
1884 {
1885 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
1886 {
1887 pOut->pMipInfo[i].equationIndex = equationIdx;
1888 }
1889 }
1890
1891 return equationIdx;
1892 }
1893
1894 /**
1895 ************************************************************************************************************************
1896 * Gfx10Lib::IsValidDisplaySwizzleMode
1897 *
1898 * @brief
1899 * Check if a swizzle mode is supported by display engine
1900 *
1901 * @return
1902 * TRUE if swizzle mode is supported by display engine
1903 ************************************************************************************************************************
1904 */
1905 BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
1906 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
1907 ) const
1908 {
1909 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
1910
1911 BOOL_32 support = FALSE;
1912
1913 if (m_settings.isDcn2)
1914 {
1915 switch (pIn->swizzleMode)
1916 {
1917 case ADDR_SW_4KB_D:
1918 case ADDR_SW_4KB_D_X:
1919 case ADDR_SW_64KB_D:
1920 case ADDR_SW_64KB_D_T:
1921 case ADDR_SW_64KB_D_X:
1922 support = (pIn->bpp == 64);
1923 break;
1924
1925 case ADDR_SW_LINEAR:
1926 case ADDR_SW_4KB_S:
1927 case ADDR_SW_4KB_S_X:
1928 case ADDR_SW_64KB_S:
1929 case ADDR_SW_64KB_S_T:
1930 case ADDR_SW_64KB_S_X:
1931 case ADDR_SW_64KB_R_X:
1932 support = (pIn->bpp <= 64);
1933 break;
1934
1935 default:
1936 break;
1937 }
1938 }
1939 else
1940 {
1941 ADDR_NOT_IMPLEMENTED();
1942 }
1943
1944 return support;
1945 }
1946
1947 /**
1948 ************************************************************************************************************************
1949 * Gfx10Lib::GetMaxNumMipsInTail
1950 *
1951 * @brief
1952 * Return max number of mips in tails
1953 *
1954 * @return
1955 * Max number of mips in tails
1956 ************************************************************************************************************************
1957 */
1958 UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
1959 UINT_32 blockSizeLog2, ///< block size log2
1960 BOOL_32 isThin ///< is thin or thick
1961 ) const
1962 {
1963 UINT_32 effectiveLog2 = blockSizeLog2;
1964
1965 if (isThin == FALSE)
1966 {
1967 effectiveLog2 -= (blockSizeLog2 - 8) / 3;
1968 }
1969
1970 return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
1971 }
1972
1973 /**
1974 ************************************************************************************************************************
1975 * Gfx10Lib::HwlComputePipeBankXor
1976 *
1977 * @brief
1978 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
1979 *
1980 * @return
1981 * ADDR_OK; the PipeBankXor value is written to pOut->pipeBankXor
1982 ************************************************************************************************************************
1983 */
1984 ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
1985 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
1986 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
1987 ) const
1988 {
1989 if (IsNonPrtXor(pIn->swizzleMode))
1990 {
1991 const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
1992 const UINT_32 pipeBits = GetPipeXorBits(blockBits);
1993 const UINT_32 bankBits = GetBankXorBits(blockBits);
1994
1995 UINT_32 pipeXor = 0;
1996 UINT_32 bankXor = 0;
1997
1998 if (bankBits != 0)
1999 {
2000 if (blockBits == 16)
2001 {
2002 const UINT_32 XorPatternLen = 8;
2003 static const UINT_32 XorBank1b[XorPatternLen] = {0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80};
2004 static const UINT_32 XorBank2b[XorPatternLen] = {0x00, 0x80, 0x40, 0xC0, 0x80, 0x00, 0xC0, 0x40};
2005 static const UINT_32 XorBank3b[XorPatternLen] = {0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0};
2006
2007 const UINT_32 index = pIn->surfIndex % XorPatternLen;
2008
2009 if (bankBits == 1)
2010 {
2011 bankXor = XorBank1b[index];
2012 }
2013 else if (bankBits == 2)
2014 {
2015 bankXor = XorBank2b[index];
2016 }
2017 else
2018 {
2019 bankXor = XorBank3b[index];
2020
2021 if (bankBits == 4)
2022 {
2023 bankXor >>= (2 - pipeBits);
2024 }
2025 }
2026 }
2027 }
2028
2029 pOut->pipeBankXor = bankXor | pipeXor;
2030 }
2031 else
2032 {
2033 pOut->pipeBankXor = 0;
2034 }
2035
2036 return ADDR_OK;
2037 }
2038
2039 /**
2040 ************************************************************************************************************************
2041 * Gfx10Lib::HwlComputeSlicePipeBankXor
2042 *
2043 * @brief
2044 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2045 *
2046 * @return
2047 * ADDR_OK; the PipeBankXor value is written to pOut->pipeBankXor
2048 ************************************************************************************************************************
2049 */
2050 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
2051 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2052 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2053 ) const
2054 {
2055 if (IsNonPrtXor(pIn->swizzleMode))
2056 {
2057 const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2058 const UINT_32 pipeBits = GetPipeXorBits(blockBits);
2059 const UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
2060
2061 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;
2062 }
2063 else
2064 {
2065 pOut->pipeBankXor = 0;
2066 }
2067
2068 return ADDR_OK;
2069 }
2070
2071 /**
2072 ************************************************************************************************************************
2073 * Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2074 *
2075 * @brief
2076 * Compute sub resource offset to support swizzle pattern
2077 *
2078 * @return
2079 * Offset
2080 ************************************************************************************************************************
2081 */
2082 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2083 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure
2084 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure
2085 ) const
2086 {
2087 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2088
2089 pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
2090
2091 return ADDR_OK;
2092 }
2093
2094 /**
2095 ************************************************************************************************************************
2096 * Gfx10Lib::ValidateNonSwModeParams
2097 *
2098 * @brief
2099 * Validate compute surface info params except swizzle mode
2100 *
2101 * @return
2102 * TRUE if parameters are valid, FALSE otherwise
2103 ************************************************************************************************************************
2104 */
2105 BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
2106 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2107 {
2108 BOOL_32 valid = TRUE;
2109
2110 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2111 {
2112 ADDR_ASSERT_ALWAYS();
2113 valid = FALSE;
2114 }
2115
2116 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
2117 {
2118 ADDR_ASSERT_ALWAYS();
2119 valid = FALSE;
2120 }
2121
2122 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2123 const AddrResourceType rsrcType = pIn->resourceType;
2124 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
2125 const BOOL_32 msaa = (pIn->numFrags > 1);
2126 const BOOL_32 display = flags.display;
2127 const BOOL_32 tex3d = IsTex3d(rsrcType);
2128 const BOOL_32 tex2d = IsTex2d(rsrcType);
2129 const BOOL_32 tex1d = IsTex1d(rsrcType);
2130 const BOOL_32 stereo = flags.qbStereo;
2131
2132 // Resource type check
2133 if (tex1d)
2134 {
2135 if (msaa || display || stereo)
2136 {
2137 ADDR_ASSERT_ALWAYS();
2138 valid = FALSE;
2139 }
2140 }
2141 else if (tex2d)
2142 {
2143 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2144 {
2145 ADDR_ASSERT_ALWAYS();
2146 valid = FALSE;
2147 }
2148 }
2149 else if (tex3d)
2150 {
2151 if (msaa || display || stereo)
2152 {
2153 ADDR_ASSERT_ALWAYS();
2154 valid = FALSE;
2155 }
2156 }
2157 else
2158 {
2159 ADDR_ASSERT_ALWAYS();
2160 valid = FALSE;
2161 }
2162
2163 return valid;
2164 }
2165
2166 /**
2167 ************************************************************************************************************************
2168 * Gfx10Lib::ValidateSwModeParams
2169 *
2170 * @brief
2171 * Validate compute surface info related to swizzle mode
2172 *
2173 * @return
2174 * TRUE if parameters are valid, FALSE otherwise
2175 ************************************************************************************************************************
2176 */
2177 BOOL_32 Gfx10Lib::ValidateSwModeParams(
2178 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2179 {
2180 BOOL_32 valid = TRUE;
2181
2182 if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2183 {
2184 ADDR_ASSERT_ALWAYS();
2185 valid = FALSE;
2186 }
2187
2188 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2189 const AddrResourceType rsrcType = pIn->resourceType;
2190 const AddrSwizzleMode swizzle = pIn->swizzleMode;
2191 const BOOL_32 msaa = (pIn->numFrags > 1);
2192 const BOOL_32 zbuffer = flags.depth || flags.stencil;
2193 const BOOL_32 color = flags.color;
2194 const BOOL_32 display = flags.display;
2195 const BOOL_32 tex3d = IsTex3d(rsrcType);
2196 const BOOL_32 tex2d = IsTex2d(rsrcType);
2197 const BOOL_32 tex1d = IsTex1d(rsrcType);
2198 const BOOL_32 thin3d = flags.view3dAs2dArray;
2199 const BOOL_32 linear = IsLinear(swizzle);
2200 const BOOL_32 blk256B = IsBlock256b(swizzle);
2201 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
2202 const BOOL_32 prt = flags.prt;
2203
2204 // Misc check
2205 if ((pIn->numFrags > 1) &&
2206 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2207 {
2208 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2209 ADDR_ASSERT_ALWAYS();
2210 valid = FALSE;
2211 }
2212
2213 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2214 {
2215 ADDR_ASSERT_ALWAYS();
2216 valid = FALSE;
2217 }
2218
2219 if ((pIn->bpp == 96) && (linear == FALSE))
2220 {
2221 ADDR_ASSERT_ALWAYS();
2222 valid = FALSE;
2223 }
2224
2225 const UINT_32 swizzleMask = 1 << swizzle;
2226
2227 // Resource type check
2228 if (tex1d)
2229 {
2230 if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
2231 {
2232 ADDR_ASSERT_ALWAYS();
2233 valid = FALSE;
2234 }
2235 }
2236 else if (tex2d)
2237 {
2238 if (((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0) ||
2239 (prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)))
2240 {
2241 ADDR_ASSERT_ALWAYS();
2242 valid = FALSE;
2243 }
2244 }
2245 else if (tex3d)
2246 {
2247 if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
2248 (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
2249 (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0)))
2250 {
2251 ADDR_ASSERT_ALWAYS();
2252 valid = FALSE;
2253 }
2254 }
2255
2256 // Swizzle type check
2257 if (linear)
2258 {
2259 if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2260 {
2261 ADDR_ASSERT_ALWAYS();
2262 valid = FALSE;
2263 }
2264 }
2265 else if (IsZOrderSwizzle(swizzle))
2266 {
2267 if ((pIn->bpp > 64) ||
2268 (msaa && (color || (pIn->bpp > 32))) ||
2269 ElemLib::IsBlockCompressed(pIn->format) ||
2270 ElemLib::IsMacroPixelPacked(pIn->format))
2271 {
2272 ADDR_ASSERT_ALWAYS();
2273 valid = FALSE;
2274 }
2275 }
2276 else if (IsStandardSwizzle(rsrcType, swizzle))
2277 {
2278 if (zbuffer || msaa)
2279 {
2280 ADDR_ASSERT_ALWAYS();
2281 valid = FALSE;
2282 }
2283 }
2284 else if (IsDisplaySwizzle(rsrcType, swizzle))
2285 {
2286 if (zbuffer || msaa)
2287 {
2288 ADDR_ASSERT_ALWAYS();
2289 valid = FALSE;
2290 }
2291 }
2292 else if (IsRtOptSwizzle(swizzle))
2293 {
2294 if (zbuffer)
2295 {
2296 ADDR_ASSERT_ALWAYS();
2297 valid = FALSE;
2298 }
2299 }
2300 else
2301 {
2302 ADDR_ASSERT_ALWAYS();
2303 valid = FALSE;
2304 }
2305
2306 // Block type check
2307 if (blk256B)
2308 {
2309 if (zbuffer || tex3d || msaa)
2310 {
2311 ADDR_ASSERT_ALWAYS();
2312 valid = FALSE;
2313 }
2314 }
2315
2316 return valid;
2317 }
2318
2319 /**
2320 ************************************************************************************************************************
2321 * Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2322 *
2323 * @brief
2324 * Compute surface info sanity check
2325 *
2326 * @return
2327 * Offset
2328 ************************************************************************************************************************
2329 */
2330 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2331 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
2332 ) const
2333 {
2334 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2335 }
2336
2337 /**
2338 ************************************************************************************************************************
2339 * Gfx10Lib::HwlGetPreferredSurfaceSetting
2340 *
2341 * @brief
2342 * Internal function to get suggested surface information for cliet to use
2343 *
2344 * @return
2345 * ADDR_E_RETURNCODE
2346 ************************************************************************************************************************
2347 */
2348 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2349 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure
2350 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure
2351 ) const
2352 {
2353 ADDR_E_RETURNCODE returnCode = ADDR_OK;
2354
2355 if (pIn->flags.fmask)
2356 {
2357 pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2358 pOut->resourceType = ADDR_RSRC_TEX_2D;
2359 pOut->validBlockSet.value = AddrBlockSetMacro64KB;
2360 pOut->canXor = TRUE;
2361 pOut->validSwTypeSet.value = AddrSwSetZ;
2362 pOut->clientPreferredSwSet = pOut->validSwTypeSet;
2363 pOut->validSwModeSet.value = Gfx10ZSwModeMask;
2364 }
2365 else
2366 {
2367 UINT_32 bpp = pIn->bpp;
2368 UINT_32 width = Max(pIn->width, 1u);
2369 UINT_32 height = Max(pIn->height, 1u);
2370
2371 // Set format to INVALID will skip this conversion
2372 if (pIn->format != ADDR_FMT_INVALID)
2373 {
2374 ElemMode elemMode = ADDR_UNCOMPRESSED;
2375 UINT_32 expandX, expandY;
2376
2377 // Get compression/expansion factors and element mode which indicates compression/expansion
2378 bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2379 &elemMode,
2380 &expandX,
2381 &expandY);
2382
2383 UINT_32 basePitch = 0;
2384 GetElemLib()->AdjustSurfaceInfo(elemMode,
2385 expandX,
2386 expandY,
2387 &bpp,
2388 &basePitch,
2389 &width,
2390 &height);
2391 }
2392
2393 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2394 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2395 const UINT_32 numSamples = Max(pIn->numSamples, 1u);
2396 const UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2397 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
2398
2399 // Pre sanity check on non swizzle mode parameters
2400 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2401 localIn.flags = pIn->flags;
2402 localIn.resourceType = pIn->resourceType;
2403 localIn.format = pIn->format;
2404 localIn.bpp = bpp;
2405 localIn.width = width;
2406 localIn.height = height;
2407 localIn.numSlices = numSlices;
2408 localIn.numMipLevels = numMipLevels;
2409 localIn.numSamples = numSamples;
2410 localIn.numFrags = numFrags;
2411
2412 if (ValidateNonSwModeParams(&localIn))
2413 {
2414 // Forbid swizzle mode(s) by client setting
2415 ADDR2_SWMODE_SET allowedSwModeSet = {};
2416 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2417 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx10Blk256BSwModeMask;
2418 allowedSwModeSet.value |= pIn->forbiddenBlock.macro4KB ? 0 : Gfx10Blk4KBSwModeMask;
2419 allowedSwModeSet.value |= pIn->forbiddenBlock.macro64KB ? 0 : Gfx10Blk64KBSwModeMask;
2420
2421 if (pIn->preferredSwSet.value != 0)
2422 {
2423 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2424 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2425 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2426 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2427 }
2428
2429 if (pIn->noXor)
2430 {
2431 allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2432 }
2433
2434 if (pIn->maxAlign > 0)
2435 {
2436 if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
2437 {
2438 allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2439 }
2440
2441 if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
2442 {
2443 allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2444 }
2445
2446 if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
2447 {
2448 allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2449 }
2450 }
2451
2452 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2453 switch (pIn->resourceType)
2454 {
2455 case ADDR_RSRC_TEX_1D:
2456 allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2457 break;
2458
2459 case ADDR_RSRC_TEX_2D:
2460 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2461 break;
2462
2463 case ADDR_RSRC_TEX_3D:
2464 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2465
2466 if (m_settings.supportRbPlus)
2467 {
2468 allowedSwModeSet.value &= ~Gfx10DisplaySwModeMask;
2469 }
2470
2471 if (pIn->flags.view3dAs2dArray)
2472 {
2473 allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask;
2474 }
2475 break;
2476
2477 default:
2478 ADDR_ASSERT_ALWAYS();
2479 allowedSwModeSet.value = 0;
2480 break;
2481 }
2482
2483 if (ElemLib::IsBlockCompressed(pIn->format) ||
2484 ElemLib::IsMacroPixelPacked(pIn->format) ||
2485 (bpp > 64) ||
2486 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2487 {
2488 allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2489 }
2490
2491 if (pIn->format == ADDR_FMT_32_32_32)
2492 {
2493 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2494 }
2495
2496 if (msaa)
2497 {
2498 allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
2499 }
2500
2501 if (pIn->flags.depth || pIn->flags.stencil)
2502 {
2503 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2504 }
2505
2506 if (pIn->flags.display)
2507 {
2508 if (m_settings.isDcn2)
2509 {
2510 allowedSwModeSet.value &= (bpp == 64) ? Dcn2Bpp64SwModeMask : Dcn2NonBpp64SwModeMask;
2511 }
2512 else
2513 {
2514 ADDR_NOT_IMPLEMENTED();
2515 }
2516 }
2517
2518 if (allowedSwModeSet.value != 0)
2519 {
2520 #if DEBUG
2521 // Post sanity check, at least AddrLib should accept the output generated by its own
2522 UINT_32 validateSwModeSet = allowedSwModeSet.value;
2523
2524 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
2525 {
2526 if (validateSwModeSet & 1)
2527 {
2528 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
2529 ADDR_ASSERT(ValidateSwModeParams(&localIn));
2530 }
2531
2532 validateSwModeSet >>= 1;
2533 }
2534 #endif
2535
2536 pOut->resourceType = pIn->resourceType;
2537 pOut->validSwModeSet = allowedSwModeSet;
2538 pOut->canXor = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
2539 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet);
2540 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
2541
2542 pOut->clientPreferredSwSet = pIn->preferredSwSet;
2543
2544 if (pOut->clientPreferredSwSet.value == 0)
2545 {
2546 pOut->clientPreferredSwSet.value = AddrSwSetAll;
2547 }
2548
2549 if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
2550 {
2551 pOut->swizzleMode = ADDR_SW_LINEAR;
2552 }
2553 else
2554 {
2555 // Always ignore linear swizzle mode if there is other choice.
2556 allowedSwModeSet.swLinear = 0;
2557
2558 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet);
2559
2560 // Determine block size if there is 2 or more block type candidates
2561 if (IsPow2(allowedBlockSet.value) == FALSE)
2562 {
2563 const AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {ADDR_SW_256B, ADDR_SW_4KB, ADDR_SW_64KB};
2564 Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
2565 Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
2566 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
2567
2568 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2569 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2570 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
2571 UINT_32 minSizeBlk = AddrBlockMicro;
2572 UINT_64 minSize = 0;
2573
2574 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
2575 {
2576 if (allowedBlockSet.value & (1 << i))
2577 {
2578 ComputeBlockDimensionForSurf(&blkDim[i].w,
2579 &blkDim[i].h,
2580 &blkDim[i].d,
2581 bpp,
2582 numFrags,
2583 pOut->resourceType,
2584 swMode[i]);
2585
2586 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2587 padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2588
2589 if ((minSize == 0) ||
2590 ((padSize[i] * ratioHi) <= (minSize * ratioLow)))
2591 {
2592 minSize = padSize[i];
2593 minSizeBlk = i;
2594 }
2595 }
2596 }
2597
2598 if ((allowedBlockSet.micro == TRUE) &&
2599 (width <= blkDim[AddrBlockMicro].w) &&
2600 (height <= blkDim[AddrBlockMicro].h))
2601 {
2602 minSizeBlk = AddrBlockMicro;
2603 }
2604
2605 if (minSizeBlk == AddrBlockMicro)
2606 {
2607 allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
2608 }
2609 else if (minSizeBlk == AddrBlock4KB)
2610 {
2611 allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
2612 }
2613 else
2614 {
2615 ADDR_ASSERT(minSizeBlk == AddrBlock64KB);
2616 allowedSwModeSet.value &= Gfx10Blk64KBSwModeMask;
2617 }
2618 }
2619
2620 // Block type should be determined.
2621 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet).value));
2622
2623 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
2624
2625 // Determine swizzle type if there is 2 or more swizzle type candidates
2626 if (IsPow2(allowedSwSet.value) == FALSE)
2627 {
2628 if (ElemLib::IsBlockCompressed(pIn->format))
2629 {
2630 if (allowedSwSet.sw_D)
2631 {
2632 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2633 }
2634 else if (allowedSwSet.sw_S)
2635 {
2636 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2637 }
2638 else
2639 {
2640 ADDR_ASSERT(allowedSwSet.sw_R);
2641 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2642 }
2643 }
2644 else if (ElemLib::IsMacroPixelPacked(pIn->format))
2645 {
2646 if (allowedSwSet.sw_S)
2647 {
2648 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2649 }
2650 else if (allowedSwSet.sw_D)
2651 {
2652 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2653 }
2654 else
2655 {
2656 ADDR_ASSERT(allowedSwSet.sw_R);
2657 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2658 }
2659 }
2660 else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
2661 {
2662 if (pIn->flags.color && GetAllowedBlockSet(allowedSwModeSet).macro64KB && allowedSwSet.sw_D)
2663 {
2664 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2665 }
2666 else if (allowedSwSet.sw_S)
2667 {
2668 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2669 }
2670 else if (allowedSwSet.sw_R)
2671 {
2672 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2673 }
2674 else
2675 {
2676 ADDR_ASSERT(allowedSwSet.sw_Z);
2677 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2678 }
2679 }
2680 else
2681 {
2682 if (allowedSwSet.sw_R)
2683 {
2684 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2685 }
2686 else if (allowedSwSet.sw_D)
2687 {
2688 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2689 }
2690 else if (allowedSwSet.sw_S)
2691 {
2692 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2693 }
2694 else
2695 {
2696 ADDR_ASSERT(allowedSwSet.sw_Z);
2697 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2698 }
2699 }
2700 }
2701
2702 // Swizzle type should be determined.
2703 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
2704
2705 // Determine swizzle mode now - always select the "largest" swizzle mode for a given block type +
2706 // swizzle type combination. For example, for AddrBlock64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
2707 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
2708 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
2709 }
2710 }
2711 else
2712 {
2713 // Invalid combination...
2714 ADDR_ASSERT_ALWAYS();
2715 returnCode = ADDR_INVALIDPARAMS;
2716 }
2717 }
2718 else
2719 {
2720 // Invalid combination...
2721 ADDR_ASSERT_ALWAYS();
2722 returnCode = ADDR_INVALIDPARAMS;
2723 }
2724 }
2725
2726 return returnCode;
2727 }
2728
2729 /**
2730 ************************************************************************************************************************
2731 * Gfx10Lib::ComputeStereoInfo
2732 *
2733 * @brief
2734 * Compute height alignment and right eye pipeBankXor for stereo surface
2735 *
2736 * @return
2737 * Error code
2738 *
2739 ************************************************************************************************************************
2740 */
2741 ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
2742 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< Compute surface info
2743 UINT_32 blkHeight, ///< Block height
2744 UINT_32* pAlignY, ///< Stereo requested additional alignment in Y
2745 UINT_32* pRightXor ///< Right eye xor
2746 ) const
2747 {
2748 ADDR_E_RETURNCODE ret = ADDR_OK;
2749
2750 *pAlignY = 1;
2751 *pRightXor = 0;
2752
2753 if (IsNonPrtXor(pIn->swizzleMode))
2754 {
2755 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
2756 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
2757 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
2758 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
2759 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
2760
2761 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
2762 {
2763 UINT_32 yMax = 0;
2764 UINT_32 yPos = 0;
2765
2766 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
2767 {
2768 if (m_equationTable[eqIndex].xor1[i].value == 0)
2769 {
2770 break;
2771 }
2772
2773 ADDR_ASSERT(m_equationTable[eqIndex].xor1[i].valid == 1);
2774
2775 if ((m_equationTable[eqIndex].xor1[i].channel == 1) &&
2776 (m_equationTable[eqIndex].xor1[i].index > yMax))
2777 {
2778 yMax = m_equationTable[eqIndex].xor1[i].index;
2779 yPos = i;
2780 }
2781 }
2782
2783 const UINT_32 additionalAlign = 1 << yMax;
2784
2785 if (additionalAlign >= blkHeight)
2786 {
2787 *pAlignY *= (additionalAlign / blkHeight);
2788
2789 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
2790
2791 if ((alignedHeight >> yMax) & 1)
2792 {
2793 *pRightXor = 1 << (yPos - m_pipeInterleaveLog2);
2794 }
2795 }
2796 }
2797 else
2798 {
2799 ret = ADDR_INVALIDPARAMS;
2800 }
2801 }
2802
2803 return ret;
2804 }
2805
2806 /**
2807 ************************************************************************************************************************
2808 * Gfx10Lib::HwlComputeSurfaceInfoTiled
2809 *
2810 * @brief
2811 * Internal function to calculate alignment for tiled surface
2812 *
2813 * @return
2814 * ADDR_E_RETURNCODE
2815 ************************************************************************************************************************
2816 */
2817 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
2818 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
2819 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
2820 ) const
2821 {
2822 ADDR_E_RETURNCODE ret;
2823
2824 if (IsBlock256b(pIn->swizzleMode))
2825 {
2826 ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
2827 }
2828 else
2829 {
2830 ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
2831 }
2832
2833 return ret;
2834 }
2835
2836 /**
2837 ************************************************************************************************************************
2838 * Gfx10Lib::ComputeSurfaceInfoMicroTiled
2839 *
2840 * @brief
2841 * Internal function to calculate alignment for micro tiled surface
2842 *
2843 * @return
2844 * ADDR_E_RETURNCODE
2845 ************************************************************************************************************************
2846 */
2847 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
2848 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
2849 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
2850 ) const
2851 {
2852 ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
2853 &pOut->blockHeight,
2854 &pOut->blockSlices,
2855 pIn->bpp,
2856 pIn->numFrags,
2857 pIn->resourceType,
2858 pIn->swizzleMode);
2859
2860 if (ret == ADDR_OK)
2861 {
2862 pOut->mipChainPitch = 0;
2863 pOut->mipChainHeight = 0;
2864 pOut->mipChainSlice = 0;
2865 pOut->epitchIsHeight = FALSE;
2866 pOut->mipChainInTail = FALSE;
2867 pOut->firstMipIdInTail = pIn->numMipLevels;
2868
2869 const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
2870
2871 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
2872 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
2873 pOut->numSlices = pIn->numSlices;
2874 pOut->baseAlign = blockSize;
2875
2876 if (pIn->numMipLevels > 1)
2877 {
2878 const UINT_32 mip0Width = pIn->width;
2879 const UINT_32 mip0Height = pIn->height;
2880 UINT_64 mipSliceSize = 0;
2881
2882 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
2883 {
2884 UINT_32 mipWidth, mipHeight;
2885
2886 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
2887
2888 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pOut->blockWidth);
2889 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
2890
2891 if (pOut->pMipInfo != NULL)
2892 {
2893 pOut->pMipInfo[i].pitch = mipActualWidth;
2894 pOut->pMipInfo[i].height = mipActualHeight;
2895 pOut->pMipInfo[i].depth = 1;
2896 pOut->pMipInfo[i].offset = mipSliceSize;
2897 pOut->pMipInfo[i].mipTailOffset = 0;
2898 pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
2899 }
2900
2901 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
2902 }
2903
2904 pOut->sliceSize = mipSliceSize;
2905 pOut->surfSize = mipSliceSize * pOut->numSlices;
2906 }
2907 else
2908 {
2909 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
2910 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
2911
2912 if (pOut->pMipInfo != NULL)
2913 {
2914 pOut->pMipInfo[0].pitch = pOut->pitch;
2915 pOut->pMipInfo[0].height = pOut->height;
2916 pOut->pMipInfo[0].depth = 1;
2917 pOut->pMipInfo[0].offset = 0;
2918 pOut->pMipInfo[0].mipTailOffset = 0;
2919 pOut->pMipInfo[0].macroBlockOffset = 0;
2920 }
2921 }
2922
2923 }
2924
2925 return ret;
2926 }
2927
2928 /**
2929 ************************************************************************************************************************
2930 * Gfx10Lib::ComputeSurfaceInfoMacroTiled
2931 *
2932 * @brief
2933 * Internal function to calculate alignment for macro tiled surface
2934 *
2935 * @return
2936 * ADDR_E_RETURNCODE
2937 ************************************************************************************************************************
2938 */
2939 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
2940 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
2941 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
2942 ) const
2943 {
2944 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
2945 &pOut->blockHeight,
2946 &pOut->blockSlices,
2947 pIn->bpp,
2948 pIn->numFrags,
2949 pIn->resourceType,
2950 pIn->swizzleMode);
2951
2952 if (returnCode == ADDR_OK)
2953 {
2954 UINT_32 heightAlign = pOut->blockHeight;
2955
2956 if (pIn->flags.qbStereo)
2957 {
2958 UINT_32 rightXor = 0;
2959 UINT_32 alignY = 1;
2960
2961 returnCode = ComputeStereoInfo(pIn, heightAlign, &alignY, &rightXor);
2962
2963 if (returnCode == ADDR_OK)
2964 {
2965 pOut->pStereoInfo->rightSwizzle = rightXor;
2966
2967 heightAlign *= alignY;
2968 }
2969 }
2970
2971 if (returnCode == ADDR_OK)
2972 {
2973 // Mip chain dimesion and epitch has no meaning in GFX10, set to default value
2974 pOut->mipChainPitch = 0;
2975 pOut->mipChainHeight = 0;
2976 pOut->mipChainSlice = 0;
2977 pOut->epitchIsHeight = FALSE;
2978 pOut->mipChainInTail = FALSE;
2979 pOut->firstMipIdInTail = pIn->numMipLevels;
2980
2981 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
2982 const UINT_32 blockSize = 1 << blockSizeLog2;
2983
2984 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
2985 pOut->height = PowTwoAlign(pIn->height, heightAlign);
2986 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
2987 pOut->baseAlign = blockSize;
2988
2989 if (pIn->numMipLevels > 1)
2990 {
2991 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
2992 pIn->swizzleMode,
2993 pOut->blockWidth,
2994 pOut->blockHeight,
2995 pOut->blockSlices);
2996 const UINT_32 mip0Width = pIn->width;
2997 const UINT_32 mip0Height = pIn->height;
2998 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
2999 const UINT_32 mip0Depth = isThin ? 1 : pIn->numSlices;
3000 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3001 const UINT_32 index = Log2(pIn->bpp >> 3);
3002 UINT_32 firstMipInTail = pIn->numMipLevels;
3003 UINT_64 mipChainSliceSize = 0;
3004 UINT_64 mipSize[MaxMipLevels];
3005 UINT_64 mipSliceSize[MaxMipLevels];
3006
3007 Dim3d fixedTailMaxDim = tailMaxDim;
3008
3009 if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3010 {
3011 fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3012 fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3013 }
3014
3015 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3016 {
3017 UINT_32 mipWidth, mipHeight, mipDepth;
3018
3019 GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3020
3021 if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3022 {
3023 firstMipInTail = i;
3024 mipChainSliceSize += blockSize / pOut->blockSlices;
3025 break;
3026 }
3027 else
3028 {
3029 const UINT_32 pitch = PowTwoAlign(mipWidth, pOut->blockWidth);
3030 const UINT_32 height = PowTwoAlign(mipHeight, pOut->blockHeight);
3031 const UINT_32 depth = PowTwoAlign(mipDepth, pOut->blockSlices);
3032 const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3033
3034 mipSize[i] = sliceSize * depth;
3035 mipSliceSize[i] = sliceSize * pOut->blockSlices;
3036 mipChainSliceSize += sliceSize;
3037
3038 if (pOut->pMipInfo != NULL)
3039 {
3040 pOut->pMipInfo[i].pitch = pitch;
3041 pOut->pMipInfo[i].height = height;
3042 pOut->pMipInfo[i].depth = depth;
3043 }
3044 }
3045 }
3046
3047 pOut->sliceSize = mipChainSliceSize;
3048 pOut->surfSize = mipChainSliceSize * pOut->numSlices;
3049 pOut->mipChainInTail = (firstMipInTail == 0) ? TRUE : FALSE;
3050 pOut->firstMipIdInTail = firstMipInTail;
3051
3052 if (pOut->pMipInfo != NULL)
3053 {
3054 UINT_64 offset = 0;
3055 UINT_64 macroBlkOffset = 0;
3056 UINT_32 tailMaxDepth = 0;
3057
3058 if (firstMipInTail != pIn->numMipLevels)
3059 {
3060 UINT_32 mipWidth, mipHeight;
3061
3062 GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3063 &mipWidth, &mipHeight, &tailMaxDepth);
3064
3065 offset = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3066 macroBlkOffset = blockSize;
3067 }
3068
3069 for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3070 {
3071 pOut->pMipInfo[i].offset = offset;
3072 pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3073 pOut->pMipInfo[i].mipTailOffset = 0;
3074
3075 offset += mipSize[i];
3076 macroBlkOffset += mipSliceSize[i];
3077 }
3078
3079 UINT_32 pitch = tailMaxDim.w;
3080 UINT_32 height = tailMaxDim.h;
3081 UINT_32 depth = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3082
3083 tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3084
3085 for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3086 {
3087 const UINT_32 m = maxMipsInTail - 1 - (i - firstMipInTail);
3088 const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3089
3090 pOut->pMipInfo[i].offset = mipOffset * tailMaxDepth;
3091 pOut->pMipInfo[i].mipTailOffset = mipOffset;
3092 pOut->pMipInfo[i].macroBlockOffset = 0;
3093
3094 pOut->pMipInfo[i].pitch = pitch;
3095 pOut->pMipInfo[i].height = height;
3096 pOut->pMipInfo[i].depth = depth;
3097
3098 UINT_32 mipX = ((mipOffset >> 9) & 1) |
3099 ((mipOffset >> 10) & 2) |
3100 ((mipOffset >> 11) & 4) |
3101 ((mipOffset >> 12) & 8) |
3102 ((mipOffset >> 13) & 16) |
3103 ((mipOffset >> 14) & 32);
3104 UINT_32 mipY = ((mipOffset >> 8) & 1) |
3105 ((mipOffset >> 9) & 2) |
3106 ((mipOffset >> 10) & 4) |
3107 ((mipOffset >> 11) & 8) |
3108 ((mipOffset >> 12) & 16) |
3109 ((mipOffset >> 13) & 32);
3110
3111 if (blockSizeLog2 & 1)
3112 {
3113 const UINT_32 temp = mipX;
3114 mipX = mipY;
3115 mipY = temp;
3116
3117 if (index & 1)
3118 {
3119 mipY = (mipY << 1) | (mipX & 1);
3120 mipX = mipX >> 1;
3121 }
3122 }
3123
3124 if (isThin)
3125 {
3126 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3127 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3128 pOut->pMipInfo[i].mipTailCoordZ = 0;
3129
3130 pitch = Max(pitch >> 1, Block256_2d[index].w);
3131 height = Max(height >> 1, Block256_2d[index].h);
3132 depth = 1;
3133 }
3134 else
3135 {
3136 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3137 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3138 pOut->pMipInfo[i].mipTailCoordZ = 0;
3139
3140 pitch = Max(pitch >> 1, Block256_3d[index].w);
3141 height = Max(height >> 1, Block256_3d[index].h);
3142 depth = PowTwoAlign(Max(depth >> 1, 1u), Block256_3d[index].d);
3143 }
3144 }
3145 }
3146 }
3147 else
3148 {
3149 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
3150 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3151
3152 if (pOut->pMipInfo != NULL)
3153 {
3154 pOut->pMipInfo[0].pitch = pOut->pitch;
3155 pOut->pMipInfo[0].height = pOut->height;
3156 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
3157 pOut->pMipInfo[0].offset = 0;
3158 pOut->pMipInfo[0].mipTailOffset = 0;
3159 pOut->pMipInfo[0].macroBlockOffset = 0;
3160 pOut->pMipInfo[0].mipTailCoordX = 0;
3161 pOut->pMipInfo[0].mipTailCoordY = 0;
3162 pOut->pMipInfo[0].mipTailCoordZ = 0;
3163 }
3164 }
3165 }
3166 }
3167
3168 return returnCode;
3169 }
3170
3171 /**
3172 ************************************************************************************************************************
3173 * Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3174 *
3175 * @brief
3176 * Internal function to calculate address from coord for tiled swizzle surface
3177 *
3178 * @return
3179 * ADDR_E_RETURNCODE
3180 ************************************************************************************************************************
3181 */
3182 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
3183 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3184 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3185 ) const
3186 {
3187 ADDR_E_RETURNCODE ret;
3188
3189 if (IsBlock256b(pIn->swizzleMode))
3190 {
3191 ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3192 }
3193 else
3194 {
3195 ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3196 }
3197
3198 return ret;
3199 }
3200
3201 /**
3202 ************************************************************************************************************************
3203 * Gfx10Lib::ComputeOffsetFromEquation
3204 *
3205 * @brief
3206 * Compute offset from equation
3207 *
3208 * @return
3209 * Offset
3210 ************************************************************************************************************************
3211 */
3212 UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
3213 const ADDR_EQUATION* pEq, ///< Equation
3214 UINT_32 x, ///< x coord in bytes
3215 UINT_32 y, ///< y coord in pixel
3216 UINT_32 z ///< z coord in slice
3217 ) const
3218 {
3219 UINT_32 offset = 0;
3220
3221 for (UINT_32 i = 0; i < pEq->numBits; i++)
3222 {
3223 UINT_32 v = 0;
3224
3225 if (pEq->addr[i].valid)
3226 {
3227 if (pEq->addr[i].channel == 0)
3228 {
3229 v ^= (x >> pEq->addr[i].index) & 1;
3230 }
3231 else if (pEq->addr[i].channel == 1)
3232 {
3233 v ^= (y >> pEq->addr[i].index) & 1;
3234 }
3235 else
3236 {
3237 ADDR_ASSERT(pEq->addr[i].channel == 2);
3238 v ^= (z >> pEq->addr[i].index) & 1;
3239 }
3240 }
3241
3242 if (pEq->xor1[i].valid)
3243 {
3244 if (pEq->xor1[i].channel == 0)
3245 {
3246 v ^= (x >> pEq->xor1[i].index) & 1;
3247 }
3248 else if (pEq->xor1[i].channel == 1)
3249 {
3250 v ^= (y >> pEq->xor1[i].index) & 1;
3251 }
3252 else
3253 {
3254 ADDR_ASSERT(pEq->xor1[i].channel == 2);
3255 v ^= (z >> pEq->xor1[i].index) & 1;
3256 }
3257 }
3258
3259 if (pEq->xor2[i].valid)
3260 {
3261 if (pEq->xor2[i].channel == 0)
3262 {
3263 v ^= (x >> pEq->xor2[i].index) & 1;
3264 }
3265 else if (pEq->xor2[i].channel == 1)
3266 {
3267 v ^= (y >> pEq->xor2[i].index) & 1;
3268 }
3269 else
3270 {
3271 ADDR_ASSERT(pEq->xor2[i].channel == 2);
3272 v ^= (z >> pEq->xor2[i].index) & 1;
3273 }
3274 }
3275
3276 offset |= (v << i);
3277 }
3278
3279 return offset;
3280 }
3281
3282 /**
3283 ************************************************************************************************************************
3284 * Gfx10Lib::ComputeOffsetFromSwizzlePattern
3285 *
3286 * @brief
3287 * Compute offset from swizzle pattern
3288 *
3289 * @return
3290 * Offset
3291 ************************************************************************************************************************
3292 */
3293 UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern(
3294 const UINT_64* pPattern, ///< Swizzle pattern
3295 UINT_32 numBits, ///< Number of bits in pattern
3296 UINT_32 x, ///< x coord in pixel
3297 UINT_32 y, ///< y coord in pixel
3298 UINT_32 z, ///< z coord in slice
3299 UINT_32 s ///< sample id
3300 ) const
3301 {
3302 UINT_32 offset = 0;
3303 const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
3304
3305 for (UINT_32 i = 0; i < numBits; i++)
3306 {
3307 UINT_32 v = 0;
3308
3309 if (pSwizzlePattern[i].x != 0)
3310 {
3311 UINT_16 mask = pSwizzlePattern[i].x;
3312 UINT_32 xBits = x;
3313
3314 while (mask != 0)
3315 {
3316 if (mask & 1)
3317 {
3318 v ^= xBits & 1;
3319 }
3320
3321 xBits >>= 1;
3322 mask >>= 1;
3323 }
3324 }
3325
3326 if (pSwizzlePattern[i].y != 0)
3327 {
3328 UINT_16 mask = pSwizzlePattern[i].y;
3329 UINT_32 yBits = y;
3330
3331 while (mask != 0)
3332 {
3333 if (mask & 1)
3334 {
3335 v ^= yBits & 1;
3336 }
3337
3338 yBits >>= 1;
3339 mask >>= 1;
3340 }
3341 }
3342
3343 if (pSwizzlePattern[i].z != 0)
3344 {
3345 UINT_16 mask = pSwizzlePattern[i].z;
3346 UINT_32 zBits = z;
3347
3348 while (mask != 0)
3349 {
3350 if (mask & 1)
3351 {
3352 v ^= zBits & 1;
3353 }
3354
3355 zBits >>= 1;
3356 mask >>= 1;
3357 }
3358 }
3359
3360 if (pSwizzlePattern[i].s != 0)
3361 {
3362 UINT_16 mask = pSwizzlePattern[i].s;
3363 UINT_32 sBits = s;
3364
3365 while (mask != 0)
3366 {
3367 if (mask & 1)
3368 {
3369 v ^= sBits & 1;
3370 }
3371
3372 sBits >>= 1;
3373 mask >>= 1;
3374 }
3375 }
3376
3377 offset |= (v << i);
3378 }
3379
3380 return offset;
3381 }
3382
3383 /**
3384 ************************************************************************************************************************
3385 * Gfx10Lib::GetSwizzlePattern
3386 *
3387 * @brief
3388 * Get swizzle pattern
3389 *
3390 * @return
3391 * Swizzle pattern
3392 ************************************************************************************************************************
3393 */
3394 const UINT_64* Gfx10Lib::GetSwizzlePattern(
3395 AddrSwizzleMode swizzleMode, ///< Swizzle mode
3396 AddrResourceType resourceType, ///< Resource type
3397 UINT_32 elemLog2, ///< Element size in bytes log2
3398 UINT_32 numFrag ///< Number of fragment
3399 ) const
3400 {
3401 const UINT_32 index = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
3402 const UINT_64* pSwizzlePattern = NULL;
3403 const UINT_32 swizzleMask = 1 << swizzleMode;
3404
3405 if (IsLinear(swizzleMode))
3406 {
3407 pSwizzlePattern = NULL;
3408 }
3409 else if (resourceType == ADDR_RSRC_TEX_3D)
3410 {
3411 ADDR_ASSERT(numFrag == 1);
3412
3413 if ((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0)
3414 {
3415 pSwizzlePattern = NULL;
3416 }
3417 else if (IsRtOptSwizzle(swizzleMode))
3418 {
3419 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_R_X_1xaa_RBPLUS[index] : SW_64K_R_X_1xaa[index];
3420 }
3421 else if (IsZOrderSwizzle(swizzleMode))
3422 {
3423 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_Z_X_1xaa_RBPLUS[index] : SW_64K_Z_X_1xaa[index];
3424 }
3425 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3426 {
3427 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
3428 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_D3_X_RBPLUS[index] : SW_64K_D3_X[index];
3429 }
3430 else
3431 {
3432 ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
3433
3434 if (IsBlock4kb(swizzleMode))
3435 {
3436 if (swizzleMode == ADDR_SW_4KB_S)
3437 {
3438 pSwizzlePattern = m_settings.supportRbPlus ? SW_4K_S3_RBPLUS[index] : SW_4K_S3[index];
3439 }
3440 else
3441 {
3442 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
3443 pSwizzlePattern = m_settings.supportRbPlus ? SW_4K_S3_X_RBPLUS[index] : SW_4K_S3_X[index];
3444 }
3445 }
3446 else
3447 {
3448 if (swizzleMode == ADDR_SW_64KB_S)
3449 {
3450 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_S3_RBPLUS[index] : SW_64K_S3[index];
3451 }
3452 else if (swizzleMode == ADDR_SW_64KB_S_X)
3453 {
3454 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_S3_X_RBPLUS[index] : SW_64K_S3_X[index];
3455 }
3456 else
3457 {
3458 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
3459 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_S3_T_RBPLUS[index] : SW_64K_S3_T[index];
3460 }
3461 }
3462 }
3463
3464 }
3465 else
3466 {
3467 if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0)
3468 {
3469 pSwizzlePattern = NULL;
3470 }
3471 else if (IsBlock256b(swizzleMode))
3472 {
3473 if (swizzleMode == ADDR_SW_256B_S)
3474 {
3475 pSwizzlePattern = m_settings.supportRbPlus ? SW_256_S_RBPLUS[index] : SW_256_S[index];
3476 }
3477 else
3478 {
3479 ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
3480 pSwizzlePattern = m_settings.supportRbPlus ? SW_256_D_RBPLUS[index] : SW_256_D[index];
3481 }
3482 }
3483 else if (IsBlock4kb(swizzleMode))
3484 {
3485 if (IsStandardSwizzle(resourceType, swizzleMode))
3486 {
3487 if (swizzleMode == ADDR_SW_4KB_S)
3488 {
3489 pSwizzlePattern = m_settings.supportRbPlus ? SW_4K_S_RBPLUS[index] : SW_4K_S[index];
3490 }
3491 else
3492 {
3493 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
3494 pSwizzlePattern = m_settings.supportRbPlus ? SW_4K_S_X_RBPLUS[index] : SW_4K_S_X[index];
3495 }
3496 }
3497 else
3498 {
3499 if (swizzleMode == ADDR_SW_4KB_D)
3500 {
3501 pSwizzlePattern = m_settings.supportRbPlus ? SW_4K_D_RBPLUS[index] : SW_4K_D[index];
3502 }
3503 else
3504 {
3505 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
3506 pSwizzlePattern = m_settings.supportRbPlus ? SW_4K_D_X_RBPLUS[index] : SW_4K_D_X[index];
3507 }
3508 }
3509 }
3510 else
3511 {
3512 if (IsRtOptSwizzle(swizzleMode))
3513 {
3514 if (numFrag == 1)
3515 {
3516 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_R_X_1xaa_RBPLUS[index] : SW_64K_R_X_1xaa[index];
3517 }
3518 else if (numFrag == 2)
3519 {
3520 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_R_X_2xaa_RBPLUS[index] : SW_64K_R_X_2xaa[index];
3521 }
3522 else if (numFrag == 4)
3523 {
3524 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_R_X_4xaa_RBPLUS[index] : SW_64K_R_X_4xaa[index];
3525 }
3526 else
3527 {
3528 ADDR_ASSERT(numFrag == 8);
3529 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_R_X_8xaa_RBPLUS[index] : SW_64K_R_X_8xaa[index];
3530 }
3531 }
3532 else if (IsZOrderSwizzle(swizzleMode))
3533 {
3534 if (numFrag == 1)
3535 {
3536 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_Z_X_1xaa_RBPLUS[index] : SW_64K_Z_X_1xaa[index];
3537 }
3538 else if (numFrag == 2)
3539 {
3540 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_Z_X_2xaa_RBPLUS[index] : SW_64K_Z_X_2xaa[index];
3541 }
3542 else if (numFrag == 4)
3543 {
3544 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_Z_X_4xaa_RBPLUS[index] : SW_64K_Z_X_4xaa[index];
3545 }
3546 else
3547 {
3548 ADDR_ASSERT(numFrag == 8);
3549 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_Z_X_8xaa_RBPLUS[index] : SW_64K_Z_X_8xaa[index];
3550 }
3551 }
3552 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3553 {
3554 if (swizzleMode == ADDR_SW_64KB_D)
3555 {
3556 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_D_RBPLUS[index] : SW_64K_D[index];
3557 }
3558 else if (swizzleMode == ADDR_SW_64KB_D_X)
3559 {
3560 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_D_X_RBPLUS[index] : SW_64K_D_X[index];
3561 }
3562 else
3563 {
3564 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
3565 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_D_T_RBPLUS[index] : SW_64K_D_T[index];
3566 }
3567 }
3568 else
3569 {
3570 if (swizzleMode == ADDR_SW_64KB_S)
3571 {
3572 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_S_RBPLUS[index] : SW_64K_S[index];
3573 }
3574 else if (swizzleMode == ADDR_SW_64KB_S_X)
3575 {
3576 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_S_X_RBPLUS[index] : SW_64K_S_X[index];
3577 }
3578 else
3579 {
3580 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
3581 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_S_T_RBPLUS[index] : SW_64K_S_T[index];
3582 }
3583 }
3584 }
3585 }
3586
3587 return pSwizzlePattern;
3588 }
3589
3590 /**
3591 ************************************************************************************************************************
3592 * Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
3593 *
3594 * @brief
3595 * Internal function to calculate address from coord for micro tiled swizzle surface
3596 *
3597 * @return
3598 * ADDR_E_RETURNCODE
3599 ************************************************************************************************************************
3600 */
3601 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
3602 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3603 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3604 ) const
3605 {
3606 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3607 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
3608 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
3609
3610 localIn.swizzleMode = pIn->swizzleMode;
3611 localIn.flags = pIn->flags;
3612 localIn.resourceType = pIn->resourceType;
3613 localIn.bpp = pIn->bpp;
3614 localIn.width = Max(pIn->unalignedWidth, 1u);
3615 localIn.height = Max(pIn->unalignedHeight, 1u);
3616 localIn.numSlices = Max(pIn->numSlices, 1u);
3617 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
3618 localIn.numSamples = Max(pIn->numSamples, 1u);
3619 localIn.numFrags = Max(pIn->numFrags, 1u);
3620 localOut.pMipInfo = mipInfo;
3621
3622 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
3623
3624 if (ret == ADDR_OK)
3625 {
3626 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3627 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
3628 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3629 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
3630
3631 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3632 {
3633 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
3634 const UINT_32 yb = pIn->y / localOut.blockHeight;
3635 const UINT_32 xb = pIn->x / localOut.blockWidth;
3636 const UINT_32 blockIndex = yb * pb + xb;
3637 const UINT_32 blockSize = 256;
3638 const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
3639 pIn->x << elemLog2,
3640 pIn->y,
3641 0);
3642 pOut->addr = localOut.sliceSize * pIn->slice +
3643 mipInfo[pIn->mipId].macroBlockOffset +
3644 (blockIndex * blockSize) +
3645 blk256Offset;
3646 }
3647 else
3648 {
3649 ret = ADDR_INVALIDPARAMS;
3650 }
3651 }
3652
3653 return ret;
3654 }
3655
3656 /**
3657 ************************************************************************************************************************
3658 * Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
3659 *
3660 * @brief
3661 * Internal function to calculate address from coord for macro tiled swizzle surface
3662 *
3663 * @return
3664 * ADDR_E_RETURNCODE
3665 ************************************************************************************************************************
3666 */
3667 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
3668 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3669 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3670 ) const
3671 {
3672 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3673 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
3674 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
3675
3676 localIn.swizzleMode = pIn->swizzleMode;
3677 localIn.flags = pIn->flags;
3678 localIn.resourceType = pIn->resourceType;
3679 localIn.bpp = pIn->bpp;
3680 localIn.width = Max(pIn->unalignedWidth, 1u);
3681 localIn.height = Max(pIn->unalignedHeight, 1u);
3682 localIn.numSlices = Max(pIn->numSlices, 1u);
3683 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
3684 localIn.numSamples = Max(pIn->numSamples, 1u);
3685 localIn.numFrags = Max(pIn->numFrags, 1u);
3686 localOut.pMipInfo = mipInfo;
3687
3688 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
3689
3690 if (ret == ADDR_OK)
3691 {
3692 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3693 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3694 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
3695 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
3696 const UINT_32 bankMask = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
3697 const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
3698 (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
3699
3700 if (localIn.numFrags > 1)
3701 {
3702 const UINT_64* pPattern = GetSwizzlePattern(pIn->swizzleMode,
3703 pIn->resourceType,
3704 elemLog2,
3705 localIn.numFrags);
3706
3707 if (pPattern != NULL)
3708 {
3709 const UINT_32 pb = localOut.pitch / localOut.blockWidth;
3710 const UINT_32 yb = pIn->y / localOut.blockHeight;
3711 const UINT_32 xb = pIn->x / localOut.blockWidth;
3712 const UINT_64 blkIdx = yb * pb + xb;
3713 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(pPattern,
3714 blkSizeLog2,
3715 pIn->x,
3716 pIn->y,
3717 pIn->slice,
3718 pIn->sample);
3719 pOut->addr = (localOut.sliceSize * pIn->slice) +
3720 (blkIdx << blkSizeLog2) +
3721 (blkOffset ^ pipeBankXor);
3722 }
3723 else
3724 {
3725 ret = ADDR_INVALIDPARAMS;
3726 }
3727 }
3728 else
3729 {
3730 const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
3731 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3732 const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
3733
3734 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3735 {
3736 const BOOL_32 inTail = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
3737 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
3738 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
3739 const UINT_32 sliceId = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
3740 const UINT_32 x = inTail ? (pIn->x + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
3741 const UINT_32 y = inTail ? (pIn->y + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
3742 const UINT_32 z = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
3743 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
3744 const UINT_32 yb = pIn->y / localOut.blockHeight;
3745 const UINT_32 xb = pIn->x / localOut.blockWidth;
3746 const UINT_64 blkIdx = yb * pb + xb;
3747 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
3748 x << elemLog2,
3749 y,
3750 z);
3751 pOut->addr = sliceSize * sliceId +
3752 mipInfo[pIn->mipId].macroBlockOffset +
3753 (blkIdx << blkSizeLog2) +
3754 (blkOffset ^ pipeBankXor);
3755 }
3756 else
3757 {
3758 ret = ADDR_INVALIDPARAMS;
3759 }
3760 }
3761 }
3762
3763 return ret;
3764 }
3765
3766 /**
3767 ************************************************************************************************************************
3768 * Gfx10Lib::HwlComputeMaxBaseAlignments
3769 *
3770 * @brief
3771 * Gets maximum alignments
3772 * @return
3773 * maximum alignments
3774 ************************************************************************************************************************
3775 */
3776 UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
3777 {
3778 return GetBlockSize(ADDR_SW_64KB);
3779 }
3780
3781 /**
3782 ************************************************************************************************************************
3783 * Gfx10Lib::HwlComputeMaxMetaBaseAlignments
3784 *
3785 * @brief
3786 * Gets maximum alignments for metadata
3787 * @return
3788 * maximum alignments for metadata
3789 ************************************************************************************************************************
3790 */
3791 UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
3792 {
3793 // Max base alignment for Htile
3794 Dim3d metaBlk = {0};
3795 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
3796 ADDR_RSRC_TEX_2D,
3797 ADDR_SW_64KB_Z_X,
3798 0,
3799 0,
3800 TRUE,
3801 &metaBlk);
3802
3803 const UINT_32 maxBaseAlignHtile = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
3804
3805 // Max base alignment for Cmask
3806 const UINT_32 maxBaseAlignCmask = GetMetaBlkSize(Gfx10DataFmask,
3807 ADDR_RSRC_TEX_2D,
3808 ADDR_SW_64KB_Z_X,
3809 0,
3810 0,
3811 TRUE,
3812 &metaBlk);
3813
3814 // Max base alignment for 2D Dcc
3815 const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
3816 {
3817 ADDR_SW_64KB_S_X,
3818 ADDR_SW_64KB_D_X,
3819 ADDR_SW_64KB_R_X,
3820 };
3821
3822 UINT_32 maxBaseAlignDcc2D = 0;
3823
3824 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
3825 {
3826 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
3827 {
3828 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
3829 {
3830 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,
3831 ADDR_RSRC_TEX_2D,
3832 ValidSwizzleModeForDcc2D[swIdx],
3833 bppLog2,
3834 numFragLog2,
3835 TRUE,
3836 &metaBlk);
3837
3838 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
3839 }
3840 }
3841 }
3842
3843 // Max base alignment for 3D Dcc
3844 const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
3845 {
3846 ADDR_SW_64KB_Z_X,
3847 ADDR_SW_64KB_S_X,
3848 ADDR_SW_64KB_D_X,
3849 ADDR_SW_64KB_R_X,
3850 };
3851
3852 UINT_32 maxBaseAlignDcc3D = 0;
3853
3854 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
3855 {
3856 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
3857 {
3858 const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,
3859 ADDR_RSRC_TEX_3D,
3860 ValidSwizzleModeForDcc3D[swIdx],
3861 bppLog2,
3862 0,
3863 TRUE,
3864 &metaBlk);
3865
3866 maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
3867 }
3868 }
3869
3870 return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
3871 }
3872
3873 /**
3874 ************************************************************************************************************************
3875 * Gfx10Lib::GetMetaElementSizeLog2
3876 *
3877 * @brief
3878 * Gets meta data element size log2
3879 * @return
3880 * Meta data element size log2
3881 ************************************************************************************************************************
3882 */
3883 INT_32 Gfx10Lib::GetMetaElementSizeLog2(
3884 Gfx10DataType dataType) ///< Data surface type
3885 {
3886 INT_32 elemSizeLog2 = 0;
3887
3888 if (dataType == Gfx10DataColor)
3889 {
3890 elemSizeLog2 = 0;
3891 }
3892 else if (dataType == Gfx10DataDepthStencil)
3893 {
3894 elemSizeLog2 = 2;
3895 }
3896 else
3897 {
3898 ADDR_ASSERT(dataType == Gfx10DataFmask);
3899 elemSizeLog2 = -1;
3900 }
3901
3902 return elemSizeLog2;
3903 }
3904
3905 /**
3906 ************************************************************************************************************************
3907 * Gfx10Lib::GetMetaCacheSizeLog2
3908 *
3909 * @brief
3910 * Gets meta data cache line size log2
3911 * @return
3912 * Meta data cache line size log2
3913 ************************************************************************************************************************
3914 */
3915 INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
3916 Gfx10DataType dataType) ///< Data surface type
3917 {
3918 INT_32 cacheSizeLog2 = 0;
3919
3920 if (dataType == Gfx10DataColor)
3921 {
3922 cacheSizeLog2 = 6;
3923 }
3924 else if (dataType == Gfx10DataDepthStencil)
3925 {
3926 cacheSizeLog2 = 8;
3927 }
3928 else
3929 {
3930 ADDR_ASSERT(dataType == Gfx10DataFmask);
3931 cacheSizeLog2 = 8;
3932 }
3933 return cacheSizeLog2;
3934 }
3935
3936 /**
3937 ************************************************************************************************************************
3938 * Gfx10Lib::HwlComputeSurfaceInfoLinear
3939 *
3940 * @brief
3941 * Internal function to calculate alignment for linear surface
3942 *
3943 * @return
3944 * ADDR_E_RETURNCODE
3945 ************************************************************************************************************************
3946 */
3947 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
3948 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3949 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3950 ) const
3951 {
3952 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3953
3954 if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
3955 {
3956 returnCode = ADDR_INVALIDPARAMS;
3957 }
3958 else
3959 {
3960 const UINT_32 elementBytes = pIn->bpp >> 3;
3961 const UINT_32 pitchAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
3962 const UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
3963 UINT_32 pitch = PowTwoAlign(pIn->width, pitchAlign);
3964 UINT_32 actualHeight = pIn->height;
3965 UINT_64 sliceSize = 0;
3966
3967 if (pIn->numMipLevels > 1)
3968 {
3969 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3970 {
3971 UINT_32 mipWidth, mipHeight;
3972
3973 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
3974
3975 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
3976
3977 if (pOut->pMipInfo != NULL)
3978 {
3979 pOut->pMipInfo[i].pitch = mipActualWidth;
3980 pOut->pMipInfo[i].height = mipHeight;
3981 pOut->pMipInfo[i].depth = mipDepth;
3982 pOut->pMipInfo[i].offset = sliceSize;
3983 pOut->pMipInfo[i].mipTailOffset = 0;
3984 pOut->pMipInfo[i].macroBlockOffset = sliceSize;
3985 }
3986
3987 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
3988 }
3989 }
3990 else
3991 {
3992 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
3993
3994 if (returnCode == ADDR_OK)
3995 {
3996 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
3997
3998 if (pOut->pMipInfo != NULL)
3999 {
4000 pOut->pMipInfo[0].pitch = pitch;
4001 pOut->pMipInfo[0].height = actualHeight;
4002 pOut->pMipInfo[0].depth = mipDepth;
4003 pOut->pMipInfo[0].offset = 0;
4004 pOut->pMipInfo[0].mipTailOffset = 0;
4005 pOut->pMipInfo[0].macroBlockOffset = 0;
4006 }
4007 }
4008 }
4009
4010 if (returnCode == ADDR_OK)
4011 {
4012 pOut->pitch = pitch;
4013 pOut->height = actualHeight;
4014 pOut->numSlices = pIn->numSlices;
4015 pOut->sliceSize = sliceSize;
4016 pOut->surfSize = sliceSize * pOut->numSlices;
4017 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4018 pOut->blockWidth = pitchAlign;
4019 pOut->blockHeight = 1;
4020 pOut->blockSlices = 1;
4021
4022 // Following members are useless on GFX10
4023 pOut->mipChainPitch = 0;
4024 pOut->mipChainHeight = 0;
4025 pOut->mipChainSlice = 0;
4026 pOut->epitchIsHeight = FALSE;
4027
4028 // Post calculation validate
4029 ADDR_ASSERT(pOut->sliceSize > 0);
4030 }
4031 }
4032
4033 return returnCode;
4034 }
4035
4036 } // V2
4037 } // Addr