amd/addrlib: add gfx10 support
[mesa.git] / src / amd / addrlib / src / gfx10 / gfx10addrlib.cpp
1 /*
2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx10addrlib.cpp
30 * @brief Contains the implementation of the Gfx10Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx10addrlib.h"
35 #include "gfx10_gb_reg.h"
36 #include "gfx10SwizzlePattern.h"
37
38 #include "amdgpu_asic_addr.h"
39
40 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
41 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
42
43 namespace Addr
44 {
45 /**
46 ************************************************************************************************************************
47 * Gfx10HwlInit
48 *
49 * @brief
50 * Creates a Gfx10Lib object.
51 *
52 * @return
53 * Returns a Gfx10Lib object pointer.
54 ************************************************************************************************************************
55 */
56 Addr::Lib* Gfx10HwlInit(const Client* pClient)
57 {
58 return V2::Gfx10Lib::CreateObj(pClient);
59 }
60
61 namespace V2
62 {
63
64 ////////////////////////////////////////////////////////////////////////////////////////////////////
65 // Static Const Member
66 ////////////////////////////////////////////////////////////////////////////////////////////////////
67
68 const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
69 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt
70 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
71 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
72 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D
73 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
74
75 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
76 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
77 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D
78 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
79
80 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
81 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
82 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D
83 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
84
85 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
86 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
87 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
88 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
89
90 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
91 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T
92 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T
93 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
94
95 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
96 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_X
97 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_X
98 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
99
100 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X
101 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X
102 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X
103 {0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1}, // ADDR_SW_64KB_R_X
104
105 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
106 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
107 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
108 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
109 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
110 };
111
112 const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
113
114 const Dim3d Gfx10Lib::Block64K_3d[] = {{64, 32, 32}, {32 , 32, 32}, {32, 32, 16}, {32, 16, 16}, {16, 16, 16}};
115 const Dim3d Gfx10Lib::Block4K_3d[] = {{16, 16, 16}, {8, 16, 16}, {8, 16, 8}, {8, 8, 8}, {4, 8, 8}};
116
117 const Dim2d Gfx10Lib::Block64K_2d[] = {{256, 256}, {256 , 128}, {128, 128}, {128, 64}, {64, 64}};
118 const Dim2d Gfx10Lib::Block4K_2d[] = {{64, 64}, {64, 32}, {32, 32}, {32, 16}, {16, 16}};
119
120 const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
121 const Dim3d Gfx10Lib::Block4K_Log2_3d[] = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
122
123 const Dim2d Gfx10Lib::Block64K_Log2_2d[] = {{8, 8}, {8, 7}, {7, 7}, {7, 6}, {6, 6}};
124 const Dim2d Gfx10Lib::Block4K_Log2_2d[] = {{6, 6}, {6, 5}, {5, 5}, {5, 4}, {4, 4}};
125
126 /**
127 ************************************************************************************************************************
128 * Gfx10Lib::Gfx10Lib
129 *
130 * @brief
131 * Constructor
132 *
133 ************************************************************************************************************************
134 */
135 Gfx10Lib::Gfx10Lib(const Client* pClient)
136 :
137 Lib(pClient),
138 m_numEquations(0)
139 {
140 m_class = AI_ADDRLIB;
141 memset(&m_settings, 0, sizeof(m_settings));
142 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
143 }
144
145 /**
146 ************************************************************************************************************************
147 * Gfx10Lib::~Gfx10Lib
148 *
149 * @brief
150 * Destructor
151 ************************************************************************************************************************
152 */
153 Gfx10Lib::~Gfx10Lib()
154 {
155 }
156
157 /**
158 ************************************************************************************************************************
159 * Gfx10Lib::HwlComputeHtileInfo
160 *
161 * @brief
162 * Interface function stub of AddrComputeHtileInfo
163 *
164 * @return
165 * ADDR_E_RETURNCODE
166 ************************************************************************************************************************
167 */
168 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
169 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
170 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
171 ) const
172 {
173 ADDR_E_RETURNCODE ret = ADDR_OK;
174
175 if ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) ||
176 (pIn->hTileFlags.pipeAligned != TRUE))
177 {
178 ret = ADDR_INVALIDPARAMS;
179 }
180 else
181 {
182 Dim3d metaBlk = {0};
183 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
184 ADDR_RSRC_TEX_2D,
185 ADDR_SW_64KB_Z_X,
186 0,
187 0,
188 TRUE,
189 &metaBlk);
190
191 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
192 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
193 pOut->baseAlign = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
194 pOut->metaBlkWidth = metaBlk.w;
195 pOut->metaBlkHeight = metaBlk.h;
196
197 if (pIn->numMipLevels > 1)
198 {
199 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
200
201 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
202
203 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
204 {
205 UINT_32 mipWidth, mipHeight;
206
207 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
208
209 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
210 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
211
212 const UINT_32 pitchInM = mipWidth / metaBlk.w;
213 const UINT_32 heightInM = mipHeight / metaBlk.h;
214 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
215
216 if (pOut->pMipInfo != NULL)
217 {
218 pOut->pMipInfo[i].inMiptail = FALSE;
219 pOut->pMipInfo[i].offset = offset;
220 pOut->pMipInfo[i].sliceSize = mipSliceSize;
221 }
222
223 offset += mipSliceSize;
224 }
225
226 pOut->sliceSize = offset;
227 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
228 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
229
230 if (pOut->pMipInfo != NULL)
231 {
232 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
233 {
234 pOut->pMipInfo[i].inMiptail = TRUE;
235 pOut->pMipInfo[i].offset = 0;
236 pOut->pMipInfo[i].sliceSize = 0;
237 }
238
239 if (pIn->firstMipIdInTail != pIn->numMipLevels)
240 {
241 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
242 }
243 }
244 }
245 else
246 {
247 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
248 const UINT_32 heightInM = pOut->height / metaBlk.h;
249
250 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
251 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
252 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
253
254 if (pOut->pMipInfo != NULL)
255 {
256 pOut->pMipInfo[0].inMiptail = FALSE;
257 pOut->pMipInfo[0].offset = 0;
258 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
259 }
260 }
261 }
262
263 return ret;
264 }
265
266 /**
267 ************************************************************************************************************************
268 * Gfx10Lib::HwlComputeCmaskInfo
269 *
270 * @brief
271 * Interface function stub of AddrComputeCmaskInfo
272 *
273 * @return
274 * ADDR_E_RETURNCODE
275 ************************************************************************************************************************
276 */
277 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
278 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
279 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
280 ) const
281 {
282 ADDR_E_RETURNCODE ret = ADDR_OK;
283
284 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
285 (pIn->cMaskFlags.pipeAligned != TRUE))
286 {
287 ret = ADDR_INVALIDPARAMS;
288 }
289 else
290 {
291 Dim3d metaBlk = {0};
292 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,
293 ADDR_RSRC_TEX_2D,
294 ADDR_SW_64KB_Z_X,
295 0,
296 0,
297 TRUE,
298 &metaBlk);
299
300 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
301 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
302 pOut->baseAlign = metaBlkSize;
303 pOut->metaBlkWidth = metaBlk.w;
304 pOut->metaBlkHeight = metaBlk.h;
305
306 if (pIn->numMipLevels > 1)
307 {
308 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
309
310 UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;
311
312 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
313 {
314 UINT_32 mipWidth, mipHeight;
315
316 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
317
318 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
319 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
320
321 const UINT_32 pitchInM = mipWidth / metaBlk.w;
322 const UINT_32 heightInM = mipHeight / metaBlk.h;
323
324 if (pOut->pMipInfo != NULL)
325 {
326 pOut->pMipInfo[i].inMiptail = FALSE;
327 pOut->pMipInfo[i].offset = metaBlkPerSlice * metaBlkSize;
328 pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;
329 }
330
331 metaBlkPerSlice += pitchInM * heightInM;
332 }
333
334 pOut->metaBlkNumPerSlice = metaBlkPerSlice;
335
336 if (pOut->pMipInfo != NULL)
337 {
338 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
339 {
340 pOut->pMipInfo[i].inMiptail = TRUE;
341 pOut->pMipInfo[i].offset = 0;
342 pOut->pMipInfo[i].sliceSize = 0;
343 }
344
345 if (pIn->firstMipIdInTail != pIn->numMipLevels)
346 {
347 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
348 }
349 }
350 }
351 else
352 {
353 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
354 const UINT_32 heightInM = pOut->height / metaBlk.h;
355
356 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
357
358 if (pOut->pMipInfo != NULL)
359 {
360 pOut->pMipInfo[0].inMiptail = FALSE;
361 pOut->pMipInfo[0].offset = 0;
362 pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
363 }
364 }
365
366 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
367 pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;
368 }
369
370 return ret;
371 }
372
373 /**
374 ************************************************************************************************************************
375 * Gfx10Lib::HwlComputeDccInfo
376 *
377 * @brief
378 * Interface function to compute DCC key info
379 *
380 * @return
381 * ADDR_E_RETURNCODE
382 ************************************************************************************************************************
383 */
384 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
385 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
386 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
387 ) const
388 {
389 ADDR_E_RETURNCODE ret = ADDR_OK;
390
391 if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode))
392 {
393 // Hardware support dcc for 256 swizzle mode, but address lib will not support it because we only
394 // select 256 swizzle mode for small surface, and it's not helpful to enable dcc for small surface.
395 ret = ADDR_INVALIDPARAMS;
396 }
397 else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
398 {
399 // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
400 ret = ADDR_INVALIDPARAMS;
401 }
402 else
403 {
404 // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
405 ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));
406
407 Dim3d metaBlk = {0};
408 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
409 const UINT_32 numFragLog2 = Log2(pIn->numFrags);
410 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
411 pIn->resourceType,
412 pIn->swizzleMode,
413 elemLog2,
414 numFragLog2,
415 pIn->dccKeyFlags.pipeAligned,
416 &metaBlk);
417 const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
418
419 pOut->compressBlkWidth = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
420 pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
421 pOut->compressBlkDepth = isThick ? Block256_3d[elemLog2].d : 1;
422
423 pOut->dccRamBaseAlign = metaBlkSize;
424 pOut->metaBlkWidth = metaBlk.w;
425 pOut->metaBlkHeight = metaBlk.h;
426 pOut->metaBlkDepth = metaBlk.d;
427
428 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
429 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
430 pOut->depth = PowTwoAlign(pIn->numSlices, metaBlk.d);
431
432 if (pIn->numMipLevels > 1)
433 {
434 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
435
436 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
437
438 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
439 {
440 UINT_32 mipWidth, mipHeight;
441
442 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
443
444 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
445 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
446
447 const UINT_32 pitchInM = mipWidth / metaBlk.w;
448 const UINT_32 heightInM = mipHeight / metaBlk.h;
449 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
450
451 if (pOut->pMipInfo != NULL)
452 {
453 pOut->pMipInfo[i].inMiptail = FALSE;
454 pOut->pMipInfo[i].offset = offset;
455 pOut->pMipInfo[i].sliceSize = mipSliceSize;
456 }
457
458 offset += mipSliceSize;
459 }
460
461 pOut->dccRamSliceSize = offset;
462 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
463 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
464
465 if (pOut->pMipInfo != NULL)
466 {
467 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
468 {
469 pOut->pMipInfo[i].inMiptail = TRUE;
470 pOut->pMipInfo[i].offset = 0;
471 pOut->pMipInfo[i].sliceSize = 0;
472 }
473
474 if (pIn->firstMipIdInTail != pIn->numMipLevels)
475 {
476 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
477 }
478 }
479 }
480 else
481 {
482 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
483 const UINT_32 heightInM = pOut->height / metaBlk.h;
484
485 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
486 pOut->dccRamSliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
487 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
488
489 if (pOut->pMipInfo != NULL)
490 {
491 pOut->pMipInfo[0].inMiptail = FALSE;
492 pOut->pMipInfo[0].offset = 0;
493 pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
494 }
495 }
496 }
497
498 return ret;
499 }
500
501 /**
502 ************************************************************************************************************************
503 * Gfx10Lib::HwlComputeCmaskAddrFromCoord
504 *
505 * @brief
506 * Interface function stub of AddrComputeCmaskAddrFromCoord
507 *
508 * @return
509 * ADDR_E_RETURNCODE
510 ************************************************************************************************************************
511 */
512 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
513 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
514 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
515 {
516 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
517 input.size = sizeof(input);
518 input.cMaskFlags = pIn->cMaskFlags;
519 input.colorFlags = pIn->colorFlags;
520 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
521 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
522 input.numSlices = Max(pIn->numSlices, 1u);
523 input.swizzleMode = pIn->swizzleMode;
524 input.resourceType = pIn->resourceType;
525
526 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
527 output.size = sizeof(output);
528
529 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
530
531 if (returnCode == ADDR_OK)
532 {
533 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
534 const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3);
535 const UINT_32 numPipeLog2 = m_pipesLog2;
536 const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
537 const UINT_32 fmaskBppType = 4;
538 const UINT_32 numPipeType = 8;
539 const UINT_32 index = ((m_pipeInterleaveLog2 - 8) * (fmaskBppType * numPipeType)) +
540 ((numPipeLog2 + 1) * fmaskBppType) +
541 fmaskElemLog2;
542
543 const UINT_64* pPattern = CMASK_64K[index];
544 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
545 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
546 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(pPattern,
547 blkSizeLog2 + 1, // +1 for nibble offset
548 pIn->x,
549 pIn->y,
550 pIn->slice,
551 0);
552 const UINT_32 xb = pIn->x / output.metaBlkWidth;
553 const UINT_32 yb = pIn->y / output.metaBlkHeight;
554 const UINT_32 pb = output.pitch / output.metaBlkWidth;
555 const UINT_32 blkIndex = (yb * pb) + xb;
556 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
557
558 pOut->addr = (output.sliceSize * pIn->slice) +
559 (blkIndex * (1 << blkSizeLog2)) +
560 ((blkOffset >> 1) ^ pipeXor);
561 pOut->bitPosition = (blkOffset & 1) << 2;
562 }
563
564 return returnCode;
565 }
566
567 /**
568 ************************************************************************************************************************
569 * Gfx10Lib::HwlComputeHtileAddrFromCoord
570 *
571 * @brief
572 * Interface function stub of AddrComputeHtileAddrFromCoord
573 *
574 * @return
575 * ADDR_E_RETURNCODE
576 ************************************************************************************************************************
577 */
578 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
579 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
580 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
581 {
582 ADDR_E_RETURNCODE returnCode = ADDR_OK;
583
584 if (pIn->numMipLevels > 1)
585 {
586 returnCode = ADDR_NOTIMPLEMENTED;
587 }
588 else
589 {
590 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
591 input.size = sizeof(input);
592 input.hTileFlags = pIn->hTileFlags;
593 input.depthFlags = pIn->depthflags;
594 input.swizzleMode = pIn->swizzleMode;
595 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
596 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
597 input.numSlices = Max(pIn->numSlices, 1u);
598 input.numMipLevels = 1;
599
600 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
601 output.size = sizeof(output);
602
603 returnCode = ComputeHtileInfo(&input, &output);
604
605 if (returnCode == ADDR_OK)
606 {
607 const UINT_32 numSampleLog2 = Log2(pIn->numSamples);
608 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
609 const UINT_32 index = m_htileBaseIndex + numSampleLog2;
610 const UINT_64* pPattern = HTILE_64K[index];
611 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
612 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
613 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(pPattern,
614 blkSizeLog2 + 1, // +1 for nibble offset
615 pIn->x,
616 pIn->y,
617 pIn->slice,
618 0);
619 const UINT_32 xb = pIn->x / output.metaBlkWidth;
620 const UINT_32 yb = pIn->y / output.metaBlkHeight;
621 const UINT_32 pb = output.pitch / output.metaBlkWidth;
622 const UINT_32 blkIndex = (yb * pb) + xb;
623 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
624
625 pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
626 (blkIndex * (1 << blkSizeLog2)) +
627 ((blkOffset >> 1) ^ pipeXor);
628 }
629 }
630
631 return returnCode;
632 }
633
634 /**
635 ************************************************************************************************************************
636 * Gfx10Lib::HwlComputeHtileCoordFromAddr
637 *
638 * @brief
639 * Interface function stub of AddrComputeHtileCoordFromAddr
640 *
641 * @return
642 * ADDR_E_RETURNCODE
643 ************************************************************************************************************************
644 */
645 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
646 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
647 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
648 {
649 ADDR_NOT_IMPLEMENTED();
650
651 return ADDR_OK;
652 }
653
654 /**
655 ************************************************************************************************************************
656 * Gfx10Lib::HwlComputeDccAddrFromCoord
657 *
658 * @brief
659 * Interface function stub of AddrComputeDccAddrFromCoord
660 *
661 * @return
662 * ADDR_E_RETURNCODE
663 ************************************************************************************************************************
664 */
665 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccAddrFromCoord(
666 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
667 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
668 {
669 ADDR_E_RETURNCODE returnCode = ADDR_OK;
670
671 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
672 (pIn->swizzleMode != ADDR_SW_64KB_R_X) ||
673 (pIn->dccKeyFlags.linear == TRUE) ||
674 (pIn->numFrags > 1) ||
675 (pIn->numMipLevels > 1) ||
676 (pIn->mipId > 0))
677 {
678 returnCode = ADDR_NOTSUPPORTED;
679 }
680 else
681 {
682 ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
683 input.size = sizeof(input);
684 input.dccKeyFlags = pIn->dccKeyFlags;
685 input.colorFlags = pIn->colorFlags;
686 input.swizzleMode = pIn->swizzleMode;
687 input.resourceType = pIn->resourceType;
688 input.bpp = pIn->bpp;
689 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
690 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
691 input.numSlices = Max(pIn->numSlices, 1u);
692 input.numFrags = Max(pIn->numFrags, 1u);
693 input.numMipLevels = Max(pIn->numMipLevels, 1u);
694
695 ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
696 output.size = sizeof(output);
697
698 returnCode = ComputeDccInfo(&input, &output);
699
700 if (returnCode == ADDR_OK)
701 {
702 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
703 const UINT_32 numPipeLog2 = m_pipesLog2;
704 const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
705 const UINT_32 alignPipeType = 7;
706 const UINT_32 unalignPipeType = 3;
707 const UINT_32 numPipeType = alignPipeType + unalignPipeType;
708 UINT_32 index = ((m_pipeInterleaveLog2 - 8) * (MaxNumOfBpp * numPipeType)) + elemLog2;
709
710 if (pIn->dccKeyFlags.pipeAligned)
711 {
712 index += (numPipeLog2 + unalignPipeType) * MaxNumOfBpp;
713 }
714 else
715 {
716 index += Min(numPipeLog2, 2u) * MaxNumOfBpp;
717 }
718
719 const UINT_64* pPattern = DCC_64K_R_X[index];
720 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) + elemLog2 - 8;
721 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
722 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(pPattern,
723 blkSizeLog2 + 1, // +1 for nibble offset
724 pIn->x,
725 pIn->y,
726 pIn->slice,
727 0);
728 const UINT_32 xb = pIn->x / output.metaBlkWidth;
729 const UINT_32 yb = pIn->y / output.metaBlkHeight;
730 const UINT_32 pb = output.pitch / output.metaBlkWidth;
731 const UINT_32 blkIndex = (yb * pb) + xb;
732 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
733
734 pOut->addr = (static_cast<UINT_64>(output.dccRamSliceSize) * pIn->slice) +
735 (blkIndex * (1 << blkSizeLog2)) +
736 ((blkOffset >> 1) ^ pipeXor);
737 }
738 }
739
740 return returnCode;
741 }
742
743 /**
744 ************************************************************************************************************************
745 * Gfx10Lib::HwlInitGlobalParams
746 *
747 * @brief
748 * Initializes global parameters
749 *
750 * @return
751 * TRUE if all settings are valid
752 *
753 ************************************************************************************************************************
754 */
755 BOOL_32 Gfx10Lib::HwlInitGlobalParams(
756 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
757 {
758 BOOL_32 valid = TRUE;
759 GB_ADDR_CONFIG gbAddrConfig;
760
761 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
762
763 // These values are copied from CModel code
764 switch (gbAddrConfig.bits.NUM_PIPES)
765 {
766 case ADDR_CONFIG_1_PIPE:
767 m_pipes = 1;
768 m_pipesLog2 = 0;
769 break;
770 case ADDR_CONFIG_2_PIPE:
771 m_pipes = 2;
772 m_pipesLog2 = 1;
773 break;
774 case ADDR_CONFIG_4_PIPE:
775 m_pipes = 4;
776 m_pipesLog2 = 2;
777 break;
778 case ADDR_CONFIG_8_PIPE:
779 m_pipes = 8;
780 m_pipesLog2 = 3;
781 break;
782 case ADDR_CONFIG_16_PIPE:
783 m_pipes = 16;
784 m_pipesLog2 = 4;
785 break;
786 case ADDR_CONFIG_32_PIPE:
787 m_pipes = 32;
788 m_pipesLog2 = 5;
789 break;
790 case ADDR_CONFIG_64_PIPE:
791 m_pipes = 64;
792 m_pipesLog2 = 6;
793 break;
794 default:
795 ADDR_ASSERT_ALWAYS();
796 valid = FALSE;
797 break;
798 }
799
800 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
801 {
802 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
803 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
804 m_pipeInterleaveLog2 = 8;
805 break;
806 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
807 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
808 m_pipeInterleaveLog2 = 9;
809 break;
810 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
811 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
812 m_pipeInterleaveLog2 = 10;
813 break;
814 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
815 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
816 m_pipeInterleaveLog2 = 11;
817 break;
818 default:
819 ADDR_ASSERT_ALWAYS();
820 valid = FALSE;
821 break;
822 }
823
824 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
825 // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
826 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
827
828 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
829 {
830 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
831 m_maxCompFrag = 1;
832 m_maxCompFragLog2 = 0;
833 break;
834 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
835 m_maxCompFrag = 2;
836 m_maxCompFragLog2 = 1;
837 break;
838 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
839 m_maxCompFrag = 4;
840 m_maxCompFragLog2 = 2;
841 break;
842 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
843 m_maxCompFrag = 8;
844 m_maxCompFragLog2 = 3;
845 break;
846 default:
847 ADDR_ASSERT_ALWAYS();
848 valid = FALSE;
849 break;
850 }
851
852 if (m_settings.supportRbPlus)
853 {
854 m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
855 m_numSaLog2 = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
856
857 ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
858
859 const UINT_32 maxPipeInterleaveType = 3;
860
861 m_colorBaseIndex = sizeof(SW_64K_R_X_1xaa_RBPLUS) /
862 sizeof(SW_64K_R_X_1xaa_RBPLUS[0]) /
863 maxPipeInterleaveType *
864 (m_pipeInterleaveLog2 - 8);
865 m_htileBaseIndex = sizeof(HTILE_64K_RBPLUS) /
866 sizeof(HTILE_64K_RBPLUS[0]) /
867 maxPipeInterleaveType *
868 (m_pipeInterleaveLog2 - 8);
869
870 // Skip unaligned case
871 m_htileBaseIndex += MaxNumOfAA;
872
873 if (m_numPkrLog2 < 2)
874 {
875 m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
876 m_htileBaseIndex += m_pipesLog2 * MaxNumOfAA;
877 }
878 else
879 {
880 m_colorBaseIndex += (2 * m_numPkrLog2 - 2 + m_pipesLog2) * MaxNumOfBpp;
881
882 const UINT_32 htilePipePerPkr = 4;
883
884 m_htileBaseIndex += (m_numPkrLog2 - 1) * htilePipePerPkr * MaxNumOfAA +
885 (m_pipesLog2 + 1 - m_numPkrLog2) * MaxNumOfAA;
886 }
887 }
888 else
889 {
890 const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
891 static_cast<UINT_32>(ADDR_CONFIG_1_PIPE) +
892 1;
893
894 m_colorBaseIndex = (m_pipeInterleaveLog2 - 8) * (MaxNumOfBpp * numPipeType) +
895 (m_pipesLog2 * MaxNumOfBpp);
896
897 m_htileBaseIndex = (m_pipeInterleaveLog2 - 8) * (MaxNumOfAA * (numPipeType + 1)) +
898 (m_pipesLog2 + 1) * MaxNumOfAA;
899 }
900
901 if (valid)
902 {
903 InitEquationTable();
904 }
905
906 return valid;
907 }
908
909 /**
910 ************************************************************************************************************************
911 * Gfx10Lib::HwlConvertChipFamily
912 *
913 * @brief
914 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
915 * @return
916 * ChipFamily
917 ************************************************************************************************************************
918 */
919 ChipFamily Gfx10Lib::HwlConvertChipFamily(
920 UINT_32 chipFamily, ///< [in] chip family defined in atiih.h
921 UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h
922 {
923 ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
924
925 m_settings.dccUnsup3DSwDis = 1;
926
927 switch (chipFamily)
928 {
929 case FAMILY_NV:
930 m_settings.isDcn2 = 1;
931 break;
932 default:
933 ADDR_ASSERT(!"Unknown chip family");
934 break;
935 }
936
937 m_settings.dsMipmapHtileFix = 1;
938
939 if (ASICREV_IS_NAVI10_P(chipRevision))
940 {
941 m_settings.dsMipmapHtileFix = 0;
942 }
943
944 m_configFlags.use32bppFor422Fmt = TRUE;
945
946 return family;
947 }
948
949 /**
950 ************************************************************************************************************************
951 * Gfx10Lib::GetBlk256SizeLog2
952 *
953 * @brief
954 * Get block 256 size
955 *
956 * @return
957 * N/A
958 ************************************************************************************************************************
959 */
960 void Gfx10Lib::GetBlk256SizeLog2(
961 AddrResourceType resourceType, ///< [in] Resource type
962 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
963 UINT_32 elemLog2, ///< [in] element size log2
964 UINT_32 numSamplesLog2, ///< [in] number of samples
965 Dim3d* pBlock ///< [out] block size
966 ) const
967 {
968 if (IsThin(resourceType, swizzleMode))
969 {
970 UINT_32 blockBits = 8 - elemLog2;
971
972 if (IsZOrderSwizzle(swizzleMode))
973 {
974 blockBits -= numSamplesLog2;
975 }
976
977 pBlock->w = (blockBits >> 1) + (blockBits & 1);
978 pBlock->h = (blockBits >> 1);
979 pBlock->d = 0;
980 }
981 else
982 {
983 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
984
985 UINT_32 blockBits = 8 - elemLog2;
986
987 pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
988 pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
989 pBlock->h = (blockBits / 3);
990 }
991 }
992
993 /**
994 ************************************************************************************************************************
995 * Gfx10Lib::GetCompressedBlockSizeLog2
996 *
997 * @brief
998 * Get compress block size
999 *
1000 * @return
1001 * N/A
1002 ************************************************************************************************************************
1003 */
1004 void Gfx10Lib::GetCompressedBlockSizeLog2(
1005 Gfx10DataType dataType, ///< [in] Data type
1006 AddrResourceType resourceType, ///< [in] Resource type
1007 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1008 UINT_32 elemLog2, ///< [in] element size log2
1009 UINT_32 numSamplesLog2, ///< [in] number of samples
1010 Dim3d* pBlock ///< [out] block size
1011 ) const
1012 {
1013 if (dataType == Gfx10DataColor)
1014 {
1015 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1016 }
1017 else
1018 {
1019 ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1020 pBlock->w = 3;
1021 pBlock->h = 3;
1022 pBlock->d = 0;
1023 }
1024 }
1025
1026 /**
1027 ************************************************************************************************************************
1028 * Gfx10Lib::GetMetaOverlapLog2
1029 *
1030 * @brief
1031 * Get meta block overlap
1032 *
1033 * @return
1034 * N/A
1035 ************************************************************************************************************************
1036 */
1037 INT_32 Gfx10Lib::GetMetaOverlapLog2(
1038 Gfx10DataType dataType, ///< [in] Data type
1039 AddrResourceType resourceType, ///< [in] Resource type
1040 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1041 UINT_32 elemLog2, ///< [in] element size log2
1042 UINT_32 numSamplesLog2 ///< [in] number of samples
1043 ) const
1044 {
1045 Dim3d compBlock;
1046 Dim3d microBlock;
1047
1048 GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1049 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, &microBlock);
1050
1051 const INT_32 compSizeLog2 = compBlock.w + compBlock.h + compBlock.d;
1052 const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1053 const INT_32 maxSizeLog2 = Max(compSizeLog2, blk256SizeLog2);
1054 const INT_32 numPipesLog2 = GetEffectiveNumPipes();
1055 INT_32 overlap = numPipesLog2 - maxSizeLog2;
1056
1057 if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1058 {
1059 overlap++;
1060 }
1061
1062 // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1063 if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1064 {
1065 overlap--;
1066 }
1067 overlap = Max(overlap, 0);
1068 return overlap;
1069 }
1070
1071 /**
1072 ************************************************************************************************************************
1073 * Gfx10Lib::Get3DMetaOverlapLog2
1074 *
1075 * @brief
1076 * Get 3d meta block overlap
1077 *
1078 * @return
1079 * N/A
1080 ************************************************************************************************************************
1081 */
1082 INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1083 AddrResourceType resourceType, ///< [in] Resource type
1084 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1085 UINT_32 elemLog2 ///< [in] element size log2
1086 ) const
1087 {
1088 Dim3d microBlock;
1089 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, &microBlock);
1090
1091 INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1092
1093 if (m_settings.supportRbPlus)
1094 {
1095 overlap++;
1096 }
1097
1098 if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1099 {
1100 overlap = 0;
1101 }
1102 return overlap;
1103 }
1104
1105 /**
1106 ************************************************************************************************************************
1107 * Gfx10Lib::GetPipeRotateAmount
1108 *
1109 * @brief
1110 * Get pipe rotate amount
1111 *
1112 * @return
1113 * Pipe rotate amount
1114 ************************************************************************************************************************
1115 */
1116
1117 INT_32 Gfx10Lib::GetPipeRotateAmount(
1118 AddrResourceType resourceType, ///< [in] Resource type
1119 AddrSwizzleMode swizzleMode ///< [in] Swizzle mode
1120 ) const
1121 {
1122 INT_32 amount = 0;
1123
1124 if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1125 {
1126 amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1127 1 : m_pipesLog2 - (m_numSaLog2 + 1);
1128 }
1129
1130 return amount;
1131 }
1132
1133 /**
1134 ************************************************************************************************************************
1135 * Gfx10Lib::GetMetaBlkSize
1136 *
1137 * @brief
1138 * Get metadata block size
1139 *
1140 * @return
1141 * Meta block size
1142 ************************************************************************************************************************
1143 */
1144 UINT_32 Gfx10Lib::GetMetaBlkSize(
1145 Gfx10DataType dataType, ///< [in] Data type
1146 AddrResourceType resourceType, ///< [in] Resource type
1147 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1148 UINT_32 elemLog2, ///< [in] element size log2
1149 UINT_32 numSamplesLog2, ///< [in] number of samples
1150 BOOL_32 pipeAlign, ///< [in] pipe align
1151 Dim3d* pBlock ///< [out] block size
1152 ) const
1153 {
1154 INT_32 metablkSizeLog2;
1155 const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType);
1156 const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType);
1157 const INT_32 compBlkSizeLog2 = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1158 const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1159 numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1160 const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode);
1161 INT_32 numPipesLog2 = m_pipesLog2;
1162
1163 if (IsThin(resourceType, swizzleMode))
1164 {
1165 if ((pipeAlign == FALSE) ||
1166 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1167 (IsDisplaySwizzle(resourceType, swizzleMode) == TRUE))
1168 {
1169 if (pipeAlign)
1170 {
1171 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1172 metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1173 }
1174 else
1175 {
1176 metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1177 }
1178 }
1179 else
1180 {
1181 if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1182 {
1183 numPipesLog2++;
1184 }
1185
1186 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1187
1188 if (numPipesLog2 >= 4)
1189 {
1190 INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1191
1192 // In 16Bpe 8xaa, we have an extra overlap bit
1193 if ((pipeRotateLog2 > 0) &&
1194 (elemLog2 == 4) &&
1195 (numSamplesLog2 == 3) &&
1196 (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1197 {
1198 overlapLog2++;
1199 }
1200
1201 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1202 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1203
1204 if (m_settings.supportRbPlus &&
1205 IsRtOptSwizzle(swizzleMode) &&
1206 (numPipesLog2 == 6) &&
1207 (numSamplesLog2 == 3) &&
1208 (m_maxCompFragLog2 == 3) &&
1209 (metablkSizeLog2 < 15))
1210 {
1211 metablkSizeLog2 = 15;
1212 }
1213 }
1214 else
1215 {
1216 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1217 }
1218
1219 if (dataType == Gfx10DataDepthStencil)
1220 {
1221 // For htile surfaces, pad meta block size to 2K * num_pipes
1222 metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1223 }
1224
1225 const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1226
1227 if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1228 {
1229 const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1230
1231 metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1232 }
1233 }
1234
1235 const INT_32 metablkBitsLog2 =
1236 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1237 pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1238 pBlock->h = 1 << (metablkBitsLog2 >> 1);
1239 pBlock->d = 1;
1240 }
1241 else
1242 {
1243 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1244
1245 if (pipeAlign)
1246 {
1247 if (m_settings.supportRbPlus &&
1248 (m_pipesLog2 == m_numSaLog2 + 1) &&
1249 (m_pipesLog2 > 1) &&
1250 IsRbAligned(resourceType, swizzleMode))
1251 {
1252 numPipesLog2++;
1253 }
1254
1255 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1256
1257 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1258 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1259 metablkSizeLog2 = Max(metablkSizeLog2, 12);
1260 }
1261 else
1262 {
1263 metablkSizeLog2 = 12;
1264 }
1265
1266 const INT_32 metablkBitsLog2 =
1267 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1268 pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1269 pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1270 pBlock->d = 1 << (metablkBitsLog2 / 3);
1271 }
1272
1273 return (1 << static_cast<UINT_32>(metablkSizeLog2));
1274 }
1275
1276 /**
1277 ************************************************************************************************************************
1278 * Gfx10Lib::ConvertSwizzlePatternToEquation
1279 *
1280 * @brief
1281 * Convert swizzle pattern to equation.
*
* The pattern is read as one ADDR_BIT_SETTING per address bit of the block (x/y/z coordinate-bit masks).
* Each address bit i of the equation gets a primary term (addr[i]) and up to two extra XOR terms
* (xor1[i], xor2[i]). Channel numbers written here are 0 for x, 1 for y and 2 for z; x indices are
* offset by elemLog2.
*
* Only three cases are handled: non-XOR swizzle modes, thin XOR modes, and thick XOR modes accepted by
* IsEquationCompatibleThick(); for anything else only numBits, stackedDepthSlices and the low elemLog2
* address bits are written.
*
* The XOR branches test xor1/xor2 for zero without clearing them first, so they rely on pEquation
* being zero-initialized by the caller (InitEquationTable passes a value-initialized ADDR_EQUATION).
1282 *
1283 * @return
1284 * N/A
1285 ************************************************************************************************************************
1286 */
1287 VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
1288 UINT_32 elemLog2, ///< [in] element bytes log2
1289 AddrResourceType rsrcType, ///< [in] resource type
1290 AddrSwizzleMode swMode, ///< [in] swizzle mode
1291 const UINT_64* pPattern, ///< [in] swizzle pattern
1292 ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern
1293 const
1294 {
// View the raw 64-bit pattern words as per-address-bit coordinate settings.
1295 const ADDR_BIT_SETTING* pSwizzle = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
1296 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
1297
// One equation entry per address bit inside the block.
1298 pEquation->numBits = blockSizeLog2;
1299 pEquation->stackedDepthSlices = FALSE;
1300
// The lowest elemLog2 address bits map straight to channel 0 with index == bit position.
1301 for (UINT_32 i = 0; i < elemLog2; i++)
1302 {
1303 pEquation->addr[i].channel = 0;
1304 pEquation->addr[i].valid = 1;
1305 pEquation->addr[i].index = i;
1306 }
1307
// Case 1: non-XOR modes. Every pattern entry must be a single coordinate bit, so each address bit gets
// exactly one addr term and both XOR terms are cleared.
1308 if (IsXor(swMode) == FALSE)
1309 {
1310 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1311 {
1312 ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1313
1314 if (pSwizzle[i].x != 0)
1315 {
1316 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1317
1318 pEquation->addr[i].channel = 0;
1319 pEquation->addr[i].valid = 1;
1320 pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2;
1321 }
1322 else if (pSwizzle[i].y != 0)
1323 {
1324 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1325
1326 pEquation->addr[i].channel = 1;
1327 pEquation->addr[i].valid = 1;
1328 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1329 }
1330 else
1331 {
1332 ADDR_ASSERT(pSwizzle[i].z != 0);
1333 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1334
1335 pEquation->addr[i].channel = 2;
1336 pEquation->addr[i].valid = 1;
1337 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1338 }
1339
1340 pEquation->xor1[i].value = 0;
1341 pEquation->xor2[i].value = 0;
1342 }
1343 }
// Case 2: thin XOR modes. Block dimensions come from the 4KB table when blockSizeLog2 == 12 and from the
// 64KB table otherwise.
1344 else if (IsThin(rsrcType, swMode))
1345 {
1346 const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_2d[elemLog2].w : Block64K_Log2_2d[elemLog2].w;
1347 const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_2d[elemLog2].h : Block64K_Log2_2d[elemLog2].h;
1348 const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1349 const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1350
// swizzle[] : working copy holding the coordinate bits of each address bit that are still unassigned.
// xMask/yMask: coordinate bits that already serve as the addr term of some address bit.
// bMask     : address bits that are fully resolved (starts with the low elemLog2 bits set).
1351 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1352 UINT_32 xMask = 0;
1353 UINT_32 yMask = 0;
1354 UINT_32 bMask = (1 << elemLog2) - 1;
1355
// Pass 1: classify every address bit straight from the input pattern.
1356 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1357 {
// Single coordinate bit: it becomes the addr term and the address bit is done.
1358 if (IsPow2(pSwizzle[i].value))
1359 {
1360 if (pSwizzle[i].x != 0)
1361 {
1362 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1363 xMask |= pSwizzle[i].x;
1364
1365 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1366
1367 ADDR_ASSERT(xLog2 < blkXLog2);
1368
1369 pEquation->addr[i].channel = 0;
1370 pEquation->addr[i].valid = 1;
1371 pEquation->addr[i].index = xLog2 + elemLog2;
1372 }
1373 else
1374 {
1375 ADDR_ASSERT(pSwizzle[i].y != 0);
1376 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1377 yMask |= pSwizzle[i].y;
1378
1379 pEquation->addr[i].channel = 1;
1380 pEquation->addr[i].valid = 1;
1381 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1382
1383 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1384 }
1385
1386 swizzle[i].value = 0;
1387 bMask |= 1 << i;
1388 }
// Several coordinate bits XORed together.
1389 else
1390 {
// A z bit, if present, goes directly into xor2.
1391 if (pSwizzle[i].z != 0)
1392 {
1393 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1394
1395 pEquation->xor2[i].channel = 2;
1396 pEquation->xor2[i].valid = 1;
1397 pEquation->xor2[i].index = Log2(pSwizzle[i].z);
1398 }
1399
// Keep only x and y in the working copy; z and s are dropped.
1400 swizzle[i].x = pSwizzle[i].x;
1401 swizzle[i].y = pSwizzle[i].y;
1402 swizzle[i].z = swizzle[i].s = 0;
1403
1404 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1405
// x bits at or above the block width can never be an addr term of this block: emit as xor1.
1406 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1407
1408 if (xHi != 0)
1409 {
1410 ADDR_ASSERT(IsPow2(xHi));
1411 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1412
1413 pEquation->xor1[i].channel = 0;
1414 pEquation->xor1[i].valid = 1;
1415 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1416
1417 swizzle[i].x &= blkXMask;
1418 }
1419
// Likewise for y bits at or above the block height: xor1 if x did not take it, otherwise xor2.
1420 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1421
1422 if (yHi != 0)
1423 {
1424 ADDR_ASSERT(IsPow2(yHi));
1425
1426 if (xHi == 0)
1427 {
1428 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1429 pEquation->xor1[i].channel = 1;
1430 pEquation->xor1[i].valid = 1;
1431 pEquation->xor1[i].index = Log2(yHi);
1432 }
1433 else
1434 {
1435 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1436 pEquation->xor2[i].channel = 1;
1437 pEquation->xor2[i].valid = 1;
1438 pEquation->xor2[i].index = Log2(yHi);
1439 }
1440
1441 swizzle[i].y &= blkYMask;
1442 }
1443
// Nothing left in the working copy: this address bit is resolved.
1444 if (swizzle[i].value == 0)
1445 {
1446 bMask |= 1 << i;
1447 }
1448 }
1449 }
1450
1451 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1452 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1453
// All bits below the pipe interleave are expected to be resolved by pass 1.
1454 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1455
// Pass 2: sweep the unresolved bits repeatedly until every address bit is resolved.
// NOTE(review): there is no explicit guard for a sweep that makes no progress - confirm the pattern
// tables always converge.
1456 while (bMask != blockMask)
1457 {
1458 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1459 {
1460 if ((bMask & (1 << i)) == 0)
1461 {
// Exactly one coordinate bit remains: it becomes the addr term of this address bit.
1462 if (IsPow2(swizzle[i].value))
1463 {
1464 if (swizzle[i].x != 0)
1465 {
1466 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1467 xMask |= swizzle[i].x;
1468
1469 const UINT_32 xLog2 = Log2(swizzle[i].x);
1470
1471 ADDR_ASSERT(xLog2 < blkXLog2);
1472
1473 pEquation->addr[i].channel = 0;
1474 pEquation->addr[i].valid = 1;
1475 pEquation->addr[i].index = xLog2 + elemLog2;
1476 }
1477 else
1478 {
1479 ADDR_ASSERT(swizzle[i].y != 0);
1480 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1481 yMask |= swizzle[i].y;
1482
1483 pEquation->addr[i].channel = 1;
1484 pEquation->addr[i].valid = 1;
1485 pEquation->addr[i].index = Log2(swizzle[i].y);
1486
1487 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1488 }
1489
1490 swizzle[i].value = 0;
1491 bMask |= 1 << i;
1492 }
// Otherwise move coordinate bits that are already some bit's addr term into the free XOR slots
// (xor1 first, then xor2) and strip them from the working copy.
1493 else
1494 {
1495 const UINT_32 x = swizzle[i].x & xMask;
1496 const UINT_32 y = swizzle[i].y & yMask;
1497
1498 if (x != 0)
1499 {
1500 ADDR_ASSERT(IsPow2(x));
1501
1502 if (pEquation->xor1[i].value == 0)
1503 {
1504 pEquation->xor1[i].channel = 0;
1505 pEquation->xor1[i].valid = 1;
1506 pEquation->xor1[i].index = Log2(x) + elemLog2;
1507 }
1508 else
1509 {
1510 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1511 pEquation->xor2[i].channel = 0;
1512 pEquation->xor2[i].valid = 1;
1513 pEquation->xor2[i].index = Log2(x) + elemLog2;
1514 }
1515 }
1516
1517 if (y != 0)
1518 {
1519 ADDR_ASSERT(IsPow2(y));
1520
1521 if (pEquation->xor1[i].value == 0)
1522 {
1523 pEquation->xor1[i].channel = 1;
1524 pEquation->xor1[i].valid = 1;
1525 pEquation->xor1[i].index = Log2(y);
1526 }
1527 else
1528 {
1529 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1530 pEquation->xor2[i].channel = 1;
1531 pEquation->xor2[i].valid = 1;
1532 pEquation->xor2[i].index = Log2(y);
1533 }
1534 }
1535
1536 swizzle[i].x &= ~x;
1537 swizzle[i].y &= ~y;
1538 }
1539 }
1540 }
1541 }
1542
// Every in-block x and y bit must have ended up as the addr term of some address bit.
1543 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1544 }
// Case 3: thick XOR modes. Same two-pass scheme as the thin case, with z treated like x and y.
1545 else if (IsEquationCompatibleThick(rsrcType, swMode))
1546 {
1547 const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
1548 const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
1549 const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
1550 const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1551 const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1552 const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1553
1554 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1555 UINT_32 xMask = 0;
1556 UINT_32 yMask = 0;
1557 UINT_32 zMask = 0;
1558 UINT_32 bMask = (1 << elemLog2) - 1;
1559
// Pass 1: classify every address bit straight from the input pattern.
1560 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1561 {
// Single coordinate bit: it becomes the addr term and the address bit is done.
1562 if (IsPow2(pSwizzle[i].value))
1563 {
1564 if (pSwizzle[i].x != 0)
1565 {
1566 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1567 xMask |= pSwizzle[i].x;
1568
1569 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1570
1571 ADDR_ASSERT(xLog2 < blkXLog2);
1572
1573 pEquation->addr[i].channel = 0;
1574 pEquation->addr[i].valid = 1;
1575 pEquation->addr[i].index = xLog2 + elemLog2;
1576 }
1577 else if (pSwizzle[i].y != 0)
1578 {
1579 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1580 yMask |= pSwizzle[i].y;
1581
1582 pEquation->addr[i].channel = 1;
1583 pEquation->addr[i].valid = 1;
1584 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1585
1586 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1587 }
1588 else
1589 {
1590 ADDR_ASSERT(pSwizzle[i].z != 0);
1591 ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1592 zMask |= pSwizzle[i].z;
1593
1594 pEquation->addr[i].channel = 2;
1595 pEquation->addr[i].valid = 1;
1596 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1597
1598 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1599 }
1600
1601 swizzle[i].value = 0;
1602 bMask |= 1 << i;
1603 }
// Several coordinate bits XORed together: copy x/y/z into the working copy (s is dropped).
1604 else
1605 {
1606 swizzle[i].x = pSwizzle[i].x;
1607 swizzle[i].y = pSwizzle[i].y;
1608 swizzle[i].z = pSwizzle[i].z;
1609 swizzle[i].s = 0;
1610
1611 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1612
// Coordinate bits at or above the block dimensions go to the XOR slots; only two slots exist, so
// at most two of the three channels may have such bits.
1613 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1614 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1615 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1616
1617 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1618
1619 if (xHi != 0)
1620 {
1621 ADDR_ASSERT(IsPow2(xHi));
1622 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1623
1624 pEquation->xor1[i].channel = 0;
1625 pEquation->xor1[i].valid = 1;
1626 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1627
1628 swizzle[i].x &= blkXMask;
1629 }
1630
1631 if (yHi != 0)
1632 {
1633 ADDR_ASSERT(IsPow2(yHi));
1634
1635 if (pEquation->xor1[i].value == 0)
1636 {
1637 pEquation->xor1[i].channel = 1;
1638 pEquation->xor1[i].valid = 1;
1639 pEquation->xor1[i].index = Log2(yHi);
1640 }
1641 else
1642 {
1643 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1644 pEquation->xor2[i].channel = 1;
1645 pEquation->xor2[i].valid = 1;
1646 pEquation->xor2[i].index = Log2(yHi);
1647 }
1648
1649 swizzle[i].y &= blkYMask;
1650 }
1651
1652 if (zHi != 0)
1653 {
1654 ADDR_ASSERT(IsPow2(zHi));
1655
1656 if (pEquation->xor1[i].value == 0)
1657 {
1658 pEquation->xor1[i].channel = 2;
1659 pEquation->xor1[i].valid = 1;
1660 pEquation->xor1[i].index = Log2(zHi);
1661 }
1662 else
1663 {
1664 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1665 pEquation->xor2[i].channel = 2;
1666 pEquation->xor2[i].valid = 1;
1667 pEquation->xor2[i].index = Log2(zHi);
1668 }
1669
1670 swizzle[i].z &= blkZMask;
1671 }
1672
// Nothing left in the working copy: this address bit is resolved.
1673 if (swizzle[i].value == 0)
1674 {
1675 bMask |= 1 << i;
1676 }
1677 }
1678 }
1679
1680 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1681 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1682
// All bits below the pipe interleave are expected to be resolved by pass 1.
1683 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1684
// Pass 2: sweep the unresolved bits repeatedly until every address bit is resolved.
// NOTE(review): as in the thin case, nothing bounds the number of sweeps - confirm convergence.
1685 while (bMask != blockMask)
1686 {
1687 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1688 {
1689 if ((bMask & (1 << i)) == 0)
1690 {
// Exactly one coordinate bit remains: it becomes the addr term of this address bit.
1691 if (IsPow2(swizzle[i].value))
1692 {
1693 if (swizzle[i].x != 0)
1694 {
1695 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1696 xMask |= swizzle[i].x;
1697
1698 const UINT_32 xLog2 = Log2(swizzle[i].x);
1699
1700 ADDR_ASSERT(xLog2 < blkXLog2);
1701
1702 pEquation->addr[i].channel = 0;
1703 pEquation->addr[i].valid = 1;
1704 pEquation->addr[i].index = xLog2 + elemLog2;
1705 }
1706 else if (swizzle[i].y != 0)
1707 {
1708 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1709 yMask |= swizzle[i].y;
1710
1711 pEquation->addr[i].channel = 1;
1712 pEquation->addr[i].valid = 1;
1713 pEquation->addr[i].index = Log2(swizzle[i].y);
1714
1715 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1716 }
1717 else
1718 {
1719 ADDR_ASSERT(swizzle[i].z != 0);
1720 ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1721 zMask |= swizzle[i].z;
1722
1723 pEquation->addr[i].channel = 2;
1724 pEquation->addr[i].valid = 1;
1725 pEquation->addr[i].index = Log2(swizzle[i].z);
1726
1727 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1728 }
1729
1730 swizzle[i].value = 0;
1731 bMask |= 1 << i;
1732 }
// Otherwise move coordinate bits that are already some bit's addr term into the free XOR slots
// (xor1 first, then xor2) and strip them from the working copy.
1733 else
1734 {
1735 const UINT_32 x = swizzle[i].x & xMask;
1736 const UINT_32 y = swizzle[i].y & yMask;
1737 const UINT_32 z = swizzle[i].z & zMask;
1738
1739 if (x != 0)
1740 {
1741 ADDR_ASSERT(IsPow2(x));
1742
1743 if (pEquation->xor1[i].value == 0)
1744 {
1745 pEquation->xor1[i].channel = 0;
1746 pEquation->xor1[i].valid = 1;
1747 pEquation->xor1[i].index = Log2(x) + elemLog2;
1748 }
1749 else
1750 {
1751 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1752 pEquation->xor2[i].channel = 0;
1753 pEquation->xor2[i].valid = 1;
1754 pEquation->xor2[i].index = Log2(x) + elemLog2;
1755 }
1756 }
1757
1758 if (y != 0)
1759 {
1760 ADDR_ASSERT(IsPow2(y));
1761
1762 if (pEquation->xor1[i].value == 0)
1763 {
1764 pEquation->xor1[i].channel = 1;
1765 pEquation->xor1[i].valid = 1;
1766 pEquation->xor1[i].index = Log2(y);
1767 }
1768 else
1769 {
1770 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1771 pEquation->xor2[i].channel = 1;
1772 pEquation->xor2[i].valid = 1;
1773 pEquation->xor2[i].index = Log2(y);
1774 }
1775 }
1776
1777 if (z != 0)
1778 {
1779 ADDR_ASSERT(IsPow2(z));
1780
1781 if (pEquation->xor1[i].value == 0)
1782 {
1783 pEquation->xor1[i].channel = 2;
1784 pEquation->xor1[i].valid = 1;
1785 pEquation->xor1[i].index = Log2(z);
1786 }
1787 else
1788 {
1789 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1790 pEquation->xor2[i].channel = 2;
1791 pEquation->xor2[i].valid = 1;
1792 pEquation->xor2[i].index = Log2(z);
1793 }
1794 }
1795
1796 swizzle[i].x &= ~x;
1797 swizzle[i].y &= ~y;
1798 swizzle[i].z &= ~z;
1799 }
1800 }
1801 }
1802 }
1803
// Every in-block x, y and z bit must have ended up as the addr term of some address bit.
1804 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1805 }
1806 }
1807
1808 /**
1809 ************************************************************************************************************************
1810 * Gfx10Lib::InitEquationTable
1811 *
1812 * @brief
1813 * Initialize Equation table.
1814 *
1815 * @return
1816 * N/A
1817 ************************************************************************************************************************
1818 */
1819 VOID Gfx10Lib::InitEquationTable()
1820 {
1821 memset(m_equationTable, 0, sizeof(m_equationTable));
1822
1823 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1824 {
1825 const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1826
1827 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
1828 {
1829 const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1830
1831 for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1832 {
1833 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
1834 const UINT_64* pPattern = GetSwizzlePattern(swMode, rsrcType, elemLog2, 1);
1835
1836 if (pPattern != NULL)
1837 {
1838 ADDR_EQUATION equation = {};
1839
1840 ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPattern, &equation);
1841
1842 equationIndex = m_numEquations;
1843 ADDR_ASSERT(equationIndex < EquationTableSize);
1844
1845 m_equationTable[equationIndex] = equation;
1846
1847 m_numEquations++;
1848 }
1849
1850 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
1851 }
1852 }
1853 }
1854 }
1855
1856 /**
1857 ************************************************************************************************************************
1858 * Gfx10Lib::HwlGetEquationIndex
1859 *
1860 * @brief
1861 * Interface function stub of GetEquationIndex
1862 *
1863 * @return
1864 * ADDR_E_RETURNCODE
1865 ************************************************************************************************************************
1866 */
1867 UINT_32 Gfx10Lib::HwlGetEquationIndex(
1868 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
1869 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
1870 ) const
1871 {
1872 UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
1873
1874 if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
1875 (pIn->resourceType == ADDR_RSRC_TEX_3D))
1876 {
1877 const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
1878 const UINT_32 swModeIdx = static_cast<UINT_32>(pIn->swizzleMode);
1879 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
1880
1881 equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
1882 }
1883
1884 if (pOut->pMipInfo != NULL)
1885 {
1886 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
1887 {
1888 pOut->pMipInfo[i].equationIndex = equationIdx;
1889 }
1890 }
1891
1892 return equationIdx;
1893 }
1894
1895 /**
1896 ************************************************************************************************************************
1897 * Gfx10Lib::IsValidDisplaySwizzleMode
1898 *
1899 * @brief
1900 * Check if a swizzle mode is supported by display engine
1901 *
1902 * @return
1903 * TRUE is swizzle mode is supported by display engine
1904 ************************************************************************************************************************
1905 */
1906 BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
1907 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
1908 ) const
1909 {
1910 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
1911
1912 BOOL_32 support = FALSE;
1913
1914 if (m_settings.isDcn2)
1915 {
1916 switch (pIn->swizzleMode)
1917 {
1918 case ADDR_SW_4KB_D:
1919 case ADDR_SW_4KB_D_X:
1920 case ADDR_SW_64KB_D:
1921 case ADDR_SW_64KB_D_T:
1922 case ADDR_SW_64KB_D_X:
1923 support = (pIn->bpp == 64);
1924 break;
1925
1926 case ADDR_SW_LINEAR:
1927 case ADDR_SW_4KB_S:
1928 case ADDR_SW_4KB_S_X:
1929 case ADDR_SW_64KB_S:
1930 case ADDR_SW_64KB_S_T:
1931 case ADDR_SW_64KB_S_X:
1932 case ADDR_SW_64KB_R_X:
1933 support = (pIn->bpp <= 64);
1934 break;
1935
1936 default:
1937 break;
1938 }
1939 }
1940 else
1941 {
1942 ADDR_NOT_IMPLEMENTED();
1943 }
1944
1945 return support;
1946 }
1947
1948 /**
1949 ************************************************************************************************************************
1950 * Gfx10Lib::GetMaxNumMipsInTail
1951 *
1952 * @brief
1953 * Return max number of mips in tails
1954 *
1955 * @return
1956 * Max number of mips in tails
1957 ************************************************************************************************************************
1958 */
1959 UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
1960 UINT_32 blockSizeLog2, ///< block size log2
1961 BOOL_32 isThin ///< is thin or thick
1962 ) const
1963 {
1964 UINT_32 effectiveLog2 = blockSizeLog2;
1965
1966 if (isThin == FALSE)
1967 {
1968 effectiveLog2 -= (blockSizeLog2 - 8) / 3;
1969 }
1970
1971 return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
1972 }
1973
1974 /**
1975 ************************************************************************************************************************
1976 * Gfx10Lib::HwlComputePipeBankXor
1977 *
1978 * @brief
1979 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
1980 *
1981 * @return
1982 * PipeBankXor value
1983 ************************************************************************************************************************
1984 */
1985 ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
1986 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
1987 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
1988 ) const
1989 {
1990 if (IsNonPrtXor(pIn->swizzleMode))
1991 {
1992 const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
1993 const UINT_32 pipeBits = GetPipeXorBits(blockBits);
1994 const UINT_32 bankBits = GetBankXorBits(blockBits);
1995
1996 UINT_32 pipeXor = 0;
1997 UINT_32 bankXor = 0;
1998
1999 if (bankBits != 0)
2000 {
2001 if (blockBits == 16)
2002 {
2003 const UINT_32 XorPatternLen = 8;
2004 static const UINT_32 XorBank1b[XorPatternLen] = {0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80};
2005 static const UINT_32 XorBank2b[XorPatternLen] = {0x00, 0x80, 0x40, 0xC0, 0x80, 0x00, 0xC0, 0x40};
2006 static const UINT_32 XorBank3b[XorPatternLen] = {0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0};
2007
2008 const UINT_32 index = pIn->surfIndex % XorPatternLen;
2009
2010 if (bankBits == 1)
2011 {
2012 bankXor = XorBank1b[index];
2013 }
2014 else if (bankBits == 2)
2015 {
2016 bankXor = XorBank2b[index];
2017 }
2018 else
2019 {
2020 bankXor = XorBank3b[index];
2021
2022 if (bankBits == 4)
2023 {
2024 bankXor >>= (2 - pipeBits);
2025 }
2026 }
2027 }
2028 }
2029
2030 pOut->pipeBankXor = bankXor | pipeXor;
2031 }
2032 else
2033 {
2034 pOut->pipeBankXor = 0;
2035 }
2036
2037 return ADDR_OK;
2038 }
2039
2040 /**
2041 ************************************************************************************************************************
2042 * Gfx10Lib::HwlComputeSlicePipeBankXor
2043 *
2044 * @brief
2045 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2046 *
2047 * @return
2048 * PipeBankXor value
2049 ************************************************************************************************************************
2050 */
2051 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
2052 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2053 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2054 ) const
2055 {
2056 if (IsNonPrtXor(pIn->swizzleMode))
2057 {
2058 const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2059 const UINT_32 pipeBits = GetPipeXorBits(blockBits);
2060 const UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
2061
2062 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;
2063 }
2064 else
2065 {
2066 pOut->pipeBankXor = 0;
2067 }
2068
2069 return ADDR_OK;
2070 }
2071
2072 /**
2073 ************************************************************************************************************************
2074 * Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2075 *
2076 * @brief
2077 * Compute sub resource offset to support swizzle pattern
2078 *
2079 * @return
2080 * Offset
2081 ************************************************************************************************************************
2082 */
2083 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2084 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure
2085 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure
2086 ) const
2087 {
2088 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2089
2090 pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
2091
2092 return ADDR_OK;
2093 }
2094
2095 /**
2096 ************************************************************************************************************************
2097 * Gfx10Lib::ValidateNonSwModeParams
2098 *
2099 * @brief
2100 * Validate compute surface info params except swizzle mode
2101 *
2102 * @return
2103 * TRUE if parameters are valid, FALSE otherwise
2104 ************************************************************************************************************************
2105 */
2106 BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
2107 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2108 {
2109 BOOL_32 valid = TRUE;
2110
2111 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2112 {
2113 ADDR_ASSERT_ALWAYS();
2114 valid = FALSE;
2115 }
2116
2117 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
2118 {
2119 ADDR_ASSERT_ALWAYS();
2120 valid = FALSE;
2121 }
2122
2123 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2124 const AddrResourceType rsrcType = pIn->resourceType;
2125 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
2126 const BOOL_32 msaa = (pIn->numFrags > 1);
2127 const BOOL_32 display = flags.display;
2128 const BOOL_32 tex3d = IsTex3d(rsrcType);
2129 const BOOL_32 tex2d = IsTex2d(rsrcType);
2130 const BOOL_32 tex1d = IsTex1d(rsrcType);
2131 const BOOL_32 stereo = flags.qbStereo;
2132
2133 // Resource type check
2134 if (tex1d)
2135 {
2136 if (msaa || display || stereo)
2137 {
2138 ADDR_ASSERT_ALWAYS();
2139 valid = FALSE;
2140 }
2141 }
2142 else if (tex2d)
2143 {
2144 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2145 {
2146 ADDR_ASSERT_ALWAYS();
2147 valid = FALSE;
2148 }
2149 }
2150 else if (tex3d)
2151 {
2152 if (msaa || display || stereo)
2153 {
2154 ADDR_ASSERT_ALWAYS();
2155 valid = FALSE;
2156 }
2157 }
2158 else
2159 {
2160 ADDR_ASSERT_ALWAYS();
2161 valid = FALSE;
2162 }
2163
2164 return valid;
2165 }
2166
2167 /**
2168 ************************************************************************************************************************
2169 * Gfx10Lib::ValidateSwModeParams
2170 *
2171 * @brief
2172 * Validate compute surface info related to swizzle mode
2173 *
2174 * @return
2175 * TRUE if parameters are valid, FALSE otherwise
2176 ************************************************************************************************************************
2177 */
2178 BOOL_32 Gfx10Lib::ValidateSwModeParams(
2179 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2180 {
2181 BOOL_32 valid = TRUE;
2182
2183 if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2184 {
2185 ADDR_ASSERT_ALWAYS();
2186 valid = FALSE;
2187 }
2188
2189 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2190 const AddrResourceType rsrcType = pIn->resourceType;
2191 const AddrSwizzleMode swizzle = pIn->swizzleMode;
2192 const BOOL_32 msaa = (pIn->numFrags > 1);
2193 const BOOL_32 zbuffer = flags.depth || flags.stencil;
2194 const BOOL_32 color = flags.color;
2195 const BOOL_32 display = flags.display;
2196 const BOOL_32 tex3d = IsTex3d(rsrcType);
2197 const BOOL_32 tex2d = IsTex2d(rsrcType);
2198 const BOOL_32 tex1d = IsTex1d(rsrcType);
2199 const BOOL_32 thin3d = flags.view3dAs2dArray;
2200 const BOOL_32 linear = IsLinear(swizzle);
2201 const BOOL_32 blk256B = IsBlock256b(swizzle);
2202 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
2203 const BOOL_32 prt = flags.prt;
2204
2205 // Misc check
2206 if ((pIn->numFrags > 1) &&
2207 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2208 {
2209 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2210 ADDR_ASSERT_ALWAYS();
2211 valid = FALSE;
2212 }
2213
2214 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2215 {
2216 ADDR_ASSERT_ALWAYS();
2217 valid = FALSE;
2218 }
2219
2220 if ((pIn->bpp == 96) && (linear == FALSE))
2221 {
2222 ADDR_ASSERT_ALWAYS();
2223 valid = FALSE;
2224 }
2225
2226 const UINT_32 swizzleMask = 1 << swizzle;
2227
2228 // Resource type check
2229 if (tex1d)
2230 {
2231 if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
2232 {
2233 ADDR_ASSERT_ALWAYS();
2234 valid = FALSE;
2235 }
2236 }
2237 else if (tex2d)
2238 {
2239 if (((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0) ||
2240 (prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)))
2241 {
2242 ADDR_ASSERT_ALWAYS();
2243 valid = FALSE;
2244 }
2245 }
2246 else if (tex3d)
2247 {
2248 if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
2249 (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
2250 (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0)))
2251 {
2252 ADDR_ASSERT_ALWAYS();
2253 valid = FALSE;
2254 }
2255 }
2256
2257 // Swizzle type check
2258 if (linear)
2259 {
2260 if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2261 {
2262 ADDR_ASSERT_ALWAYS();
2263 valid = FALSE;
2264 }
2265 }
2266 else if (IsZOrderSwizzle(swizzle))
2267 {
2268 if ((pIn->bpp > 64) ||
2269 (msaa && (color || (pIn->bpp > 32))) ||
2270 ElemLib::IsBlockCompressed(pIn->format) ||
2271 ElemLib::IsMacroPixelPacked(pIn->format))
2272 {
2273 ADDR_ASSERT_ALWAYS();
2274 valid = FALSE;
2275 }
2276 }
2277 else if (IsStandardSwizzle(rsrcType, swizzle))
2278 {
2279 if (zbuffer || msaa)
2280 {
2281 ADDR_ASSERT_ALWAYS();
2282 valid = FALSE;
2283 }
2284 }
2285 else if (IsDisplaySwizzle(rsrcType, swizzle))
2286 {
2287 if (zbuffer || msaa)
2288 {
2289 ADDR_ASSERT_ALWAYS();
2290 valid = FALSE;
2291 }
2292 }
2293 else if (IsRtOptSwizzle(swizzle))
2294 {
2295 if (zbuffer)
2296 {
2297 ADDR_ASSERT_ALWAYS();
2298 valid = FALSE;
2299 }
2300 }
2301 else
2302 {
2303 ADDR_ASSERT_ALWAYS();
2304 valid = FALSE;
2305 }
2306
2307 // Block type check
2308 if (blk256B)
2309 {
2310 if (zbuffer || tex3d || msaa)
2311 {
2312 ADDR_ASSERT_ALWAYS();
2313 valid = FALSE;
2314 }
2315 }
2316
2317 return valid;
2318 }
2319
2320 /**
2321 ************************************************************************************************************************
2322 * Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2323 *
2324 * @brief
2325 * Compute surface info sanity check
2326 *
2327 * @return
2328 * Offset
2329 ************************************************************************************************************************
2330 */
2331 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2332 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
2333 ) const
2334 {
2335 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2336 }
2337
2338 /**
2339 ************************************************************************************************************************
2340 * Gfx10Lib::HwlGetPreferredSurfaceSetting
2341 *
2342 * @brief
2343 * Internal function to get suggested surface information for cliet to use
2344 *
2345 * @return
2346 * ADDR_E_RETURNCODE
2347 ************************************************************************************************************************
2348 */
2349 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2350 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure
2351 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure
2352 ) const
2353 {
2354 ADDR_E_RETURNCODE returnCode = ADDR_OK;
2355
2356 if (pIn->flags.fmask)
2357 {
2358 pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2359 pOut->resourceType = ADDR_RSRC_TEX_2D;
2360 pOut->validBlockSet.value = AddrBlockSetMacro64KB;
2361 pOut->canXor = TRUE;
2362 pOut->validSwTypeSet.value = AddrSwSetZ;
2363 pOut->clientPreferredSwSet = pOut->validSwTypeSet;
2364 pOut->validSwModeSet.value = Gfx10ZSwModeMask;
2365 }
2366 else
2367 {
2368 UINT_32 bpp = pIn->bpp;
2369 UINT_32 width = Max(pIn->width, 1u);
2370 UINT_32 height = Max(pIn->height, 1u);
2371
2372 // Set format to INVALID will skip this conversion
2373 if (pIn->format != ADDR_FMT_INVALID)
2374 {
2375 ElemMode elemMode = ADDR_UNCOMPRESSED;
2376 UINT_32 expandX, expandY;
2377
2378 // Get compression/expansion factors and element mode which indicates compression/expansion
2379 bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2380 &elemMode,
2381 &expandX,
2382 &expandY);
2383
2384 UINT_32 basePitch = 0;
2385 GetElemLib()->AdjustSurfaceInfo(elemMode,
2386 expandX,
2387 expandY,
2388 &bpp,
2389 &basePitch,
2390 &width,
2391 &height);
2392 }
2393
2394 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2395 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2396 const UINT_32 numSamples = Max(pIn->numSamples, 1u);
2397 const UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2398 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
2399
2400 // Pre sanity check on non swizzle mode parameters
2401 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2402 localIn.flags = pIn->flags;
2403 localIn.resourceType = pIn->resourceType;
2404 localIn.format = pIn->format;
2405 localIn.bpp = bpp;
2406 localIn.width = width;
2407 localIn.height = height;
2408 localIn.numSlices = numSlices;
2409 localIn.numMipLevels = numMipLevels;
2410 localIn.numSamples = numSamples;
2411 localIn.numFrags = numFrags;
2412
2413 if (ValidateNonSwModeParams(&localIn))
2414 {
2415 // Forbid swizzle mode(s) by client setting
2416 ADDR2_SWMODE_SET allowedSwModeSet = {};
2417 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2418 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx10Blk256BSwModeMask;
2419 allowedSwModeSet.value |= pIn->forbiddenBlock.macro4KB ? 0 : Gfx10Blk4KBSwModeMask;
2420 allowedSwModeSet.value |= pIn->forbiddenBlock.macro64KB ? 0 : Gfx10Blk64KBSwModeMask;
2421
2422 if (pIn->preferredSwSet.value != 0)
2423 {
2424 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2425 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2426 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2427 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2428 }
2429
2430 if (pIn->noXor)
2431 {
2432 allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2433 }
2434
2435 if (pIn->maxAlign > 0)
2436 {
2437 if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
2438 {
2439 allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2440 }
2441
2442 if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
2443 {
2444 allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2445 }
2446
2447 if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
2448 {
2449 allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2450 }
2451 }
2452
2453 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2454 switch (pIn->resourceType)
2455 {
2456 case ADDR_RSRC_TEX_1D:
2457 allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2458 break;
2459
2460 case ADDR_RSRC_TEX_2D:
2461 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2462 break;
2463
2464 case ADDR_RSRC_TEX_3D:
2465 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2466
2467 if (m_settings.supportRbPlus)
2468 {
2469 allowedSwModeSet.value &= ~Gfx10DisplaySwModeMask;
2470 }
2471
2472 if (pIn->flags.view3dAs2dArray)
2473 {
2474 allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask;
2475 }
2476 break;
2477
2478 default:
2479 ADDR_ASSERT_ALWAYS();
2480 allowedSwModeSet.value = 0;
2481 break;
2482 }
2483
2484 if (ElemLib::IsBlockCompressed(pIn->format) ||
2485 ElemLib::IsMacroPixelPacked(pIn->format) ||
2486 (bpp > 64) ||
2487 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2488 {
2489 allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2490 }
2491
2492 if (pIn->format == ADDR_FMT_32_32_32)
2493 {
2494 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2495 }
2496
2497 if (msaa)
2498 {
2499 allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
2500 }
2501
2502 if (pIn->flags.depth || pIn->flags.stencil)
2503 {
2504 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2505 }
2506
2507 if (pIn->flags.display)
2508 {
2509 if (m_settings.isDcn2)
2510 {
2511 allowedSwModeSet.value &= (bpp == 64) ? Dcn2Bpp64SwModeMask : Dcn2NonBpp64SwModeMask;
2512 }
2513 else
2514 {
2515 ADDR_NOT_IMPLEMENTED();
2516 }
2517 }
2518
2519 if (allowedSwModeSet.value != 0)
2520 {
2521 #if DEBUG
2522 // Post sanity check, at least AddrLib should accept the output generated by its own
2523 UINT_32 validateSwModeSet = allowedSwModeSet.value;
2524
2525 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
2526 {
2527 if (validateSwModeSet & 1)
2528 {
2529 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
2530 ADDR_ASSERT(ValidateSwModeParams(&localIn));
2531 }
2532
2533 validateSwModeSet >>= 1;
2534 }
2535 #endif
2536
2537 pOut->resourceType = pIn->resourceType;
2538 pOut->validSwModeSet = allowedSwModeSet;
2539 pOut->canXor = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
2540 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet);
2541 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
2542
2543 pOut->clientPreferredSwSet = pIn->preferredSwSet;
2544
2545 if (pOut->clientPreferredSwSet.value == 0)
2546 {
2547 pOut->clientPreferredSwSet.value = AddrSwSetAll;
2548 }
2549
2550 if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
2551 {
2552 pOut->swizzleMode = ADDR_SW_LINEAR;
2553 }
2554 else
2555 {
2556 // Always ignore linear swizzle mode if there is other choice.
2557 allowedSwModeSet.swLinear = 0;
2558
2559 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet);
2560
2561 // Determine block size if there is 2 or more block type candidates
2562 if (IsPow2(allowedBlockSet.value) == FALSE)
2563 {
2564 const AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {ADDR_SW_256B, ADDR_SW_4KB, ADDR_SW_64KB};
2565 Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
2566 Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
2567 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
2568
2569 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2570 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2571 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
2572 UINT_32 minSizeBlk = AddrBlockMicro;
2573 UINT_64 minSize = 0;
2574
2575 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
2576 {
2577 if (allowedBlockSet.value & (1 << i))
2578 {
2579 ComputeBlockDimensionForSurf(&blkDim[i].w,
2580 &blkDim[i].h,
2581 &blkDim[i].d,
2582 bpp,
2583 numFrags,
2584 pOut->resourceType,
2585 swMode[i]);
2586
2587 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2588 padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2589
2590 if ((minSize == 0) ||
2591 ((padSize[i] * ratioHi) <= (minSize * ratioLow)))
2592 {
2593 minSize = padSize[i];
2594 minSizeBlk = i;
2595 }
2596 }
2597 }
2598
2599 if ((allowedBlockSet.micro == TRUE) &&
2600 (width <= blkDim[AddrBlockMicro].w) &&
2601 (height <= blkDim[AddrBlockMicro].h))
2602 {
2603 minSizeBlk = AddrBlockMicro;
2604 }
2605
2606 if (minSizeBlk == AddrBlockMicro)
2607 {
2608 allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
2609 }
2610 else if (minSizeBlk == AddrBlock4KB)
2611 {
2612 allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
2613 }
2614 else
2615 {
2616 ADDR_ASSERT(minSizeBlk == AddrBlock64KB);
2617 allowedSwModeSet.value &= Gfx10Blk64KBSwModeMask;
2618 }
2619 }
2620
2621 // Block type should be determined.
2622 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet).value));
2623
2624 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
2625
2626 // Determine swizzle type if there is 2 or more swizzle type candidates
2627 if (IsPow2(allowedSwSet.value) == FALSE)
2628 {
2629 if (ElemLib::IsBlockCompressed(pIn->format))
2630 {
2631 if (allowedSwSet.sw_D)
2632 {
2633 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2634 }
2635 else if (allowedSwSet.sw_S)
2636 {
2637 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2638 }
2639 else
2640 {
2641 ADDR_ASSERT(allowedSwSet.sw_R);
2642 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2643 }
2644 }
2645 else if (ElemLib::IsMacroPixelPacked(pIn->format))
2646 {
2647 if (allowedSwSet.sw_S)
2648 {
2649 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2650 }
2651 else if (allowedSwSet.sw_D)
2652 {
2653 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2654 }
2655 else
2656 {
2657 ADDR_ASSERT(allowedSwSet.sw_R);
2658 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2659 }
2660 }
2661 else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
2662 {
2663 if (pIn->flags.color && GetAllowedBlockSet(allowedSwModeSet).macro64KB && allowedSwSet.sw_D)
2664 {
2665 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2666 }
2667 else if (allowedSwSet.sw_S)
2668 {
2669 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2670 }
2671 else if (allowedSwSet.sw_R)
2672 {
2673 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2674 }
2675 else
2676 {
2677 ADDR_ASSERT(allowedSwSet.sw_Z);
2678 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2679 }
2680 }
2681 else
2682 {
2683 if (allowedSwSet.sw_R)
2684 {
2685 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2686 }
2687 else if (allowedSwSet.sw_D)
2688 {
2689 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2690 }
2691 else if (allowedSwSet.sw_S)
2692 {
2693 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2694 }
2695 else
2696 {
2697 ADDR_ASSERT(allowedSwSet.sw_Z);
2698 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2699 }
2700 }
2701 }
2702
2703 // Swizzle type should be determined.
2704 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
2705
2706 // Determine swizzle mode now - always select the "largest" swizzle mode for a given block type +
2707 // swizzle type combination. For example, for AddrBlock64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
2708 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
2709 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
2710 }
2711 }
2712 else
2713 {
2714 // Invalid combination...
2715 ADDR_ASSERT_ALWAYS();
2716 returnCode = ADDR_INVALIDPARAMS;
2717 }
2718 }
2719 else
2720 {
2721 // Invalid combination...
2722 ADDR_ASSERT_ALWAYS();
2723 returnCode = ADDR_INVALIDPARAMS;
2724 }
2725 }
2726
2727 return returnCode;
2728 }
2729
2730 /**
2731 ************************************************************************************************************************
2732 * Gfx10Lib::ComputeStereoInfo
2733 *
2734 * @brief
2735 * Compute height alignment and right eye pipeBankXor for stereo surface
2736 *
2737 * @return
2738 * Error code
2739 *
2740 ************************************************************************************************************************
2741 */
2742 ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
2743 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< Compute surface info
2744 UINT_32 blkHeight, ///< Block height
2745 UINT_32* pAlignY, ///< Stereo requested additional alignment in Y
2746 UINT_32* pRightXor ///< Right eye xor
2747 ) const
2748 {
2749 ADDR_E_RETURNCODE ret = ADDR_OK;
2750
2751 *pAlignY = 1;
2752 *pRightXor = 0;
2753
2754 if (IsNonPrtXor(pIn->swizzleMode))
2755 {
2756 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
2757 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
2758 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
2759 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
2760 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
2761
2762 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
2763 {
2764 UINT_32 yMax = 0;
2765 UINT_32 yPos = 0;
2766
2767 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
2768 {
2769 if (m_equationTable[eqIndex].xor1[i].value == 0)
2770 {
2771 break;
2772 }
2773
2774 ADDR_ASSERT(m_equationTable[eqIndex].xor1[i].valid == 1);
2775
2776 if ((m_equationTable[eqIndex].xor1[i].channel == 1) &&
2777 (m_equationTable[eqIndex].xor1[i].index > yMax))
2778 {
2779 yMax = m_equationTable[eqIndex].xor1[i].index;
2780 yPos = i;
2781 }
2782 }
2783
2784 const UINT_32 additionalAlign = 1 << yMax;
2785
2786 if (additionalAlign >= blkHeight)
2787 {
2788 *pAlignY *= (additionalAlign / blkHeight);
2789
2790 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
2791
2792 if ((alignedHeight >> yMax) & 1)
2793 {
2794 *pRightXor = 1 << (yPos - m_pipeInterleaveLog2);
2795 }
2796 }
2797 }
2798 else
2799 {
2800 ret = ADDR_INVALIDPARAMS;
2801 }
2802 }
2803
2804 return ret;
2805 }
2806
2807 /**
2808 ************************************************************************************************************************
2809 * Gfx10Lib::HwlComputeSurfaceInfoTiled
2810 *
2811 * @brief
2812 * Internal function to calculate alignment for tiled surface
2813 *
2814 * @return
2815 * ADDR_E_RETURNCODE
2816 ************************************************************************************************************************
2817 */
2818 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
2819 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
2820 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
2821 ) const
2822 {
2823 ADDR_E_RETURNCODE ret;
2824
2825 if (IsBlock256b(pIn->swizzleMode))
2826 {
2827 ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
2828 }
2829 else
2830 {
2831 ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
2832 }
2833
2834 return ret;
2835 }
2836
2837 /**
2838 ************************************************************************************************************************
2839 * Gfx10Lib::ComputeSurfaceInfoMicroTiled
2840 *
2841 * @brief
2842 * Internal function to calculate alignment for micro tiled surface
2843 *
2844 * @return
2845 * ADDR_E_RETURNCODE
2846 ************************************************************************************************************************
2847 */
2848 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
2849 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
2850 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
2851 ) const
2852 {
2853 ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
2854 &pOut->blockHeight,
2855 &pOut->blockSlices,
2856 pIn->bpp,
2857 pIn->numFrags,
2858 pIn->resourceType,
2859 pIn->swizzleMode);
2860
2861 if (ret == ADDR_OK)
2862 {
2863 pOut->mipChainPitch = 0;
2864 pOut->mipChainHeight = 0;
2865 pOut->mipChainSlice = 0;
2866 pOut->epitchIsHeight = FALSE;
2867 pOut->mipChainInTail = FALSE;
2868 pOut->firstMipIdInTail = pIn->numMipLevels;
2869
2870 const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
2871
2872 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
2873 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
2874 pOut->numSlices = pIn->numSlices;
2875 pOut->baseAlign = blockSize;
2876
2877 if (pIn->numMipLevels > 1)
2878 {
2879 const UINT_32 mip0Width = pIn->width;
2880 const UINT_32 mip0Height = pIn->height;
2881 UINT_64 mipSliceSize = 0;
2882
2883 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
2884 {
2885 UINT_32 mipWidth, mipHeight;
2886
2887 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
2888
2889 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pOut->blockWidth);
2890 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
2891
2892 if (pOut->pMipInfo != NULL)
2893 {
2894 pOut->pMipInfo[i].pitch = mipActualWidth;
2895 pOut->pMipInfo[i].height = mipActualHeight;
2896 pOut->pMipInfo[i].depth = 1;
2897 pOut->pMipInfo[i].offset = mipSliceSize;
2898 pOut->pMipInfo[i].mipTailOffset = 0;
2899 pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
2900 }
2901
2902 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
2903 }
2904
2905 pOut->sliceSize = mipSliceSize;
2906 pOut->surfSize = mipSliceSize * pOut->numSlices;
2907 }
2908 else
2909 {
2910 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
2911 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
2912
2913 if (pOut->pMipInfo != NULL)
2914 {
2915 pOut->pMipInfo[0].pitch = pOut->pitch;
2916 pOut->pMipInfo[0].height = pOut->height;
2917 pOut->pMipInfo[0].depth = 1;
2918 pOut->pMipInfo[0].offset = 0;
2919 pOut->pMipInfo[0].mipTailOffset = 0;
2920 pOut->pMipInfo[0].macroBlockOffset = 0;
2921 }
2922 }
2923
2924 }
2925
2926 return ret;
2927 }
2928
2929 /**
2930 ************************************************************************************************************************
2931 * Gfx10Lib::ComputeSurfaceInfoMacroTiled
2932 *
2933 * @brief
2934 * Internal function to calculate alignment for macro tiled surface
2935 *
2936 * @return
2937 * ADDR_E_RETURNCODE
2938 ************************************************************************************************************************
2939 */
2940 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
2941 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
2942 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
2943 ) const
2944 {
2945 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
2946 &pOut->blockHeight,
2947 &pOut->blockSlices,
2948 pIn->bpp,
2949 pIn->numFrags,
2950 pIn->resourceType,
2951 pIn->swizzleMode);
2952
2953 if (returnCode == ADDR_OK)
2954 {
2955 UINT_32 heightAlign = pOut->blockHeight;
2956
2957 if (pIn->flags.qbStereo)
2958 {
2959 UINT_32 rightXor = 0;
2960 UINT_32 alignY = 1;
2961
2962 returnCode = ComputeStereoInfo(pIn, heightAlign, &alignY, &rightXor);
2963
2964 if (returnCode == ADDR_OK)
2965 {
2966 pOut->pStereoInfo->rightSwizzle = rightXor;
2967
2968 heightAlign *= alignY;
2969 }
2970 }
2971
2972 if (returnCode == ADDR_OK)
2973 {
2974 // Mip chain dimesion and epitch has no meaning in GFX10, set to default value
2975 pOut->mipChainPitch = 0;
2976 pOut->mipChainHeight = 0;
2977 pOut->mipChainSlice = 0;
2978 pOut->epitchIsHeight = FALSE;
2979 pOut->mipChainInTail = FALSE;
2980 pOut->firstMipIdInTail = pIn->numMipLevels;
2981
2982 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
2983 const UINT_32 blockSize = 1 << blockSizeLog2;
2984
2985 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
2986 pOut->height = PowTwoAlign(pIn->height, heightAlign);
2987 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
2988 pOut->baseAlign = blockSize;
2989
2990 if (pIn->numMipLevels > 1)
2991 {
2992 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
2993 pIn->swizzleMode,
2994 pOut->blockWidth,
2995 pOut->blockHeight,
2996 pOut->blockSlices);
2997 const UINT_32 mip0Width = pIn->width;
2998 const UINT_32 mip0Height = pIn->height;
2999 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
3000 const UINT_32 mip0Depth = isThin ? 1 : pIn->numSlices;
3001 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3002 const UINT_32 index = Log2(pIn->bpp >> 3);
3003 UINT_32 firstMipInTail = pIn->numMipLevels;
3004 UINT_64 mipChainSliceSize = 0;
3005 UINT_64 mipSize[MaxMipLevels];
3006 UINT_64 mipSliceSize[MaxMipLevels];
3007
3008 Dim3d fixedTailMaxDim = tailMaxDim;
3009
3010 if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3011 {
3012 fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3013 fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3014 }
3015
3016 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3017 {
3018 UINT_32 mipWidth, mipHeight, mipDepth;
3019
3020 GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3021
3022 if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3023 {
3024 firstMipInTail = i;
3025 mipChainSliceSize += blockSize / pOut->blockSlices;
3026 break;
3027 }
3028 else
3029 {
3030 const UINT_32 pitch = PowTwoAlign(mipWidth, pOut->blockWidth);
3031 const UINT_32 height = PowTwoAlign(mipHeight, pOut->blockHeight);
3032 const UINT_32 depth = PowTwoAlign(mipDepth, pOut->blockSlices);
3033 const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3034
3035 mipSize[i] = sliceSize * depth;
3036 mipSliceSize[i] = sliceSize * pOut->blockSlices;
3037 mipChainSliceSize += sliceSize;
3038
3039 if (pOut->pMipInfo != NULL)
3040 {
3041 pOut->pMipInfo[i].pitch = pitch;
3042 pOut->pMipInfo[i].height = height;
3043 pOut->pMipInfo[i].depth = depth;
3044 }
3045 }
3046 }
3047
3048 pOut->sliceSize = mipChainSliceSize;
3049 pOut->surfSize = mipChainSliceSize * pOut->numSlices;
3050 pOut->mipChainInTail = (firstMipInTail == 0) ? TRUE : FALSE;
3051 pOut->firstMipIdInTail = firstMipInTail;
3052
3053 if (pOut->pMipInfo != NULL)
3054 {
3055 UINT_64 offset = 0;
3056 UINT_64 macroBlkOffset = 0;
3057 UINT_32 tailMaxDepth = 0;
3058
3059 if (firstMipInTail != pIn->numMipLevels)
3060 {
3061 UINT_32 mipWidth, mipHeight;
3062
3063 GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3064 &mipWidth, &mipHeight, &tailMaxDepth);
3065
3066 offset = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3067 macroBlkOffset = blockSize;
3068 }
3069
3070 for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3071 {
3072 pOut->pMipInfo[i].offset = offset;
3073 pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3074 pOut->pMipInfo[i].mipTailOffset = 0;
3075
3076 offset += mipSize[i];
3077 macroBlkOffset += mipSliceSize[i];
3078 }
3079
3080 UINT_32 pitch = tailMaxDim.w;
3081 UINT_32 height = tailMaxDim.h;
3082 UINT_32 depth = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3083
3084 tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3085
3086 for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3087 {
3088 const UINT_32 m = maxMipsInTail - 1 - (i - firstMipInTail);
3089 const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3090
3091 pOut->pMipInfo[i].offset = mipOffset * tailMaxDepth;
3092 pOut->pMipInfo[i].mipTailOffset = mipOffset;
3093 pOut->pMipInfo[i].macroBlockOffset = 0;
3094
3095 pOut->pMipInfo[i].pitch = pitch;
3096 pOut->pMipInfo[i].height = height;
3097 pOut->pMipInfo[i].depth = depth;
3098
3099 UINT_32 mipX = ((mipOffset >> 9) & 1) |
3100 ((mipOffset >> 10) & 2) |
3101 ((mipOffset >> 11) & 4) |
3102 ((mipOffset >> 12) & 8) |
3103 ((mipOffset >> 13) & 16) |
3104 ((mipOffset >> 14) & 32);
3105 UINT_32 mipY = ((mipOffset >> 8) & 1) |
3106 ((mipOffset >> 9) & 2) |
3107 ((mipOffset >> 10) & 4) |
3108 ((mipOffset >> 11) & 8) |
3109 ((mipOffset >> 12) & 16) |
3110 ((mipOffset >> 13) & 32);
3111
3112 if (blockSizeLog2 & 1)
3113 {
3114 const UINT_32 temp = mipX;
3115 mipX = mipY;
3116 mipY = temp;
3117
3118 if (index & 1)
3119 {
3120 mipY = (mipY << 1) | (mipX & 1);
3121 mipX = mipX >> 1;
3122 }
3123 }
3124
3125 if (isThin)
3126 {
3127 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3128 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3129 pOut->pMipInfo[i].mipTailCoordZ = 0;
3130
3131 pitch = Max(pitch >> 1, Block256_2d[index].w);
3132 height = Max(height >> 1, Block256_2d[index].h);
3133 depth = 1;
3134 }
3135 else
3136 {
3137 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3138 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3139 pOut->pMipInfo[i].mipTailCoordZ = 0;
3140
3141 pitch = Max(pitch >> 1, Block256_3d[index].w);
3142 height = Max(height >> 1, Block256_3d[index].h);
3143 depth = PowTwoAlign(Max(depth >> 1, 1u), Block256_3d[index].d);
3144 }
3145 }
3146 }
3147 }
3148 else
3149 {
3150 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
3151 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3152
3153 if (pOut->pMipInfo != NULL)
3154 {
3155 pOut->pMipInfo[0].pitch = pOut->pitch;
3156 pOut->pMipInfo[0].height = pOut->height;
3157 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
3158 pOut->pMipInfo[0].offset = 0;
3159 pOut->pMipInfo[0].mipTailOffset = 0;
3160 pOut->pMipInfo[0].macroBlockOffset = 0;
3161 pOut->pMipInfo[0].mipTailCoordX = 0;
3162 pOut->pMipInfo[0].mipTailCoordY = 0;
3163 pOut->pMipInfo[0].mipTailCoordZ = 0;
3164 }
3165 }
3166 }
3167 }
3168
3169 return returnCode;
3170 }
3171
3172 /**
3173 ************************************************************************************************************************
3174 * Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3175 *
3176 * @brief
3177 * Internal function to calculate address from coord for tiled swizzle surface
3178 *
3179 * @return
3180 * ADDR_E_RETURNCODE
3181 ************************************************************************************************************************
3182 */
3183 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
3184 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3185 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3186 ) const
3187 {
3188 ADDR_E_RETURNCODE ret;
3189
3190 if (IsBlock256b(pIn->swizzleMode))
3191 {
3192 ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3193 }
3194 else
3195 {
3196 ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3197 }
3198
3199 return ret;
3200 }
3201
3202 /**
3203 ************************************************************************************************************************
3204 * Gfx10Lib::ComputeOffsetFromEquation
3205 *
3206 * @brief
3207 * Compute offset from equation
3208 *
3209 * @return
3210 * Offset
3211 ************************************************************************************************************************
3212 */
3213 UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
3214 const ADDR_EQUATION* pEq, ///< Equation
3215 UINT_32 x, ///< x coord in bytes
3216 UINT_32 y, ///< y coord in pixel
3217 UINT_32 z ///< z coord in slice
3218 ) const
3219 {
3220 UINT_32 offset = 0;
3221
3222 for (UINT_32 i = 0; i < pEq->numBits; i++)
3223 {
3224 UINT_32 v = 0;
3225
3226 if (pEq->addr[i].valid)
3227 {
3228 if (pEq->addr[i].channel == 0)
3229 {
3230 v ^= (x >> pEq->addr[i].index) & 1;
3231 }
3232 else if (pEq->addr[i].channel == 1)
3233 {
3234 v ^= (y >> pEq->addr[i].index) & 1;
3235 }
3236 else
3237 {
3238 ADDR_ASSERT(pEq->addr[i].channel == 2);
3239 v ^= (z >> pEq->addr[i].index) & 1;
3240 }
3241 }
3242
3243 if (pEq->xor1[i].valid)
3244 {
3245 if (pEq->xor1[i].channel == 0)
3246 {
3247 v ^= (x >> pEq->xor1[i].index) & 1;
3248 }
3249 else if (pEq->xor1[i].channel == 1)
3250 {
3251 v ^= (y >> pEq->xor1[i].index) & 1;
3252 }
3253 else
3254 {
3255 ADDR_ASSERT(pEq->xor1[i].channel == 2);
3256 v ^= (z >> pEq->xor1[i].index) & 1;
3257 }
3258 }
3259
3260 if (pEq->xor2[i].valid)
3261 {
3262 if (pEq->xor2[i].channel == 0)
3263 {
3264 v ^= (x >> pEq->xor2[i].index) & 1;
3265 }
3266 else if (pEq->xor2[i].channel == 1)
3267 {
3268 v ^= (y >> pEq->xor2[i].index) & 1;
3269 }
3270 else
3271 {
3272 ADDR_ASSERT(pEq->xor2[i].channel == 2);
3273 v ^= (z >> pEq->xor2[i].index) & 1;
3274 }
3275 }
3276
3277 offset |= (v << i);
3278 }
3279
3280 return offset;
3281 }
3282
3283 /**
3284 ************************************************************************************************************************
3285 * Gfx10Lib::ComputeOffsetFromSwizzlePattern
3286 *
3287 * @brief
3288 * Compute offset from swizzle pattern
3289 *
3290 * @return
3291 * Offset
3292 ************************************************************************************************************************
3293 */
3294 UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern(
3295 const UINT_64* pPattern, ///< Swizzle pattern
3296 UINT_32 numBits, ///< Number of bits in pattern
3297 UINT_32 x, ///< x coord in pixel
3298 UINT_32 y, ///< y coord in pixel
3299 UINT_32 z, ///< z coord in slice
3300 UINT_32 s ///< sample id
3301 ) const
3302 {
3303 UINT_32 offset = 0;
3304 const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
3305
3306 for (UINT_32 i = 0; i < numBits; i++)
3307 {
3308 UINT_32 v = 0;
3309
3310 if (pSwizzlePattern[i].x != 0)
3311 {
3312 UINT_16 mask = pSwizzlePattern[i].x;
3313 UINT_32 xBits = x;
3314
3315 while (mask != 0)
3316 {
3317 if (mask & 1)
3318 {
3319 v ^= xBits & 1;
3320 }
3321
3322 xBits >>= 1;
3323 mask >>= 1;
3324 }
3325 }
3326
3327 if (pSwizzlePattern[i].y != 0)
3328 {
3329 UINT_16 mask = pSwizzlePattern[i].y;
3330 UINT_32 yBits = y;
3331
3332 while (mask != 0)
3333 {
3334 if (mask & 1)
3335 {
3336 v ^= yBits & 1;
3337 }
3338
3339 yBits >>= 1;
3340 mask >>= 1;
3341 }
3342 }
3343
3344 if (pSwizzlePattern[i].z != 0)
3345 {
3346 UINT_16 mask = pSwizzlePattern[i].z;
3347 UINT_32 zBits = z;
3348
3349 while (mask != 0)
3350 {
3351 if (mask & 1)
3352 {
3353 v ^= zBits & 1;
3354 }
3355
3356 zBits >>= 1;
3357 mask >>= 1;
3358 }
3359 }
3360
3361 if (pSwizzlePattern[i].s != 0)
3362 {
3363 UINT_16 mask = pSwizzlePattern[i].s;
3364 UINT_32 sBits = s;
3365
3366 while (mask != 0)
3367 {
3368 if (mask & 1)
3369 {
3370 v ^= sBits & 1;
3371 }
3372
3373 sBits >>= 1;
3374 mask >>= 1;
3375 }
3376 }
3377
3378 offset |= (v << i);
3379 }
3380
3381 return offset;
3382 }
3383
3384 /**
3385 ************************************************************************************************************************
3386 * Gfx10Lib::GetSwizzlePattern
3387 *
3388 * @brief
3389 * Get swizzle pattern
3390 *
3391 * @return
3392 * Swizzle pattern
3393 ************************************************************************************************************************
3394 */
3395 const UINT_64* Gfx10Lib::GetSwizzlePattern(
3396 AddrSwizzleMode swizzleMode, ///< Swizzle mode
3397 AddrResourceType resourceType, ///< Resource type
3398 UINT_32 elemLog2, ///< Element size in bytes log2
3399 UINT_32 numFrag ///< Number of fragment
3400 ) const
3401 {
3402 const UINT_32 index = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
3403 const UINT_64* pSwizzlePattern = NULL;
3404 const UINT_32 swizzleMask = 1 << swizzleMode;
3405
3406 if (IsLinear(swizzleMode))
3407 {
3408 pSwizzlePattern = NULL;
3409 }
3410 else if (resourceType == ADDR_RSRC_TEX_3D)
3411 {
3412 ADDR_ASSERT(numFrag == 1);
3413
3414 if ((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0)
3415 {
3416 pSwizzlePattern = NULL;
3417 }
3418 else if (IsRtOptSwizzle(swizzleMode))
3419 {
3420 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_R_X_1xaa_RBPLUS[index] : SW_64K_R_X_1xaa[index];
3421 }
3422 else if (IsZOrderSwizzle(swizzleMode))
3423 {
3424 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_Z_X_1xaa_RBPLUS[index] : SW_64K_Z_X_1xaa[index];
3425 }
3426 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3427 {
3428 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
3429 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_D3_X_RBPLUS[index] : SW_64K_D3_X[index];
3430 }
3431 else
3432 {
3433 ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
3434
3435 if (IsBlock4kb(swizzleMode))
3436 {
3437 if (swizzleMode == ADDR_SW_4KB_S)
3438 {
3439 pSwizzlePattern = m_settings.supportRbPlus ? SW_4K_S3_RBPLUS[index] : SW_4K_S3[index];
3440 }
3441 else
3442 {
3443 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
3444 pSwizzlePattern = m_settings.supportRbPlus ? SW_4K_S3_X_RBPLUS[index] : SW_4K_S3_X[index];
3445 }
3446 }
3447 else
3448 {
3449 if (swizzleMode == ADDR_SW_64KB_S)
3450 {
3451 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_S3_RBPLUS[index] : SW_64K_S3[index];
3452 }
3453 else if (swizzleMode == ADDR_SW_64KB_S_X)
3454 {
3455 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_S3_X_RBPLUS[index] : SW_64K_S3_X[index];
3456 }
3457 else
3458 {
3459 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
3460 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_S3_T_RBPLUS[index] : SW_64K_S3_T[index];
3461 }
3462 }
3463 }
3464
3465 }
3466 else
3467 {
3468 if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0)
3469 {
3470 pSwizzlePattern = NULL;
3471 }
3472 else if (IsBlock256b(swizzleMode))
3473 {
3474 if (swizzleMode == ADDR_SW_256B_S)
3475 {
3476 pSwizzlePattern = m_settings.supportRbPlus ? SW_256_S_RBPLUS[index] : SW_256_S[index];
3477 }
3478 else
3479 {
3480 ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
3481 pSwizzlePattern = m_settings.supportRbPlus ? SW_256_D_RBPLUS[index] : SW_256_D[index];
3482 }
3483 }
3484 else if (IsBlock4kb(swizzleMode))
3485 {
3486 if (IsStandardSwizzle(resourceType, swizzleMode))
3487 {
3488 if (swizzleMode == ADDR_SW_4KB_S)
3489 {
3490 pSwizzlePattern = m_settings.supportRbPlus ? SW_4K_S_RBPLUS[index] : SW_4K_S[index];
3491 }
3492 else
3493 {
3494 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
3495 pSwizzlePattern = m_settings.supportRbPlus ? SW_4K_S_X_RBPLUS[index] : SW_4K_S_X[index];
3496 }
3497 }
3498 else
3499 {
3500 if (swizzleMode == ADDR_SW_4KB_D)
3501 {
3502 pSwizzlePattern = m_settings.supportRbPlus ? SW_4K_D_RBPLUS[index] : SW_4K_D[index];
3503 }
3504 else
3505 {
3506 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
3507 pSwizzlePattern = m_settings.supportRbPlus ? SW_4K_D_X_RBPLUS[index] : SW_4K_D_X[index];
3508 }
3509 }
3510 }
3511 else
3512 {
3513 if (IsRtOptSwizzle(swizzleMode))
3514 {
3515 if (numFrag == 1)
3516 {
3517 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_R_X_1xaa_RBPLUS[index] : SW_64K_R_X_1xaa[index];
3518 }
3519 else if (numFrag == 2)
3520 {
3521 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_R_X_2xaa_RBPLUS[index] : SW_64K_R_X_2xaa[index];
3522 }
3523 else if (numFrag == 4)
3524 {
3525 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_R_X_4xaa_RBPLUS[index] : SW_64K_R_X_4xaa[index];
3526 }
3527 else
3528 {
3529 ADDR_ASSERT(numFrag == 8);
3530 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_R_X_8xaa_RBPLUS[index] : SW_64K_R_X_8xaa[index];
3531 }
3532 }
3533 else if (IsZOrderSwizzle(swizzleMode))
3534 {
3535 if (numFrag == 1)
3536 {
3537 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_Z_X_1xaa_RBPLUS[index] : SW_64K_Z_X_1xaa[index];
3538 }
3539 else if (numFrag == 2)
3540 {
3541 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_Z_X_2xaa_RBPLUS[index] : SW_64K_Z_X_2xaa[index];
3542 }
3543 else if (numFrag == 4)
3544 {
3545 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_Z_X_4xaa_RBPLUS[index] : SW_64K_Z_X_4xaa[index];
3546 }
3547 else
3548 {
3549 ADDR_ASSERT(numFrag == 8);
3550 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_Z_X_8xaa_RBPLUS[index] : SW_64K_Z_X_8xaa[index];
3551 }
3552 }
3553 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3554 {
3555 if (swizzleMode == ADDR_SW_64KB_D)
3556 {
3557 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_D_RBPLUS[index] : SW_64K_D[index];
3558 }
3559 else if (swizzleMode == ADDR_SW_64KB_D_X)
3560 {
3561 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_D_X_RBPLUS[index] : SW_64K_D_X[index];
3562 }
3563 else
3564 {
3565 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
3566 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_D_T_RBPLUS[index] : SW_64K_D_T[index];
3567 }
3568 }
3569 else
3570 {
3571 if (swizzleMode == ADDR_SW_64KB_S)
3572 {
3573 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_S_RBPLUS[index] : SW_64K_S[index];
3574 }
3575 else if (swizzleMode == ADDR_SW_64KB_S_X)
3576 {
3577 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_S_X_RBPLUS[index] : SW_64K_S_X[index];
3578 }
3579 else
3580 {
3581 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
3582 pSwizzlePattern = m_settings.supportRbPlus ? SW_64K_S_T_RBPLUS[index] : SW_64K_S_T[index];
3583 }
3584 }
3585 }
3586 }
3587
3588 return pSwizzlePattern;
3589 }
3590
3591 /**
3592 ************************************************************************************************************************
3593 * Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
3594 *
3595 * @brief
3596 * Internal function to calculate address from coord for micro tiled swizzle surface
3597 *
3598 * @return
3599 * ADDR_E_RETURNCODE
3600 ************************************************************************************************************************
3601 */
3602 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
3603 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3604 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3605 ) const
3606 {
3607 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3608 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
3609 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
3610
3611 localIn.swizzleMode = pIn->swizzleMode;
3612 localIn.flags = pIn->flags;
3613 localIn.resourceType = pIn->resourceType;
3614 localIn.bpp = pIn->bpp;
3615 localIn.width = Max(pIn->unalignedWidth, 1u);
3616 localIn.height = Max(pIn->unalignedHeight, 1u);
3617 localIn.numSlices = Max(pIn->numSlices, 1u);
3618 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
3619 localIn.numSamples = Max(pIn->numSamples, 1u);
3620 localIn.numFrags = Max(pIn->numFrags, 1u);
3621 localOut.pMipInfo = mipInfo;
3622
3623 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
3624
3625 if (ret == ADDR_OK)
3626 {
3627 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3628 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
3629 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3630 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
3631
3632 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3633 {
3634 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
3635 const UINT_32 yb = pIn->y / localOut.blockHeight;
3636 const UINT_32 xb = pIn->x / localOut.blockWidth;
3637 const UINT_32 blockIndex = yb * pb + xb;
3638 const UINT_32 blockSize = 256;
3639 const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
3640 pIn->x << elemLog2,
3641 pIn->y,
3642 0);
3643 pOut->addr = localOut.sliceSize * pIn->slice +
3644 mipInfo[pIn->mipId].macroBlockOffset +
3645 (blockIndex * blockSize) +
3646 blk256Offset;
3647 }
3648 else
3649 {
3650 ret = ADDR_INVALIDPARAMS;
3651 }
3652 }
3653
3654 return ret;
3655 }
3656
3657 /**
3658 ************************************************************************************************************************
3659 * Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
3660 *
3661 * @brief
3662 * Internal function to calculate address from coord for macro tiled swizzle surface
3663 *
3664 * @return
3665 * ADDR_E_RETURNCODE
3666 ************************************************************************************************************************
3667 */
3668 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
3669 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3670 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3671 ) const
3672 {
3673 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3674 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
3675 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
3676
3677 localIn.swizzleMode = pIn->swizzleMode;
3678 localIn.flags = pIn->flags;
3679 localIn.resourceType = pIn->resourceType;
3680 localIn.bpp = pIn->bpp;
3681 localIn.width = Max(pIn->unalignedWidth, 1u);
3682 localIn.height = Max(pIn->unalignedHeight, 1u);
3683 localIn.numSlices = Max(pIn->numSlices, 1u);
3684 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
3685 localIn.numSamples = Max(pIn->numSamples, 1u);
3686 localIn.numFrags = Max(pIn->numFrags, 1u);
3687 localOut.pMipInfo = mipInfo;
3688
3689 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
3690
3691 if (ret == ADDR_OK)
3692 {
3693 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3694 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3695 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
3696 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
3697 const UINT_32 bankMask = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
3698 const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
3699 (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
3700
3701 if (localIn.numFrags > 1)
3702 {
3703 const UINT_64* pPattern = GetSwizzlePattern(pIn->swizzleMode,
3704 pIn->resourceType,
3705 elemLog2,
3706 localIn.numFrags);
3707
3708 if (pPattern != NULL)
3709 {
3710 const UINT_32 pb = localOut.pitch / localOut.blockWidth;
3711 const UINT_32 yb = pIn->y / localOut.blockHeight;
3712 const UINT_32 xb = pIn->x / localOut.blockWidth;
3713 const UINT_64 blkIdx = yb * pb + xb;
3714 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(pPattern,
3715 blkSizeLog2,
3716 pIn->x,
3717 pIn->y,
3718 pIn->slice,
3719 pIn->sample);
3720 pOut->addr = (localOut.sliceSize * pIn->slice) +
3721 (blkIdx << blkSizeLog2) +
3722 (blkOffset ^ pipeBankXor);
3723 }
3724 else
3725 {
3726 ret = ADDR_INVALIDPARAMS;
3727 }
3728 }
3729 else
3730 {
3731 const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
3732 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3733 const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
3734
3735 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3736 {
3737 const BOOL_32 inTail = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
3738 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
3739 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
3740 const UINT_32 sliceId = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
3741 const UINT_32 x = inTail ? (pIn->x + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
3742 const UINT_32 y = inTail ? (pIn->y + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
3743 const UINT_32 z = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
3744 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
3745 const UINT_32 yb = pIn->y / localOut.blockHeight;
3746 const UINT_32 xb = pIn->x / localOut.blockWidth;
3747 const UINT_64 blkIdx = yb * pb + xb;
3748 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
3749 x << elemLog2,
3750 y,
3751 z);
3752 pOut->addr = sliceSize * sliceId +
3753 mipInfo[pIn->mipId].macroBlockOffset +
3754 (blkIdx << blkSizeLog2) +
3755 (blkOffset ^ pipeBankXor);
3756 }
3757 else
3758 {
3759 ret = ADDR_INVALIDPARAMS;
3760 }
3761 }
3762 }
3763
3764 return ret;
3765 }
3766
3767 /**
3768 ************************************************************************************************************************
3769 * Gfx10Lib::HwlComputeMaxBaseAlignments
3770 *
3771 * @brief
3772 * Gets maximum alignments
3773 * @return
3774 * maximum alignments
3775 ************************************************************************************************************************
3776 */
3777 UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
3778 {
3779 return GetBlockSize(ADDR_SW_64KB);
3780 }
3781
3782 /**
3783 ************************************************************************************************************************
3784 * Gfx10Lib::HwlComputeMaxMetaBaseAlignments
3785 *
3786 * @brief
3787 * Gets maximum alignments for metadata
3788 * @return
3789 * maximum alignments for metadata
3790 ************************************************************************************************************************
3791 */
3792 UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
3793 {
3794 // Max base alignment for Htile
3795 Dim3d metaBlk = {0};
3796 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
3797 ADDR_RSRC_TEX_2D,
3798 ADDR_SW_64KB_Z_X,
3799 0,
3800 0,
3801 TRUE,
3802 &metaBlk);
3803
3804 const UINT_32 maxBaseAlignHtile = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
3805
3806 // Max base alignment for Cmask
3807 const UINT_32 maxBaseAlignCmask = GetMetaBlkSize(Gfx10DataFmask,
3808 ADDR_RSRC_TEX_2D,
3809 ADDR_SW_64KB_Z_X,
3810 0,
3811 0,
3812 TRUE,
3813 &metaBlk);
3814
3815 // Max base alignment for 2D Dcc
3816 const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
3817 {
3818 ADDR_SW_64KB_S_X,
3819 ADDR_SW_64KB_D_X,
3820 ADDR_SW_64KB_R_X,
3821 };
3822
3823 UINT_32 maxBaseAlignDcc2D = 0;
3824
3825 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
3826 {
3827 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
3828 {
3829 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
3830 {
3831 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,
3832 ADDR_RSRC_TEX_2D,
3833 ValidSwizzleModeForDcc2D[swIdx],
3834 bppLog2,
3835 numFragLog2,
3836 TRUE,
3837 &metaBlk);
3838
3839 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
3840 }
3841 }
3842 }
3843
3844 // Max base alignment for 3D Dcc
3845 const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
3846 {
3847 ADDR_SW_64KB_Z_X,
3848 ADDR_SW_64KB_S_X,
3849 ADDR_SW_64KB_D_X,
3850 ADDR_SW_64KB_R_X,
3851 };
3852
3853 UINT_32 maxBaseAlignDcc3D = 0;
3854
3855 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
3856 {
3857 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
3858 {
3859 const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,
3860 ADDR_RSRC_TEX_3D,
3861 ValidSwizzleModeForDcc3D[swIdx],
3862 bppLog2,
3863 0,
3864 TRUE,
3865 &metaBlk);
3866
3867 maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
3868 }
3869 }
3870
3871 return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
3872 }
3873
3874 /**
3875 ************************************************************************************************************************
3876 * Gfx10Lib::GetMetaElementSizeLog2
3877 *
3878 * @brief
3879 * Gets meta data element size log2
3880 * @return
3881 * Meta data element size log2
3882 ************************************************************************************************************************
3883 */
3884 INT_32 Gfx10Lib::GetMetaElementSizeLog2(
3885 Gfx10DataType dataType) ///< Data surface type
3886 {
3887 INT_32 elemSizeLog2 = 0;
3888
3889 if (dataType == Gfx10DataColor)
3890 {
3891 elemSizeLog2 = 0;
3892 }
3893 else if (dataType == Gfx10DataDepthStencil)
3894 {
3895 elemSizeLog2 = 2;
3896 }
3897 else
3898 {
3899 ADDR_ASSERT(dataType == Gfx10DataFmask);
3900 elemSizeLog2 = -1;
3901 }
3902
3903 return elemSizeLog2;
3904 }
3905
3906 /**
3907 ************************************************************************************************************************
3908 * Gfx10Lib::GetMetaCacheSizeLog2
3909 *
3910 * @brief
3911 * Gets meta data cache line size log2
3912 * @return
3913 * Meta data cache line size log2
3914 ************************************************************************************************************************
3915 */
3916 INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
3917 Gfx10DataType dataType) ///< Data surface type
3918 {
3919 INT_32 cacheSizeLog2 = 0;
3920
3921 if (dataType == Gfx10DataColor)
3922 {
3923 cacheSizeLog2 = 6;
3924 }
3925 else if (dataType == Gfx10DataDepthStencil)
3926 {
3927 cacheSizeLog2 = 8;
3928 }
3929 else
3930 {
3931 ADDR_ASSERT(dataType == Gfx10DataFmask);
3932 cacheSizeLog2 = 8;
3933 }
3934 return cacheSizeLog2;
3935 }
3936
3937 /**
3938 ************************************************************************************************************************
3939 * Gfx10Lib::HwlComputeSurfaceInfoLinear
3940 *
3941 * @brief
3942 * Internal function to calculate alignment for linear surface
3943 *
3944 * @return
3945 * ADDR_E_RETURNCODE
3946 ************************************************************************************************************************
3947 */
3948 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
3949 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3950 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3951 ) const
3952 {
3953 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3954
3955 if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
3956 {
3957 returnCode = ADDR_INVALIDPARAMS;
3958 }
3959 else
3960 {
3961 const UINT_32 elementBytes = pIn->bpp >> 3;
3962 const UINT_32 pitchAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
3963 const UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
3964 UINT_32 pitch = PowTwoAlign(pIn->width, pitchAlign);
3965 UINT_32 actualHeight = pIn->height;
3966 UINT_64 sliceSize = 0;
3967
3968 if (pIn->numMipLevels > 1)
3969 {
3970 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3971 {
3972 UINT_32 mipWidth, mipHeight;
3973
3974 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
3975
3976 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
3977
3978 if (pOut->pMipInfo != NULL)
3979 {
3980 pOut->pMipInfo[i].pitch = mipActualWidth;
3981 pOut->pMipInfo[i].height = mipHeight;
3982 pOut->pMipInfo[i].depth = mipDepth;
3983 pOut->pMipInfo[i].offset = sliceSize;
3984 pOut->pMipInfo[i].mipTailOffset = 0;
3985 pOut->pMipInfo[i].macroBlockOffset = sliceSize;
3986 }
3987
3988 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
3989 }
3990 }
3991 else
3992 {
3993 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
3994
3995 if (returnCode == ADDR_OK)
3996 {
3997 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
3998
3999 if (pOut->pMipInfo != NULL)
4000 {
4001 pOut->pMipInfo[0].pitch = pitch;
4002 pOut->pMipInfo[0].height = actualHeight;
4003 pOut->pMipInfo[0].depth = mipDepth;
4004 pOut->pMipInfo[0].offset = 0;
4005 pOut->pMipInfo[0].mipTailOffset = 0;
4006 pOut->pMipInfo[0].macroBlockOffset = 0;
4007 }
4008 }
4009 }
4010
4011 if (returnCode == ADDR_OK)
4012 {
4013 pOut->pitch = pitch;
4014 pOut->height = actualHeight;
4015 pOut->numSlices = pIn->numSlices;
4016 pOut->sliceSize = sliceSize;
4017 pOut->surfSize = sliceSize * pOut->numSlices;
4018 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4019 pOut->blockWidth = pitchAlign;
4020 pOut->blockHeight = 1;
4021 pOut->blockSlices = 1;
4022
4023 // Following members are useless on GFX10
4024 pOut->mipChainPitch = 0;
4025 pOut->mipChainHeight = 0;
4026 pOut->mipChainSlice = 0;
4027 pOut->epitchIsHeight = FALSE;
4028
4029 // Post calculation validate
4030 ADDR_ASSERT(pOut->sliceSize > 0);
4031 }
4032 }
4033
4034 return returnCode;
4035 }
4036
4037 } // V2
4038 } // Addr