2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
28 ************************************************************************************************************************
29 * @file gfx10addrlib.cpp
30 * @brief Contains the implementation for the Gfx10Lib class.
31 ************************************************************************************************************************
34 #include "gfx10addrlib.h"
35 #include "gfx10_gb_reg.h"
37 #include "amdgpu_asic_addr.h"
39 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
40 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
45 ************************************************************************************************************************
49 * Creates a Gfx10Lib object.
52 * Returns a Gfx10Lib object pointer.
53 ************************************************************************************************************************
// Factory entry point for the GFX10 address library: forwards pClient to
// V2::Gfx10Lib::CreateObj and returns whatever that call produces.
55 Addr::Lib
* Gfx10HwlInit(const Client
* pClient
)
57 return V2::Gfx10Lib::CreateObj(pClient
);
63 ////////////////////////////////////////////////////////////////////////////////////////////////////
64 // Static Const Member
65 ////////////////////////////////////////////////////////////////////////////////////////////////////
// Static table of per-swizzle-mode flags with ADDR_SW_MAX_TYPE rows. Each row carries a trailing
// comment naming the ADDR_SW_* mode it describes; rows marked "Reserved" are all zero. The column
// order is given by the header comment on the opening-brace line. The constructor copies this table
// into m_swizzleModeTable.
67 const SwizzleModeFlags
Gfx10Lib::SwizzleModeTable
[ADDR_SW_MAX_TYPE
] =
68 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
69 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
70 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
71 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
72 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
74 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
75 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
76 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
77 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
79 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
80 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
81 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
82 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
84 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
85 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
86 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
87 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
89 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
90 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
91 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
92 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
94 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
95 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_X
96 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_X
97 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
99 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
100 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
101 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
102 {0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_64KB_R_X
104 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_Z_X
105 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
106 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
107 {0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_VAR_R_X
108 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
// Static Dim3d lookup tables, five entries each. HwlComputeDccInfo indexes Block256_3d with
// elemLog2 = Log2(bpp >> 3) to obtain the compress block width/height/depth of thick resources.
111 const Dim3d
Gfx10Lib::Block256_3d
[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
// NOTE(review): judging by the "_Log2_" names the two tables below presumably hold log2 dimensions
// of 64KB and 4KB 3D blocks -- confirm at their use sites, which are not visible in this listing.
113 const Dim3d
Gfx10Lib::Block64K_Log2_3d
[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
114 const Dim3d
Gfx10Lib::Block4K_Log2_3d
[] = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
117 ************************************************************************************************************************
123 ************************************************************************************************************************
// Constructor. The visible statements tag the library class as AI_ADDRLIB, zero-fill m_settings and
// copy the static SwizzleModeTable into the per-instance m_swizzleModeTable.
// NOTE(review): source lines 126-131 (presumably the member-initializer list) are absent from this
// listing.
125 Gfx10Lib::Gfx10Lib(const Client
* pClient
)
132 m_class
= AI_ADDRLIB
;
133 memset(&m_settings
, 0, sizeof(m_settings
));
// sizeof(SwizzleModeTable) is the size of the whole static array, so the full table is copied.
134 memcpy(m_swizzleModeTable
, SwizzleModeTable
, sizeof(SwizzleModeTable
));
138 ************************************************************************************************************************
139 * Gfx10Lib::~Gfx10Lib
143 ************************************************************************************************************************
// Destructor. NOTE(review): the body is not present in this listing.
145 Gfx10Lib::~Gfx10Lib()
150 ************************************************************************************************************************
151 * Gfx10Lib::HwlComputeHtileInfo
154 * Interface function stub of AddrComputeHtileInfo
158 ************************************************************************************************************************
// Computes the HTile metadata layout: aligned pitch/height, base alignment, meta block dimensions,
// per-slice size, total htileBytes and, when pOut->pMipInfo is non-NULL, per-mip offset/size info.
// The meta block is obtained from GetMetaBlkSize with Gfx10DataDepthStencil.
// ret starts as ADDR_OK and is set to ADDR_INVALIDPARAMS when the inputs fail validation.
// NOTE(review): braces, else/return lines and some statements are absent from this listing.
160 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeHtileInfo(
161 const ADDR2_COMPUTE_HTILE_INFO_INPUT
* pIn
, ///< [in] input structure
162 ADDR2_COMPUTE_HTILE_INFO_OUTPUT
* pOut
///< [out] output structure
165 ADDR_E_RETURNCODE ret
= ADDR_OK
;
// Accepted only when the swizzle mode is ADDR_SW_64KB_Z_X, or ADDR_SW_VAR_Z_X with a non-zero
// m_blockVarSizeLog2, and hTileFlags.pipeAligned is TRUE.
167 if (((pIn
->swizzleMode
!= ADDR_SW_64KB_Z_X
) &&
168 ((pIn
->swizzleMode
!= ADDR_SW_VAR_Z_X
) || (m_blockVarSizeLog2
== 0))) ||
169 (pIn
->hTileFlags
.pipeAligned
!= TRUE
))
171 ret
= ADDR_INVALIDPARAMS
;
// The remaining GetMetaBlkSize arguments (source lines 177-183) are not visible in this listing;
// the code below reads the resulting block dimensions from metaBlk.
176 const UINT_32 metaBlkSize
= GetMetaBlkSize(Gfx10DataDepthStencil
,
184 pOut
->pitch
= PowTwoAlign(pIn
->unalignedWidth
, metaBlk
.w
);
185 pOut
->height
= PowTwoAlign(pIn
->unalignedHeight
, metaBlk
.h
);
// Base alignment is the larger of one meta block and 1 << (m_pipesLog2 + 11).
186 pOut
->baseAlign
= Max(metaBlkSize
, 1u << (m_pipesLog2
+ 11u));
187 pOut
->metaBlkWidth
= metaBlk
.w
;
188 pOut
->metaBlkHeight
= metaBlk
.h
;
// Mip-chain case.
190 if (pIn
->numMipLevels
> 1)
192 ADDR_ASSERT(pIn
->firstMipIdInTail
<= pIn
->numMipLevels
);
// The running offset starts at metaBlkSize when a mip tail exists (firstMipIdInTail !=
// numMipLevels) and at 0 otherwise; the tail mips themselves are reported at offset 0 below.
194 UINT_32 offset
= (pIn
->firstMipIdInTail
== pIn
->numMipLevels
) ? 0 : metaBlkSize
;
// Walk the non-tail mips from the smallest (firstMipIdInTail - 1) down to mip 0, so smaller mips
// receive lower offsets. The signed index lets the loop terminate after i == 0.
196 for (INT_32 i
= static_cast<INT_32
>(pIn
->firstMipIdInTail
) - 1; i
>=0; i
--)
198 UINT_32 mipWidth
, mipHeight
;
200 GetMipSize(pIn
->unalignedWidth
, pIn
->unalignedHeight
, 1, i
, &mipWidth
, &mipHeight
);
202 mipWidth
= PowTwoAlign(mipWidth
, metaBlk
.w
);
203 mipHeight
= PowTwoAlign(mipHeight
, metaBlk
.h
);
// Mip slice size = number of meta blocks covering the aligned mip times metaBlkSize.
205 const UINT_32 pitchInM
= mipWidth
/ metaBlk
.w
;
206 const UINT_32 heightInM
= mipHeight
/ metaBlk
.h
;
207 const UINT_32 mipSliceSize
= pitchInM
* heightInM
* metaBlkSize
;
209 if (pOut
->pMipInfo
!= NULL
)
211 pOut
->pMipInfo
[i
].inMiptail
= FALSE
;
212 pOut
->pMipInfo
[i
].offset
= offset
;
213 pOut
->pMipInfo
[i
].sliceSize
= mipSliceSize
;
216 offset
+= mipSliceSize
;
// After the loop, offset is the total size of one slice.
219 pOut
->sliceSize
= offset
;
220 pOut
->metaBlkNumPerSlice
= offset
/ metaBlkSize
;
221 pOut
->htileBytes
= pOut
->sliceSize
* pIn
->numSlices
;
// Mips at or beyond firstMipIdInTail are flagged inMiptail with offset 0 and slice size 0; the
// first tail mip is then given a slice size of one meta block.
223 if (pOut
->pMipInfo
!= NULL
)
225 for (UINT_32 i
= pIn
->firstMipIdInTail
; i
< pIn
->numMipLevels
; i
++)
227 pOut
->pMipInfo
[i
].inMiptail
= TRUE
;
228 pOut
->pMipInfo
[i
].offset
= 0;
229 pOut
->pMipInfo
[i
].sliceSize
= 0;
232 if (pIn
->firstMipIdInTail
!= pIn
->numMipLevels
)
234 pOut
->pMipInfo
[pIn
->firstMipIdInTail
].sliceSize
= metaBlkSize
;
// NOTE(review): presumably the else branch of the numMipLevels check (single mip level); the
// slice holds (pitch / metaBlk.w) * (height / metaBlk.h) meta blocks.
240 const UINT_32 pitchInM
= pOut
->pitch
/ metaBlk
.w
;
241 const UINT_32 heightInM
= pOut
->height
/ metaBlk
.h
;
243 pOut
->metaBlkNumPerSlice
= pitchInM
* heightInM
;
244 pOut
->sliceSize
= pOut
->metaBlkNumPerSlice
* metaBlkSize
;
245 pOut
->htileBytes
= pOut
->sliceSize
* pIn
->numSlices
;
247 if (pOut
->pMipInfo
!= NULL
)
249 pOut
->pMipInfo
[0].inMiptail
= FALSE
;
250 pOut
->pMipInfo
[0].offset
= 0;
251 pOut
->pMipInfo
[0].sliceSize
= pOut
->sliceSize
;
260 ************************************************************************************************************************
261 * Gfx10Lib::HwlComputeCmaskInfo
264 * Interface function stub of AddrComputeCmaskInfo
268 ************************************************************************************************************************
// Computes the CMask metadata layout: aligned pitch/height, base alignment (one meta block), meta
// block dimensions, meta blocks per slice, slice size, total cmaskBytes and, when pOut->pMipInfo is
// non-NULL, per-mip offset/size info. The meta block is obtained from GetMetaBlkSize with
// Gfx10DataFmask.
// ret starts as ADDR_OK and is set to ADDR_INVALIDPARAMS when the inputs fail validation.
// NOTE(review): braces, else/return lines and some statements are absent from this listing.
270 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeCmaskInfo(
271 const ADDR2_COMPUTE_CMASK_INFO_INPUT
* pIn
, ///< [in] input structure
272 ADDR2_COMPUTE_CMASK_INFO_OUTPUT
* pOut
///< [out] output structure
275 ADDR_E_RETURNCODE ret
= ADDR_OK
;
// Accepted only for ADDR_RSRC_TEX_2D resources with cMaskFlags.pipeAligned == TRUE whose swizzle
// mode is ADDR_SW_64KB_Z_X, or ADDR_SW_VAR_Z_X with a non-zero m_blockVarSizeLog2.
277 if ((pIn
->resourceType
!= ADDR_RSRC_TEX_2D
) ||
278 (pIn
->cMaskFlags
.pipeAligned
!= TRUE
) ||
279 ((pIn
->swizzleMode
!= ADDR_SW_64KB_Z_X
) &&
280 ((pIn
->swizzleMode
!= ADDR_SW_VAR_Z_X
) || (m_blockVarSizeLog2
== 0))))
282 ret
= ADDR_INVALIDPARAMS
;
// The remaining GetMetaBlkSize arguments (source lines 288-294) are not visible in this listing;
// the code below reads the resulting block dimensions from metaBlk.
287 const UINT_32 metaBlkSize
= GetMetaBlkSize(Gfx10DataFmask
,
295 pOut
->pitch
= PowTwoAlign(pIn
->unalignedWidth
, metaBlk
.w
);
296 pOut
->height
= PowTwoAlign(pIn
->unalignedHeight
, metaBlk
.h
);
297 pOut
->baseAlign
= metaBlkSize
;
298 pOut
->metaBlkWidth
= metaBlk
.w
;
299 pOut
->metaBlkHeight
= metaBlk
.h
;
// Mip-chain case. Unlike the HTile/DCC variants, the accumulator here counts meta blocks rather
// than a size; it is multiplied by metaBlkSize wherever an offset or size is reported.
301 if (pIn
->numMipLevels
> 1)
303 ADDR_ASSERT(pIn
->firstMipIdInTail
<= pIn
->numMipLevels
);
// One meta block is counted up front when a mip tail exists (firstMipIdInTail != numMipLevels).
305 UINT_32 metaBlkPerSlice
= (pIn
->firstMipIdInTail
== pIn
->numMipLevels
) ? 0 : 1;
// Walk the non-tail mips from the smallest (firstMipIdInTail - 1) down to mip 0.
307 for (INT_32 i
= static_cast<INT_32
>(pIn
->firstMipIdInTail
) - 1; i
>= 0; i
--)
309 UINT_32 mipWidth
, mipHeight
;
311 GetMipSize(pIn
->unalignedWidth
, pIn
->unalignedHeight
, 1, i
, &mipWidth
, &mipHeight
);
313 mipWidth
= PowTwoAlign(mipWidth
, metaBlk
.w
);
314 mipHeight
= PowTwoAlign(mipHeight
, metaBlk
.h
);
316 const UINT_32 pitchInM
= mipWidth
/ metaBlk
.w
;
317 const UINT_32 heightInM
= mipHeight
/ metaBlk
.h
;
319 if (pOut
->pMipInfo
!= NULL
)
321 pOut
->pMipInfo
[i
].inMiptail
= FALSE
;
322 pOut
->pMipInfo
[i
].offset
= metaBlkPerSlice
* metaBlkSize
;
323 pOut
->pMipInfo
[i
].sliceSize
= pitchInM
* heightInM
* metaBlkSize
;
326 metaBlkPerSlice
+= pitchInM
* heightInM
;
329 pOut
->metaBlkNumPerSlice
= metaBlkPerSlice
;
// Mips at or beyond firstMipIdInTail are flagged inMiptail with offset 0 and slice size 0; the
// first tail mip is then given a slice size of one meta block.
331 if (pOut
->pMipInfo
!= NULL
)
333 for (UINT_32 i
= pIn
->firstMipIdInTail
; i
< pIn
->numMipLevels
; i
++)
335 pOut
->pMipInfo
[i
].inMiptail
= TRUE
;
336 pOut
->pMipInfo
[i
].offset
= 0;
337 pOut
->pMipInfo
[i
].sliceSize
= 0;
340 if (pIn
->firstMipIdInTail
!= pIn
->numMipLevels
)
342 pOut
->pMipInfo
[pIn
->firstMipIdInTail
].sliceSize
= metaBlkSize
;
// NOTE(review): presumably the else branch of the numMipLevels check (single mip level).
348 const UINT_32 pitchInM
= pOut
->pitch
/ metaBlk
.w
;
349 const UINT_32 heightInM
= pOut
->height
/ metaBlk
.h
;
351 pOut
->metaBlkNumPerSlice
= pitchInM
* heightInM
;
353 if (pOut
->pMipInfo
!= NULL
)
355 pOut
->pMipInfo
[0].inMiptail
= FALSE
;
356 pOut
->pMipInfo
[0].offset
= 0;
357 pOut
->pMipInfo
[0].sliceSize
= pOut
->metaBlkNumPerSlice
* metaBlkSize
;
// Slice size and total size are derived from metaBlkNumPerSlice, which both paths above set.
361 pOut
->sliceSize
= pOut
->metaBlkNumPerSlice
* metaBlkSize
;
362 pOut
->cmaskBytes
= pOut
->sliceSize
* pIn
->numSlices
;
369 ************************************************************************************************************************
370 * Gfx10Lib::HwlComputeDccInfo
373 * Interface function to compute DCC key info
377 ************************************************************************************************************************
// Computes the DCC key layout: compress block dimensions, meta block dimensions, aligned
// pitch/height/depth, base alignment, per-slice size, total dccRamSize and, when pOut->pMipInfo is
// non-NULL, per-mip offset/size info. The meta block is obtained from GetMetaBlkSize with
// Gfx10DataColor.
// ret starts as ADDR_OK and is set to ADDR_INVALIDPARAMS for unsupported inputs.
// NOTE(review): braces, else/return lines and some statements are absent from this listing.
379 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeDccInfo(
380 const ADDR2_COMPUTE_DCCINFO_INPUT
* pIn
, ///< [in] input structure
381 ADDR2_COMPUTE_DCCINFO_OUTPUT
* pOut
///< [out] output structure
384 ADDR_E_RETURNCODE ret
= ADDR_OK
;
386 if (pIn
->swizzleMode
!= ADDR_SW_64KB_Z_X
&& pIn
->swizzleMode
!= ADDR_SW_64KB_R_X
)
388 // Hardware does not support DCC for this swizzle mode.
389 ret
= ADDR_INVALIDPARAMS
;
391 else if (m_settings
.dccUnsup3DSwDis
&& IsTex3d(pIn
->resourceType
) && IsDisplaySwizzle(pIn
->swizzleMode
))
393 // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
394 ret
= ADDR_INVALIDPARAMS
;
398 // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
399 ADDR_ASSERT(IsRtOptSwizzle(pIn
->swizzleMode
));
// elemLog2 is log2 of the element size in bytes (bpp >> 3).
402 const UINT_32 elemLog2
= Log2(pIn
->bpp
>> 3);
403 const UINT_32 numFragLog2
= Log2(pIn
->numFrags
);
// Only the first argument and dccKeyFlags.pipeAligned are visible for this GetMetaBlkSize call;
// the code below reads the resulting block dimensions from metaBlk.
404 const UINT_32 metaBlkSize
= GetMetaBlkSize(Gfx10DataColor
,
409 pIn
->dccKeyFlags
.pipeAligned
,
411 const BOOL_32 isThick
= IsThick(pIn
->resourceType
, pIn
->swizzleMode
);
// Compress block dimensions come from Block256_3d for thick resources and from Block256_2d
// (with depth 1) otherwise, both indexed by elemLog2.
413 pOut
->compressBlkWidth
= isThick
? Block256_3d
[elemLog2
].w
: Block256_2d
[elemLog2
].w
;
414 pOut
->compressBlkHeight
= isThick
? Block256_3d
[elemLog2
].h
: Block256_2d
[elemLog2
].h
;
415 pOut
->compressBlkDepth
= isThick
? Block256_3d
[elemLog2
].d
: 1;
417 pOut
->dccRamBaseAlign
= metaBlkSize
;
418 pOut
->metaBlkWidth
= metaBlk
.w
;
419 pOut
->metaBlkHeight
= metaBlk
.h
;
420 pOut
->metaBlkDepth
= metaBlk
.d
;
422 pOut
->pitch
= PowTwoAlign(pIn
->unalignedWidth
, metaBlk
.w
);
423 pOut
->height
= PowTwoAlign(pIn
->unalignedHeight
, metaBlk
.h
);
424 pOut
->depth
= PowTwoAlign(pIn
->numSlices
, metaBlk
.d
);
// Mip-chain case.
426 if (pIn
->numMipLevels
> 1)
428 ADDR_ASSERT(pIn
->firstMipIdInTail
<= pIn
->numMipLevels
);
// The running offset starts at metaBlkSize when a mip tail exists (firstMipIdInTail !=
// numMipLevels) and at 0 otherwise; the tail mips themselves are reported at offset 0 below.
430 UINT_32 offset
= (pIn
->firstMipIdInTail
== pIn
->numMipLevels
) ? 0 : metaBlkSize
;
// Walk the non-tail mips from the smallest (firstMipIdInTail - 1) down to mip 0.
432 for (INT_32 i
= static_cast<INT_32
>(pIn
->firstMipIdInTail
) - 1; i
>= 0; i
--)
434 UINT_32 mipWidth
, mipHeight
;
436 GetMipSize(pIn
->unalignedWidth
, pIn
->unalignedHeight
, 1, i
, &mipWidth
, &mipHeight
);
438 mipWidth
= PowTwoAlign(mipWidth
, metaBlk
.w
);
439 mipHeight
= PowTwoAlign(mipHeight
, metaBlk
.h
);
441 const UINT_32 pitchInM
= mipWidth
/ metaBlk
.w
;
442 const UINT_32 heightInM
= mipHeight
/ metaBlk
.h
;
443 const UINT_32 mipSliceSize
= pitchInM
* heightInM
* metaBlkSize
;
445 if (pOut
->pMipInfo
!= NULL
)
447 pOut
->pMipInfo
[i
].inMiptail
= FALSE
;
448 pOut
->pMipInfo
[i
].offset
= offset
;
449 pOut
->pMipInfo
[i
].sliceSize
= mipSliceSize
;
452 offset
+= mipSliceSize
;
// After the loop, offset is the size of one meta-block-deep slice; the total multiplies it by the
// number of meta blocks along depth.
455 pOut
->dccRamSliceSize
= offset
;
456 pOut
->metaBlkNumPerSlice
= offset
/ metaBlkSize
;
457 pOut
->dccRamSize
= pOut
->dccRamSliceSize
* (pOut
->depth
/ metaBlk
.d
);
// Mips at or beyond firstMipIdInTail are flagged inMiptail with offset 0 and slice size 0; the
// first tail mip is then given a slice size of one meta block.
459 if (pOut
->pMipInfo
!= NULL
)
461 for (UINT_32 i
= pIn
->firstMipIdInTail
; i
< pIn
->numMipLevels
; i
++)
463 pOut
->pMipInfo
[i
].inMiptail
= TRUE
;
464 pOut
->pMipInfo
[i
].offset
= 0;
465 pOut
->pMipInfo
[i
].sliceSize
= 0;
468 if (pIn
->firstMipIdInTail
!= pIn
->numMipLevels
)
470 pOut
->pMipInfo
[pIn
->firstMipIdInTail
].sliceSize
= metaBlkSize
;
// NOTE(review): presumably the else branch of the numMipLevels check (single mip level).
476 const UINT_32 pitchInM
= pOut
->pitch
/ metaBlk
.w
;
477 const UINT_32 heightInM
= pOut
->height
/ metaBlk
.h
;
479 pOut
->metaBlkNumPerSlice
= pitchInM
* heightInM
;
480 pOut
->dccRamSliceSize
= pOut
->metaBlkNumPerSlice
* metaBlkSize
;
481 pOut
->dccRamSize
= pOut
->dccRamSliceSize
* (pOut
->depth
/ metaBlk
.d
);
483 if (pOut
->pMipInfo
!= NULL
)
485 pOut
->pMipInfo
[0].inMiptail
= FALSE
;
486 pOut
->pMipInfo
[0].offset
= 0;
487 pOut
->pMipInfo
[0].sliceSize
= pOut
->dccRamSliceSize
;
496 ************************************************************************************************************************
497 * Gfx10Lib::HwlComputeCmaskAddrFromCoord
500 * Interface function stub of AddrComputeCmaskAddrFromCoord
504 ************************************************************************************************************************
// Computes the CMask address and bit position for the coordinate (pIn->x, pIn->y, pIn->slice).
// It builds an ADDR2_COMPUTE_CMASK_INFO_INPUT from pIn (width, height and slice count clamped to at
// least 1), calls ComputeCmaskInfo, and derives the address only when that call returns ADDR_OK.
// Only pipe-aligned CMask is supported; this is asserted rather than reported as an error.
// NOTE(review): braces, the return statement and some statements are absent from this listing.
506 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeCmaskAddrFromCoord(
507 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
508 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT
* pOut
) ///< [out] output structure
510 // Only support pipe aligned CMask
511 ADDR_ASSERT(pIn
->cMaskFlags
.pipeAligned
== TRUE
);
513 ADDR2_COMPUTE_CMASK_INFO_INPUT input
= {};
514 input
.size
= sizeof(input
);
515 input
.cMaskFlags
= pIn
->cMaskFlags
;
516 input
.colorFlags
= pIn
->colorFlags
;
517 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
518 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
519 input
.numSlices
= Max(pIn
->numSlices
, 1u);
520 input
.swizzleMode
= pIn
->swizzleMode
;
521 input
.resourceType
= pIn
->resourceType
;
523 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output
= {};
524 output
.size
= sizeof(output
);
526 ADDR_E_RETURNCODE returnCode
= ComputeCmaskInfo(&input
, &output
);
528 if (returnCode
== ADDR_OK
)
530 const UINT_32 fmaskBpp
= GetFmaskBpp(pIn
->numSamples
, pIn
->numFrags
);
531 const UINT_32 fmaskElemLog2
= Log2(fmaskBpp
>> 3);
532 const UINT_32 pipeMask
= (1 << m_pipesLog2
) - 1;
533 const UINT_32 index
= m_xmaskBaseIndex
+ fmaskElemLog2
;
// Pattern index table: the VAR table for ADDR_SW_VAR_Z_X, otherwise the 64K table matching
// m_settings.supportRbPlus.
534 const UINT_8
* patIdxTable
= (pIn
->swizzleMode
== ADDR_SW_VAR_Z_X
) ? CMASK_VAR_RBPLUS_PATIDX
:
535 (m_settings
.supportRbPlus
? CMASK_64K_RBPLUS_PATIDX
: CMASK_64K_PATIDX
);
537 const UINT_32 blkSizeLog2
= Log2(output
.metaBlkWidth
) + Log2(output
.metaBlkHeight
) - 7;
538 const UINT_32 blkMask
= (1 << blkSizeLog2
) - 1;
// blkOffset is requested with one extra bit: its lowest bit selects the nibble and the remaining
// bits are the offset inside the block. The remaining call arguments (source lines 541-544) are
// not visible in this listing.
539 const UINT_32 blkOffset
= ComputeOffsetFromSwizzlePattern(CMASK_SW_PATTERN
[patIdxTable
[index
]],
540 blkSizeLog2
+ 1, // +1 for nibble offset
// Meta block coordinates, meta blocks per row, and the linear meta block index.
545 const UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
546 const UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
547 const UINT_32 pb
= output
.pitch
/ output
.metaBlkWidth
;
548 const UINT_32 blkIndex
= (yb
* pb
) + xb
;
549 const UINT_32 pipeXor
= ((pIn
->pipeXor
& pipeMask
) << m_pipeInterleaveLog2
) & blkMask
;
// sliceSize is widened to UINT_64 before the multiply so the slice term is computed in 64 bits and
// cannot wrap, matching HwlComputeHtileAddrFromCoord and HwlComputeDccAddrFromCoord.
551 pOut
->addr
= (static_cast<UINT_64
>(output
.sliceSize
) * pIn
->slice
) +
552 (blkIndex
* (1 << blkSizeLog2
)) +
553 ((blkOffset
>> 1) ^ pipeXor
);
// The nibble-select bit becomes a bit position of 0 or 4.
554 pOut
->bitPosition
= (blkOffset
& 1) << 2;
561 ************************************************************************************************************************
562 * Gfx10Lib::HwlComputeHtileAddrFromCoord
565 * Interface function stub of AddrComputeHtileAddrFromCoord
569 ************************************************************************************************************************
// Computes the HTile address for the coordinate (pIn->x, pIn->y, pIn->slice).
// Mip-mapped surfaces are not handled: returnCode is set to ADDR_NOTIMPLEMENTED when
// pIn->numMipLevels > 1. Otherwise an ADDR2_COMPUTE_HTILE_INFO_INPUT is built from pIn (width,
// height and slice count clamped to at least 1, numMipLevels forced to 1), ComputeHtileInfo is
// called, and the address is derived only when that call returns ADDR_OK.
// NOTE(review): braces, else/return lines and some statements are absent from this listing.
571 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeHtileAddrFromCoord(
572 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
573 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT
* pOut
) ///< [out] output structure
575 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
577 if (pIn
->numMipLevels
> 1)
579 returnCode
= ADDR_NOTIMPLEMENTED
;
583 ADDR2_COMPUTE_HTILE_INFO_INPUT input
= {0};
584 input
.size
= sizeof(input
);
585 input
.hTileFlags
= pIn
->hTileFlags
;
586 input
.depthFlags
= pIn
->depthflags
;
587 input
.swizzleMode
= pIn
->swizzleMode
;
588 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
589 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
590 input
.numSlices
= Max(pIn
->numSlices
, 1u);
591 input
.numMipLevels
= 1;
593 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output
= {0};
594 output
.size
= sizeof(output
);
596 returnCode
= ComputeHtileInfo(&input
, &output
);
598 if (returnCode
== ADDR_OK
)
600 const UINT_32 numSampleLog2
= Log2(pIn
->numSamples
);
601 const UINT_32 pipeMask
= (1 << m_pipesLog2
) - 1;
602 const UINT_32 index
= m_xmaskBaseIndex
+ numSampleLog2
;
// Pattern index table is chosen by m_settings.supportRbPlus.
603 const UINT_8
* patIdxTable
= m_settings
.supportRbPlus
? HTILE_RBPLUS_PATIDX
: HTILE_PATIDX
;
605 const UINT_32 blkSizeLog2
= Log2(output
.metaBlkWidth
) + Log2(output
.metaBlkHeight
) - 4;
606 const UINT_32 blkMask
= (1 << blkSizeLog2
) - 1;
// blkOffset is requested with one extra bit, which is shifted out again when forming the address.
// The remaining call arguments (source lines 609-612) are not visible in this listing.
607 const UINT_32 blkOffset
= ComputeOffsetFromSwizzlePattern(HTILE_SW_PATTERN
[patIdxTable
[index
]],
608 blkSizeLog2
+ 1, // +1 for nibble offset
// Meta block coordinates, meta blocks per row, and the linear meta block index.
613 const UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
614 const UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
615 const UINT_32 pb
= output
.pitch
/ output
.metaBlkWidth
;
616 const UINT_32 blkIndex
= (yb
* pb
) + xb
;
617 const UINT_32 pipeXor
= ((pIn
->pipeXor
& pipeMask
) << m_pipeInterleaveLog2
) & blkMask
;
// Address = slice base (computed in 64 bits) + meta block base + in-block offset XORed with the
// pipe XOR bits.
619 pOut
->addr
= (static_cast<UINT_64
>(output
.sliceSize
) * pIn
->slice
) +
620 (blkIndex
* (1 << blkSizeLog2
)) +
621 ((blkOffset
>> 1) ^ pipeXor
);
629 ************************************************************************************************************************
630 * Gfx10Lib::HwlComputeHtileCoordFromAddr
633 * Interface function stub of AddrComputeHtileCoordFromAddr
637 ************************************************************************************************************************
// Address-to-coordinate conversion for HTile. The only visible statement invokes
// ADDR_NOT_IMPLEMENTED(); neither pIn nor pOut is used in the visible code.
// NOTE(review): the return statement is not present in this listing.
639 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeHtileCoordFromAddr(
640 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT
* pIn
, ///< [in] input structure
641 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT
* pOut
) ///< [out] output structure
643 ADDR_NOT_IMPLEMENTED();
649 ************************************************************************************************************************
650 * Gfx10Lib::HwlComputeDccAddrFromCoord
653 * Interface function stub of AddrComputeDccAddrFromCoord
657 ************************************************************************************************************************
// Computes the DCC key address for the coordinate (pIn->x, pIn->y, pIn->slice), using the meta
// block dimensions, pitch and dccRamSliceSize supplied by the caller in pIn.
// returnCode is set to ADDR_NOTSUPPORTED unless the resource is ADDR_RSRC_TEX_2D with swizzle mode
// ADDR_SW_64KB_R_X, a non-linear DCC key, at most one fragment and at most one mip level.
// NOTE(review): the last terms of that condition (source lines 670-671), braces, else/return lines
// and some statements are absent from this listing.
659 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeDccAddrFromCoord(
660 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
661 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT
* pOut
) ///< [out] output structure
663 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
665 if ((pIn
->resourceType
!= ADDR_RSRC_TEX_2D
) ||
666 (pIn
->swizzleMode
!= ADDR_SW_64KB_R_X
) ||
667 (pIn
->dccKeyFlags
.linear
== TRUE
) ||
668 (pIn
->numFrags
> 1) ||
669 (pIn
->numMipLevels
> 1) ||
672 returnCode
= ADDR_NOTSUPPORTED
;
// elemLog2 is log2 of the element size in bytes (bpp >> 3).
676 const UINT_32 elemLog2
= Log2(pIn
->bpp
>> 3);
677 const UINT_32 numPipeLog2
= m_pipesLog2
;
678 const UINT_32 pipeMask
= (1 << numPipeLog2
) - 1;
// index selects a row of the pattern index table: it starts at m_dccBaseIndex + elemLog2 and is
// advanced in multiples of MaxNumOfBpp according to the pipe/packer configuration below.
679 UINT_32 index
= m_dccBaseIndex
+ elemLog2
;
680 const UINT_8
* patIdxTable
;
// RB+ hardware uses DCC_64K_R_X_RBPLUS_PATIDX.
682 if (m_settings
.supportRbPlus
)
684 patIdxTable
= DCC_64K_R_X_RBPLUS_PATIDX
;
686 if (pIn
->dccKeyFlags
.pipeAligned
)
688 index
+= MaxNumOfBpp
;
690 if (m_numPkrLog2
< 2)
692 index
+= m_pipesLog2
* MaxNumOfBpp
;
696 // 4 groups for "m_numPkrLog2 < 2" case
697 index
+= 4 * MaxNumOfBpp
;
699 const UINT_32 dccPipePerPkr
= 3;
701 index
+= (m_numPkrLog2
- 2) * dccPipePerPkr
* MaxNumOfBpp
+
702 (m_pipesLog2
- m_numPkrLog2
) * MaxNumOfBpp
;
// NOTE(review): presumably the non-RB+ branch; it uses DCC_64K_R_X_PATIDX.
708 patIdxTable
= DCC_64K_R_X_PATIDX
;
710 if (pIn
->dccKeyFlags
.pipeAligned
)
712 index
+= (numPipeLog2
+ UnalignedDccType
) * MaxNumOfBpp
;
716 index
+= Min(numPipeLog2
, UnalignedDccType
- 1) * MaxNumOfBpp
;
720 const UINT_32 blkSizeLog2
= Log2(pIn
->metaBlkWidth
) + Log2(pIn
->metaBlkHeight
) + elemLog2
- 8;
721 const UINT_32 blkMask
= (1 << blkSizeLog2
) - 1;
// blkOffset is requested with one extra bit, which is shifted out again when forming the address.
// The remaining call arguments (source lines 724-727) are not visible in this listing.
722 const UINT_32 blkOffset
= ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN
[patIdxTable
[index
]],
723 blkSizeLog2
+ 1, // +1 for nibble offset
// Meta block coordinates, meta blocks per row, and the linear meta block index.
728 const UINT_32 xb
= pIn
->x
/ pIn
->metaBlkWidth
;
729 const UINT_32 yb
= pIn
->y
/ pIn
->metaBlkHeight
;
730 const UINT_32 pb
= pIn
->pitch
/ pIn
->metaBlkWidth
;
731 const UINT_32 blkIndex
= (yb
* pb
) + xb
;
732 const UINT_32 pipeXor
= ((pIn
->pipeXor
& pipeMask
) << m_pipeInterleaveLog2
) & blkMask
;
// Address = slice base (computed in 64 bits) + meta block base + in-block offset XORed with the
// pipe XOR bits.
734 pOut
->addr
= (static_cast<UINT_64
>(pIn
->dccRamSliceSize
) * pIn
->slice
) +
735 (blkIndex
* (1 << blkSizeLog2
)) +
736 ((blkOffset
>> 1) ^ pipeXor
);
743 ************************************************************************************************************************
744 * Gfx10Lib::HwlInitGlobalParams
747 * Initializes global parameters
750 * TRUE if all settings are valid
752 ************************************************************************************************************************
// Initializes global parameters from the GB_ADDR_CONFIG register value passed in
// pCreateIn->regValue.gbAddrConfig: it decodes NUM_PIPES, PIPE_INTERLEAVE_SIZE and
// MAX_COMPRESSED_FRAGS, then adjusts the pattern-table base indices (m_xmaskBaseIndex,
// m_colorBaseIndex) for the pipe/packer configuration.
// `valid` starts as TRUE.
// NOTE(review): the case bodies of the NUM_PIPES switch, the break/default lines, braces and the
// return statement are absent from this listing.
754 BOOL_32
Gfx10Lib::HwlInitGlobalParams(
755 const ADDR_CREATE_INPUT
* pCreateIn
) ///< [in] create input
757 BOOL_32 valid
= TRUE
;
758 GB_ADDR_CONFIG_gfx10 gbAddrConfig
;
// Load the raw register value so the individual fields can be read through .bits below.
760 gbAddrConfig
.u32All
= pCreateIn
->regValue
.gbAddrConfig
;
762 // These values are copied from CModel code
763 switch (gbAddrConfig
.bits
.NUM_PIPES
)
765 case ADDR_CONFIG_1_PIPE
:
769 case ADDR_CONFIG_2_PIPE
:
773 case ADDR_CONFIG_4_PIPE
:
777 case ADDR_CONFIG_8_PIPE
:
781 case ADDR_CONFIG_16_PIPE
:
785 case ADDR_CONFIG_32_PIPE
:
789 case ADDR_CONFIG_64_PIPE
:
794 ADDR_ASSERT_ALWAYS();
// Pipe interleave size: each case records both the enum value and its log2 (8..11).
799 switch (gbAddrConfig
.bits
.PIPE_INTERLEAVE_SIZE
)
801 case ADDR_CONFIG_PIPE_INTERLEAVE_256B
:
802 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_256B
;
803 m_pipeInterleaveLog2
= 8;
805 case ADDR_CONFIG_PIPE_INTERLEAVE_512B
:
806 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_512B
;
807 m_pipeInterleaveLog2
= 9;
809 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB
:
810 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_1KB
;
811 m_pipeInterleaveLog2
= 10;
813 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB
:
814 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_2KB
;
815 m_pipeInterleaveLog2
= 11;
818 ADDR_ASSERT_ALWAYS();
823 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
824 // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
825 // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
826 ADDR_ASSERT(m_pipeInterleaveBytes
== ADDR_PIPEINTERLEAVE_256B
);
// Maximum compressed fragments, stored as log2 (0..3).
828 switch (gbAddrConfig
.bits
.MAX_COMPRESSED_FRAGS
)
830 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS
:
832 m_maxCompFragLog2
= 0;
834 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS
:
836 m_maxCompFragLog2
= 1;
838 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS
:
840 m_maxCompFragLog2
= 2;
842 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS
:
844 m_maxCompFragLog2
= 3;
847 ADDR_ASSERT_ALWAYS();
853 // Skip unaligned case
854 m_xmaskBaseIndex
+= MaxNumOfAA
;
// Advance the base indices by one group (MaxNumOfAA / MaxNumOfBpp entries) per pipe log2 step.
856 m_xmaskBaseIndex
+= m_pipesLog2
* MaxNumOfAA
;
857 m_colorBaseIndex
+= m_pipesLog2
* MaxNumOfBpp
;
// RB+ hardware: read the packer count from NUM_PKRS and derive m_numSaLog2 as
// (m_numPkrLog2 - 1), clamped at 0.
859 if (m_settings
.supportRbPlus
)
861 m_numPkrLog2
= gbAddrConfig
.bits
.NUM_PKRS
;
862 m_numSaLog2
= (m_numPkrLog2
> 0) ? (m_numPkrLog2
- 1) : 0;
864 ADDR_ASSERT((m_numPkrLog2
<= m_pipesLog2
) && ((m_pipesLog2
- m_numPkrLog2
) <= 2));
// Compile-time check that the RB+ HTile and CMask pattern index tables have equal length.
866 ADDR_C_ASSERT(sizeof(HTILE_RBPLUS_PATIDX
) / sizeof(HTILE_RBPLUS_PATIDX
[0]) ==
867 sizeof(CMASK_64K_RBPLUS_PATIDX
) / sizeof(CMASK_64K_RBPLUS_PATIDX
[0]));
869 if (m_numPkrLog2
>= 2)
871 m_colorBaseIndex
+= (2 * m_numPkrLog2
- 2) * MaxNumOfBpp
;
872 m_xmaskBaseIndex
+= (m_numPkrLog2
- 1) * 3 * MaxNumOfAA
;
// NOTE(review): presumably the non-RB+ branch; it only performs compile-time table-size checks.
// The tail of the numPipeType expression (source lines 879-880) is not visible in this listing.
877 const UINT_32 numPipeType
= static_cast<UINT_32
>(ADDR_CONFIG_64_PIPE
) -
878 static_cast<UINT_32
>(ADDR_CONFIG_1_PIPE
) +
881 ADDR_C_ASSERT(sizeof(HTILE_PATIDX
) / sizeof(HTILE_PATIDX
[0]) == (numPipeType
+ 1) * MaxNumOfAA
);
883 ADDR_C_ASSERT(sizeof(HTILE_PATIDX
) / sizeof(HTILE_PATIDX
[0]) ==
884 sizeof(CMASK_64K_PATIDX
) / sizeof(CMASK_64K_PATIDX
[0]));
888 if (m_settings
.supportRbPlus
)
890 // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
891 // corresponding SW_64KB_* mode
// 14 is log2(16K), so this is log2(16K * num_pipes).
892 m_blockVarSizeLog2
= m_pipesLog2
+ 14;
904 ************************************************************************************************************************
905 * Gfx10Lib::HwlConvertChipFamily
908 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
911 ************************************************************************************************************************
// Maps the chip family / revision IDs to a ChipFamily value and records chip-specific settings in
// m_settings and m_configFlags. `family` starts as ADDR_CHIP_FAMILY_NAVI.
// NOTE(review): the control flow that selects between the assignments below (source lines 920-923,
// 925, 930-932, 934-936), braces and the return statement are absent from this listing, so the
// exact conditions guarding each assignment cannot be confirmed from here.
913 ChipFamily
Gfx10Lib::HwlConvertChipFamily(
914 UINT_32 chipFamily
, ///< [in] chip family defined in atiid.h
915 UINT_32 chipRevision
) ///< [in] chip revision defined in "asic_family"_id.h
917 ChipFamily family
= ADDR_CHIP_FAMILY_NAVI
;
// dccUnsup3DSwDis is consulted by HwlComputeDccInfo to reject DCC on 3D display-swizzle surfaces.
919 m_settings
.dccUnsup3DSwDis
= 1;
924 m_settings
.isDcn2
= 1;
// Revisions matching ASICREV_IS_SIENNA_M enable RB+ support and clear dccUnsup3DSwDis.
926 if (ASICREV_IS_SIENNA_M(chipRevision
))
928 m_settings
.supportRbPlus
= 1;
929 m_settings
.dccUnsup3DSwDis
= 0;
933 ADDR_ASSERT(!"Unknown chip family");
// dsMipmapHtileFix is set, then cleared again for revisions matching ASICREV_IS_NAVI10_P.
937 m_settings
.dsMipmapHtileFix
= 1;
939 if (ASICREV_IS_NAVI10_P(chipRevision
))
941 m_settings
.dsMipmapHtileFix
= 0;
944 m_configFlags
.use32bppFor422Fmt
= TRUE
;
950 ************************************************************************************************************************
951 * Gfx10Lib::GetBlk256SizeLog2
958 ************************************************************************************************************************
// Writes the log2 dimensions of a micro block into *pBlock, starting from a budget of
// (8 - elemLog2) bits that is split across the block's axes.
// Thin resources split the bits across w and h (after subtracting numSamplesLog2 for Z-order
// swizzles); otherwise the resource is asserted to be thick and the bits are split across d, w
// and h.
// NOTE(review): braces, the else line and source line 979 are absent from this listing; in the
// visible code the thin path never writes pBlock->d.
960 void Gfx10Lib::GetBlk256SizeLog2(
961 AddrResourceType resourceType
, ///< [in] Resource type
962 AddrSwizzleMode swizzleMode
, ///< [in] Swizzle mode
963 UINT_32 elemLog2
, ///< [in] element size log2
964 UINT_32 numSamplesLog2
, ///< [in] number of samples
965 Dim3d
* pBlock
///< [out] block size
968 if (IsThin(resourceType
, swizzleMode
))
970 UINT_32 blockBits
= 8 - elemLog2
;
972 if (IsZOrderSwizzle(swizzleMode
))
974 blockBits
-= numSamplesLog2
;
// Width gets the extra bit when blockBits is odd: w = ceil(bits / 2), h = floor(bits / 2).
977 pBlock
->w
= (blockBits
>> 1) + (blockBits
& 1);
978 pBlock
->h
= (blockBits
>> 1);
983 ADDR_ASSERT(IsThick(resourceType
, swizzleMode
));
985 UINT_32 blockBits
= 8 - elemLog2
;
// Each axis gets bits / 3; the remainder goes to depth first, then width.
987 pBlock
->d
= (blockBits
/ 3) + (((blockBits
% 3) > 0) ? 1 : 0);
988 pBlock
->w
= (blockBits
/ 3) + (((blockBits
% 3) > 1) ? 1 : 0);
989 pBlock
->h
= (blockBits
/ 3);
994 ************************************************************************************************************************
995 * Gfx10Lib::GetCompressedBlockSizeLog2
998 * Get compress block size
1002 ************************************************************************************************************************
// Writes the log2 dimensions of the compress block for the given data type into *pBlock.
// For Gfx10DataColor the result is whatever GetBlk256SizeLog2 produces for the same arguments.
// Any other data type is asserted to be Gfx10DataDepthStencil or Gfx10DataFmask.
// NOTE(review): the values written for the non-color case (source lines 1020-1023), braces and the
// else line are absent from this listing.
1004 void Gfx10Lib::GetCompressedBlockSizeLog2(
1005 Gfx10DataType dataType
, ///< [in] Data type
1006 AddrResourceType resourceType
, ///< [in] Resource type
1007 AddrSwizzleMode swizzleMode
, ///< [in] Swizzle mode
1008 UINT_32 elemLog2
, ///< [in] element size log2
1009 UINT_32 numSamplesLog2
, ///< [in] number of samples
1010 Dim3d
* pBlock
///< [out] block size
1013 if (dataType
== Gfx10DataColor
)
1015 GetBlk256SizeLog2(resourceType
, swizzleMode
, elemLog2
, numSamplesLog2
, pBlock
);
1019 ADDR_ASSERT((dataType
== Gfx10DataDepthStencil
) || (dataType
== Gfx10DataFmask
));
1027 ************************************************************************************************************************
1028 * Gfx10Lib::GetMetaOverlapLog2
1031 * Get meta block overlap
1035 ************************************************************************************************************************
// Computes the meta block overlap (log2): the effective pipe count (GetEffectiveNumPipes) minus the
// larger of the compress block size and the 256B micro block size (both log2), clamped so the
// result is never negative.
// NOTE(review): the declarations of compBlock/microBlock (source lines 1044-1046), the bodies of
// the two adjustment branches, braces and the return statement are absent from this listing.
1037 INT_32
Gfx10Lib::GetMetaOverlapLog2(
1038 Gfx10DataType dataType
, ///< [in] Data type
1039 AddrResourceType resourceType
, ///< [in] Resource type
1040 AddrSwizzleMode swizzleMode
, ///< [in] Swizzle mode
1041 UINT_32 elemLog2
, ///< [in] element size log2
1042 UINT_32 numSamplesLog2
///< [in] number of samples
1048 GetCompressedBlockSizeLog2(dataType
, resourceType
, swizzleMode
, elemLog2
, numSamplesLog2
, &compBlock
);
// The micro block is returned through an out-pointer, exactly like compBlock above.
1049 GetBlk256SizeLog2(resourceType
, swizzleMode
, elemLog2
, numSamplesLog2
, &microBlock
);
// Sizes are sums of the per-axis log2 dimensions.
1051 const INT_32 compSizeLog2
= compBlock
.w
+ compBlock
.h
+ compBlock
.d
;
1052 const INT_32 blk256SizeLog2
= microBlock
.w
+ microBlock
.h
+ microBlock
.d
;
1053 const INT_32 maxSizeLog2
= Max(compSizeLog2
, blk256SizeLog2
);
1054 const INT_32 numPipesLog2
= GetEffectiveNumPipes();
1055 INT_32 overlap
= numPipesLog2
- maxSizeLog2
;
// Adjustment for RB+ hardware with more than two pipes (body not visible in this listing).
1057 if ((numPipesLog2
> 1) && m_settings
.supportRbPlus
)
1062 // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1063 if ((elemLog2
== 4) && (numSamplesLog2
== 3))
// Overlap is never reported as negative.
1067 overlap
= Max(overlap
, 0);
1072 ************************************************************************************************************************
1073 * Gfx10Lib::Get3DMetaOverlapLog2
1076 * Get 3d meta block overlap
1080 ************************************************************************************************************************
// Computes the 3D meta block overlap (log2), starting from the effective pipe count
// (GetEffectiveNumPipes) minus the log2 width of the 256B micro block for zero samples.
// NOTE(review): the declaration of microBlock, the bodies of the two adjustment branches, braces
// and the return statement are absent from this listing.
1082 INT_32
Gfx10Lib::Get3DMetaOverlapLog2(
1083 AddrResourceType resourceType
, ///< [in] Resource type
1084 AddrSwizzleMode swizzleMode
, ///< [in] Swizzle mode
1085 UINT_32 elemLog2
///< [in] element size log2
// The micro block is returned through an out-pointer; numSamplesLog2 is passed as 0.
1089 GetBlk256SizeLog2(resourceType
, swizzleMode
, elemLog2
, 0, &microBlock
);
1091 INT_32 overlap
= GetEffectiveNumPipes() - static_cast<INT_32
>(microBlock
.w
);
// Adjustment for RB+ hardware (body not visible in this listing).
1093 if (m_settings
.supportRbPlus
)
// Special handling when the overlap is negative or the swizzle is a standard swizzle (body not
// visible in this listing).
1098 if ((overlap
< 0) || (IsStandardSwizzle(resourceType
, swizzleMode
) == TRUE
))
1106 ************************************************************************************************************************
1107 * Gfx10Lib::GetPipeRotateAmount
1110 * Get pipe rotate amount
1113 * Pipe rotate amount
1114 ************************************************************************************************************************
1117 INT_32
Gfx10Lib::GetPipeRotateAmount(
1118 AddrResourceType resourceType
, ///< [in] Resource type
1119 AddrSwizzleMode swizzleMode
///< [in] Swizzle mode
1124 if (m_settings
.supportRbPlus
&& (m_pipesLog2
>= (m_numSaLog2
+ 1)) && (m_pipesLog2
> 1))
1126 amount
= ((m_pipesLog2
== (m_numSaLog2
+ 1)) && IsRbAligned(resourceType
, swizzleMode
)) ?
1127 1 : m_pipesLog2
- (m_numSaLog2
+ 1);
1134 ************************************************************************************************************************
1135 * Gfx10Lib::GetMetaBlkSize
1138 * Get metadata block size
1142 ************************************************************************************************************************
1144 UINT_32
Gfx10Lib::GetMetaBlkSize(
1145 Gfx10DataType dataType
, ///< [in] Data type
1146 AddrResourceType resourceType
, ///< [in] Resource type
1147 AddrSwizzleMode swizzleMode
, ///< [in] Swizzle mode
1148 UINT_32 elemLog2
, ///< [in] element size log2
1149 UINT_32 numSamplesLog2
, ///< [in] number of samples
1150 BOOL_32 pipeAlign
, ///< [in] pipe align
1151 Dim3d
* pBlock
///< [out] block size
1154 INT_32 metablkSizeLog2
;
1155 const INT_32 metaElemSizeLog2
= GetMetaElementSizeLog2(dataType
);
1156 const INT_32 metaCacheSizeLog2
= GetMetaCacheSizeLog2(dataType
);
1157 const INT_32 compBlkSizeLog2
= (dataType
== Gfx10DataColor
) ? 8 : 6 + numSamplesLog2
+ elemLog2
;
1158 const INT_32 metaBlkSamplesLog2
= (dataType
== Gfx10DataDepthStencil
) ?
1159 numSamplesLog2
: Min(numSamplesLog2
, m_maxCompFragLog2
);
1160 const INT_32 dataBlkSizeLog2
= GetBlockSizeLog2(swizzleMode
);
1161 INT_32 numPipesLog2
= m_pipesLog2
;
1163 if (IsThin(resourceType
, swizzleMode
))
1165 if ((pipeAlign
== FALSE
) ||
1166 (IsStandardSwizzle(resourceType
, swizzleMode
) == TRUE
) ||
1167 (IsDisplaySwizzle(resourceType
, swizzleMode
) == TRUE
))
1171 metablkSizeLog2
= Max(static_cast<INT_32
>(m_pipeInterleaveLog2
) + numPipesLog2
, 12);
1172 metablkSizeLog2
= Min(metablkSizeLog2
, dataBlkSizeLog2
);
1176 metablkSizeLog2
= Min(dataBlkSizeLog2
, 12);
1181 if (m_settings
.supportRbPlus
&& (m_pipesLog2
== m_numSaLog2
+ 1) && (m_pipesLog2
> 1))
1186 INT_32 pipeRotateLog2
= GetPipeRotateAmount(resourceType
, swizzleMode
);
1188 if (numPipesLog2
>= 4)
1190 INT_32 overlapLog2
= GetMetaOverlapLog2(dataType
, resourceType
, swizzleMode
, elemLog2
, numSamplesLog2
);
1192 // In 16Bpe 8xaa, we have an extra overlap bit
1193 if ((pipeRotateLog2
> 0) &&
1195 (numSamplesLog2
== 3) &&
1196 (IsZOrderSwizzle(swizzleMode
) || (GetEffectiveNumPipes() > 3)))
1201 metablkSizeLog2
= metaCacheSizeLog2
+ overlapLog2
+ numPipesLog2
;
1202 metablkSizeLog2
= Max(metablkSizeLog2
, static_cast<INT_32
>(m_pipeInterleaveLog2
) + numPipesLog2
);
1204 if (m_settings
.supportRbPlus
&&
1205 IsRtOptSwizzle(swizzleMode
) &&
1206 (numPipesLog2
== 6) &&
1207 (numSamplesLog2
== 3) &&
1208 (m_maxCompFragLog2
== 3) &&
1209 (metablkSizeLog2
< 15))
1211 metablkSizeLog2
= 15;
1216 metablkSizeLog2
= Max(static_cast<INT_32
>(m_pipeInterleaveLog2
) + numPipesLog2
, 12);
1219 if (dataType
== Gfx10DataDepthStencil
)
1221 // For htile surfaces, pad meta block size to 2K * num_pipes
1222 metablkSizeLog2
= Max(metablkSizeLog2
, 11 + numPipesLog2
);
1225 const INT_32 compFragLog2
= Min(m_maxCompFragLog2
, numSamplesLog2
);
1227 if (IsRtOptSwizzle(swizzleMode
) && (compFragLog2
> 1) && (pipeRotateLog2
>= 1))
1229 const INT_32 tmp
= 8 + m_pipesLog2
+ Max(pipeRotateLog2
, compFragLog2
- 1);
1231 metablkSizeLog2
= Max(metablkSizeLog2
, tmp
);
1235 const INT_32 metablkBitsLog2
=
1236 metablkSizeLog2
+ compBlkSizeLog2
- elemLog2
- metaBlkSamplesLog2
- metaElemSizeLog2
;
1237 pBlock
->w
= 1 << ((metablkBitsLog2
>> 1) + (metablkBitsLog2
& 1));
1238 pBlock
->h
= 1 << (metablkBitsLog2
>> 1);
1243 ADDR_ASSERT(IsThick(resourceType
, swizzleMode
));
1247 if (m_settings
.supportRbPlus
&&
1248 (m_pipesLog2
== m_numSaLog2
+ 1) &&
1249 (m_pipesLog2
> 1) &&
1250 IsRbAligned(resourceType
, swizzleMode
))
1255 const INT_32 overlapLog2
= Get3DMetaOverlapLog2(resourceType
, swizzleMode
, elemLog2
);
1257 metablkSizeLog2
= metaCacheSizeLog2
+ overlapLog2
+ numPipesLog2
;
1258 metablkSizeLog2
= Max(metablkSizeLog2
, static_cast<INT_32
>(m_pipeInterleaveLog2
) + numPipesLog2
);
1259 metablkSizeLog2
= Max(metablkSizeLog2
, 12);
1263 metablkSizeLog2
= 12;
1266 const INT_32 metablkBitsLog2
=
1267 metablkSizeLog2
+ compBlkSizeLog2
- elemLog2
- metaBlkSamplesLog2
- metaElemSizeLog2
;
1268 pBlock
->w
= 1 << ((metablkBitsLog2
/ 3) + (((metablkBitsLog2
% 3) > 0) ? 1 : 0));
1269 pBlock
->h
= 1 << ((metablkBitsLog2
/ 3) + (((metablkBitsLog2
% 3) > 1) ? 1 : 0));
1270 pBlock
->d
= 1 << (metablkBitsLog2
/ 3);
1273 return (1 << static_cast<UINT_32
>(metablkSizeLog2
));
1277 ************************************************************************************************************************
1278 * Gfx10Lib::ConvertSwizzlePatternToEquation
1281 * Convert swizzle pattern to equation.
1285 ************************************************************************************************************************
1287 VOID
Gfx10Lib::ConvertSwizzlePatternToEquation(
1288 UINT_32 elemLog2
, ///< [in] element bytes log2
1289 AddrResourceType rsrcType
, ///< [in] resource type
1290 AddrSwizzleMode swMode
, ///< [in] swizzle mode
1291 const ADDR_SW_PATINFO
* pPatInfo
, ///< [in] swizzle pattern infor
1292 ADDR_EQUATION
* pEquation
) ///< [out] equation converted from swizzle pattern
1295 ADDR_BIT_SETTING fullSwizzlePattern
[20];
1296 GetSwizzlePatternFromPatternInfo(pPatInfo
, fullSwizzlePattern
);
1298 const ADDR_BIT_SETTING
* pSwizzle
= fullSwizzlePattern
;
1299 const UINT_32 blockSizeLog2
= GetBlockSizeLog2(swMode
);
1301 pEquation
->numBits
= blockSizeLog2
;
1302 pEquation
->stackedDepthSlices
= FALSE
;
1304 for (UINT_32 i
= 0; i
< elemLog2
; i
++)
1306 pEquation
->addr
[i
].channel
= 0;
1307 pEquation
->addr
[i
].valid
= 1;
1308 pEquation
->addr
[i
].index
= i
;
1311 if (IsXor(swMode
) == FALSE
)
1313 for (UINT_32 i
= elemLog2
; i
< blockSizeLog2
; i
++)
1315 ADDR_ASSERT(IsPow2(pSwizzle
[i
].value
));
1317 if (pSwizzle
[i
].x
!= 0)
1319 ADDR_ASSERT(IsPow2(static_cast<UINT_32
>(pSwizzle
[i
].x
)));
1321 pEquation
->addr
[i
].channel
= 0;
1322 pEquation
->addr
[i
].valid
= 1;
1323 pEquation
->addr
[i
].index
= Log2(pSwizzle
[i
].x
) + elemLog2
;
1325 else if (pSwizzle
[i
].y
!= 0)
1327 ADDR_ASSERT(IsPow2(static_cast<UINT_32
>(pSwizzle
[i
].y
)));
1329 pEquation
->addr
[i
].channel
= 1;
1330 pEquation
->addr
[i
].valid
= 1;
1331 pEquation
->addr
[i
].index
= Log2(pSwizzle
[i
].y
);
1335 ADDR_ASSERT(pSwizzle
[i
].z
!= 0);
1336 ADDR_ASSERT(IsPow2(static_cast<UINT_32
>(pSwizzle
[i
].z
)));
1338 pEquation
->addr
[i
].channel
= 2;
1339 pEquation
->addr
[i
].valid
= 1;
1340 pEquation
->addr
[i
].index
= Log2(pSwizzle
[i
].z
);
1343 pEquation
->xor1
[i
].value
= 0;
1344 pEquation
->xor2
[i
].value
= 0;
1347 else if (IsThin(rsrcType
, swMode
))
1350 ComputeThinBlockDimension(&dim
.w
, &dim
.h
, &dim
.d
, 8u << elemLog2
, 0, rsrcType
, swMode
);
1352 const UINT_32 blkXLog2
= Log2(dim
.w
);
1353 const UINT_32 blkYLog2
= Log2(dim
.h
);
1354 const UINT_32 blkXMask
= dim
.w
- 1;
1355 const UINT_32 blkYMask
= dim
.h
- 1;
1357 ADDR_BIT_SETTING swizzle
[ADDR_MAX_EQUATION_BIT
];
1360 UINT_32 bMask
= (1 << elemLog2
) - 1;
1362 for (UINT_32 i
= elemLog2
; i
< blockSizeLog2
; i
++)
1364 if (IsPow2(pSwizzle
[i
].value
))
1366 if (pSwizzle
[i
].x
!= 0)
1368 ADDR_ASSERT((xMask
& pSwizzle
[i
].x
) == 0);
1369 xMask
|= pSwizzle
[i
].x
;
1371 const UINT_32 xLog2
= Log2(pSwizzle
[i
].x
);
1373 ADDR_ASSERT(xLog2
< blkXLog2
);
1375 pEquation
->addr
[i
].channel
= 0;
1376 pEquation
->addr
[i
].valid
= 1;
1377 pEquation
->addr
[i
].index
= xLog2
+ elemLog2
;
1381 ADDR_ASSERT(pSwizzle
[i
].y
!= 0);
1382 ADDR_ASSERT((yMask
& pSwizzle
[i
].y
) == 0);
1383 yMask
|= pSwizzle
[i
].y
;
1385 pEquation
->addr
[i
].channel
= 1;
1386 pEquation
->addr
[i
].valid
= 1;
1387 pEquation
->addr
[i
].index
= Log2(pSwizzle
[i
].y
);
1389 ADDR_ASSERT(pEquation
->addr
[i
].index
< blkYLog2
);
1392 swizzle
[i
].value
= 0;
1397 if (pSwizzle
[i
].z
!= 0)
1399 ADDR_ASSERT(IsPow2(static_cast<UINT_32
>(pSwizzle
[i
].z
)));
1401 pEquation
->xor2
[i
].channel
= 2;
1402 pEquation
->xor2
[i
].valid
= 1;
1403 pEquation
->xor2
[i
].index
= Log2(pSwizzle
[i
].z
);
1406 swizzle
[i
].x
= pSwizzle
[i
].x
;
1407 swizzle
[i
].y
= pSwizzle
[i
].y
;
1408 swizzle
[i
].z
= swizzle
[i
].s
= 0;
1410 ADDR_ASSERT(IsPow2(swizzle
[i
].value
) == FALSE
);
1412 const UINT_32 xHi
= swizzle
[i
].x
& (~blkXMask
);
1416 ADDR_ASSERT(IsPow2(xHi
));
1417 ADDR_ASSERT(pEquation
->xor1
[i
].value
== 0);
1419 pEquation
->xor1
[i
].channel
= 0;
1420 pEquation
->xor1
[i
].valid
= 1;
1421 pEquation
->xor1
[i
].index
= Log2(xHi
) + elemLog2
;
1423 swizzle
[i
].x
&= blkXMask
;
1426 const UINT_32 yHi
= swizzle
[i
].y
& (~blkYMask
);
1430 ADDR_ASSERT(IsPow2(yHi
));
1434 ADDR_ASSERT(pEquation
->xor1
[i
].value
== 0);
1435 pEquation
->xor1
[i
].channel
= 1;
1436 pEquation
->xor1
[i
].valid
= 1;
1437 pEquation
->xor1
[i
].index
= Log2(yHi
);
1441 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1442 pEquation
->xor2
[i
].channel
= 1;
1443 pEquation
->xor2
[i
].valid
= 1;
1444 pEquation
->xor2
[i
].index
= Log2(yHi
);
1447 swizzle
[i
].y
&= blkYMask
;
1450 if (swizzle
[i
].value
== 0)
1457 const UINT_32 pipeIntMask
= (1 << m_pipeInterleaveLog2
) - 1;
1458 const UINT_32 blockMask
= (1 << blockSizeLog2
) - 1;
1460 ADDR_ASSERT((bMask
& pipeIntMask
) == pipeIntMask
);
1462 while (bMask
!= blockMask
)
1464 for (UINT_32 i
= m_pipeInterleaveLog2
; i
< blockSizeLog2
; i
++)
1466 if ((bMask
& (1 << i
)) == 0)
1468 if (IsPow2(swizzle
[i
].value
))
1470 if (swizzle
[i
].x
!= 0)
1472 ADDR_ASSERT((xMask
& swizzle
[i
].x
) == 0);
1473 xMask
|= swizzle
[i
].x
;
1475 const UINT_32 xLog2
= Log2(swizzle
[i
].x
);
1477 ADDR_ASSERT(xLog2
< blkXLog2
);
1479 pEquation
->addr
[i
].channel
= 0;
1480 pEquation
->addr
[i
].valid
= 1;
1481 pEquation
->addr
[i
].index
= xLog2
+ elemLog2
;
1485 ADDR_ASSERT(swizzle
[i
].y
!= 0);
1486 ADDR_ASSERT((yMask
& swizzle
[i
].y
) == 0);
1487 yMask
|= swizzle
[i
].y
;
1489 pEquation
->addr
[i
].channel
= 1;
1490 pEquation
->addr
[i
].valid
= 1;
1491 pEquation
->addr
[i
].index
= Log2(swizzle
[i
].y
);
1493 ADDR_ASSERT(pEquation
->addr
[i
].index
< blkYLog2
);
1496 swizzle
[i
].value
= 0;
1501 const UINT_32 x
= swizzle
[i
].x
& xMask
;
1502 const UINT_32 y
= swizzle
[i
].y
& yMask
;
1506 ADDR_ASSERT(IsPow2(x
));
1508 if (pEquation
->xor1
[i
].value
== 0)
1510 pEquation
->xor1
[i
].channel
= 0;
1511 pEquation
->xor1
[i
].valid
= 1;
1512 pEquation
->xor1
[i
].index
= Log2(x
) + elemLog2
;
1516 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1517 pEquation
->xor2
[i
].channel
= 0;
1518 pEquation
->xor2
[i
].valid
= 1;
1519 pEquation
->xor2
[i
].index
= Log2(x
) + elemLog2
;
1525 ADDR_ASSERT(IsPow2(y
));
1527 if (pEquation
->xor1
[i
].value
== 0)
1529 pEquation
->xor1
[i
].channel
= 1;
1530 pEquation
->xor1
[i
].valid
= 1;
1531 pEquation
->xor1
[i
].index
= Log2(y
);
1535 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1536 pEquation
->xor2
[i
].channel
= 1;
1537 pEquation
->xor2
[i
].valid
= 1;
1538 pEquation
->xor2
[i
].index
= Log2(y
);
1549 ADDR_ASSERT((xMask
== blkXMask
) && (yMask
== blkYMask
));
1553 const UINT_32 blkXLog2
= (blockSizeLog2
== 12) ? Block4K_Log2_3d
[elemLog2
].w
: Block64K_Log2_3d
[elemLog2
].w
;
1554 const UINT_32 blkYLog2
= (blockSizeLog2
== 12) ? Block4K_Log2_3d
[elemLog2
].h
: Block64K_Log2_3d
[elemLog2
].h
;
1555 const UINT_32 blkZLog2
= (blockSizeLog2
== 12) ? Block4K_Log2_3d
[elemLog2
].d
: Block64K_Log2_3d
[elemLog2
].d
;
1556 const UINT_32 blkXMask
= (1 << blkXLog2
) - 1;
1557 const UINT_32 blkYMask
= (1 << blkYLog2
) - 1;
1558 const UINT_32 blkZMask
= (1 << blkZLog2
) - 1;
1560 ADDR_BIT_SETTING swizzle
[ADDR_MAX_EQUATION_BIT
];
1564 UINT_32 bMask
= (1 << elemLog2
) - 1;
1566 for (UINT_32 i
= elemLog2
; i
< blockSizeLog2
; i
++)
1568 if (IsPow2(pSwizzle
[i
].value
))
1570 if (pSwizzle
[i
].x
!= 0)
1572 ADDR_ASSERT((xMask
& pSwizzle
[i
].x
) == 0);
1573 xMask
|= pSwizzle
[i
].x
;
1575 const UINT_32 xLog2
= Log2(pSwizzle
[i
].x
);
1577 ADDR_ASSERT(xLog2
< blkXLog2
);
1579 pEquation
->addr
[i
].channel
= 0;
1580 pEquation
->addr
[i
].valid
= 1;
1581 pEquation
->addr
[i
].index
= xLog2
+ elemLog2
;
1583 else if (pSwizzle
[i
].y
!= 0)
1585 ADDR_ASSERT((yMask
& pSwizzle
[i
].y
) == 0);
1586 yMask
|= pSwizzle
[i
].y
;
1588 pEquation
->addr
[i
].channel
= 1;
1589 pEquation
->addr
[i
].valid
= 1;
1590 pEquation
->addr
[i
].index
= Log2(pSwizzle
[i
].y
);
1592 ADDR_ASSERT(pEquation
->addr
[i
].index
< blkYLog2
);
1596 ADDR_ASSERT(pSwizzle
[i
].z
!= 0);
1597 ADDR_ASSERT((zMask
& pSwizzle
[i
].z
) == 0);
1598 zMask
|= pSwizzle
[i
].z
;
1600 pEquation
->addr
[i
].channel
= 2;
1601 pEquation
->addr
[i
].valid
= 1;
1602 pEquation
->addr
[i
].index
= Log2(pSwizzle
[i
].z
);
1604 ADDR_ASSERT(pEquation
->addr
[i
].index
< blkZLog2
);
1607 swizzle
[i
].value
= 0;
1612 swizzle
[i
].x
= pSwizzle
[i
].x
;
1613 swizzle
[i
].y
= pSwizzle
[i
].y
;
1614 swizzle
[i
].z
= pSwizzle
[i
].z
;
1617 ADDR_ASSERT(IsPow2(swizzle
[i
].value
) == FALSE
);
1619 const UINT_32 xHi
= swizzle
[i
].x
& (~blkXMask
);
1620 const UINT_32 yHi
= swizzle
[i
].y
& (~blkYMask
);
1621 const UINT_32 zHi
= swizzle
[i
].z
& (~blkZMask
);
1623 ADDR_ASSERT((xHi
== 0) || (yHi
== 0) || (zHi
== 0));
1627 ADDR_ASSERT(IsPow2(xHi
));
1628 ADDR_ASSERT(pEquation
->xor1
[i
].value
== 0);
1630 pEquation
->xor1
[i
].channel
= 0;
1631 pEquation
->xor1
[i
].valid
= 1;
1632 pEquation
->xor1
[i
].index
= Log2(xHi
) + elemLog2
;
1634 swizzle
[i
].x
&= blkXMask
;
1639 ADDR_ASSERT(IsPow2(yHi
));
1641 if (pEquation
->xor1
[i
].value
== 0)
1643 pEquation
->xor1
[i
].channel
= 1;
1644 pEquation
->xor1
[i
].valid
= 1;
1645 pEquation
->xor1
[i
].index
= Log2(yHi
);
1649 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1650 pEquation
->xor2
[i
].channel
= 1;
1651 pEquation
->xor2
[i
].valid
= 1;
1652 pEquation
->xor2
[i
].index
= Log2(yHi
);
1655 swizzle
[i
].y
&= blkYMask
;
1660 ADDR_ASSERT(IsPow2(zHi
));
1662 if (pEquation
->xor1
[i
].value
== 0)
1664 pEquation
->xor1
[i
].channel
= 2;
1665 pEquation
->xor1
[i
].valid
= 1;
1666 pEquation
->xor1
[i
].index
= Log2(zHi
);
1670 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1671 pEquation
->xor2
[i
].channel
= 2;
1672 pEquation
->xor2
[i
].valid
= 1;
1673 pEquation
->xor2
[i
].index
= Log2(zHi
);
1676 swizzle
[i
].z
&= blkZMask
;
1679 if (swizzle
[i
].value
== 0)
1686 const UINT_32 pipeIntMask
= (1 << m_pipeInterleaveLog2
) - 1;
1687 const UINT_32 blockMask
= (1 << blockSizeLog2
) - 1;
1689 ADDR_ASSERT((bMask
& pipeIntMask
) == pipeIntMask
);
1691 while (bMask
!= blockMask
)
1693 for (UINT_32 i
= m_pipeInterleaveLog2
; i
< blockSizeLog2
; i
++)
1695 if ((bMask
& (1 << i
)) == 0)
1697 if (IsPow2(swizzle
[i
].value
))
1699 if (swizzle
[i
].x
!= 0)
1701 ADDR_ASSERT((xMask
& swizzle
[i
].x
) == 0);
1702 xMask
|= swizzle
[i
].x
;
1704 const UINT_32 xLog2
= Log2(swizzle
[i
].x
);
1706 ADDR_ASSERT(xLog2
< blkXLog2
);
1708 pEquation
->addr
[i
].channel
= 0;
1709 pEquation
->addr
[i
].valid
= 1;
1710 pEquation
->addr
[i
].index
= xLog2
+ elemLog2
;
1712 else if (swizzle
[i
].y
!= 0)
1714 ADDR_ASSERT((yMask
& swizzle
[i
].y
) == 0);
1715 yMask
|= swizzle
[i
].y
;
1717 pEquation
->addr
[i
].channel
= 1;
1718 pEquation
->addr
[i
].valid
= 1;
1719 pEquation
->addr
[i
].index
= Log2(swizzle
[i
].y
);
1721 ADDR_ASSERT(pEquation
->addr
[i
].index
< blkYLog2
);
1725 ADDR_ASSERT(swizzle
[i
].z
!= 0);
1726 ADDR_ASSERT((zMask
& swizzle
[i
].z
) == 0);
1727 zMask
|= swizzle
[i
].z
;
1729 pEquation
->addr
[i
].channel
= 2;
1730 pEquation
->addr
[i
].valid
= 1;
1731 pEquation
->addr
[i
].index
= Log2(swizzle
[i
].z
);
1733 ADDR_ASSERT(pEquation
->addr
[i
].index
< blkZLog2
);
1736 swizzle
[i
].value
= 0;
1741 const UINT_32 x
= swizzle
[i
].x
& xMask
;
1742 const UINT_32 y
= swizzle
[i
].y
& yMask
;
1743 const UINT_32 z
= swizzle
[i
].z
& zMask
;
1747 ADDR_ASSERT(IsPow2(x
));
1749 if (pEquation
->xor1
[i
].value
== 0)
1751 pEquation
->xor1
[i
].channel
= 0;
1752 pEquation
->xor1
[i
].valid
= 1;
1753 pEquation
->xor1
[i
].index
= Log2(x
) + elemLog2
;
1757 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1758 pEquation
->xor2
[i
].channel
= 0;
1759 pEquation
->xor2
[i
].valid
= 1;
1760 pEquation
->xor2
[i
].index
= Log2(x
) + elemLog2
;
1766 ADDR_ASSERT(IsPow2(y
));
1768 if (pEquation
->xor1
[i
].value
== 0)
1770 pEquation
->xor1
[i
].channel
= 1;
1771 pEquation
->xor1
[i
].valid
= 1;
1772 pEquation
->xor1
[i
].index
= Log2(y
);
1776 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1777 pEquation
->xor2
[i
].channel
= 1;
1778 pEquation
->xor2
[i
].valid
= 1;
1779 pEquation
->xor2
[i
].index
= Log2(y
);
1785 ADDR_ASSERT(IsPow2(z
));
1787 if (pEquation
->xor1
[i
].value
== 0)
1789 pEquation
->xor1
[i
].channel
= 2;
1790 pEquation
->xor1
[i
].valid
= 1;
1791 pEquation
->xor1
[i
].index
= Log2(z
);
1795 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1796 pEquation
->xor2
[i
].channel
= 2;
1797 pEquation
->xor2
[i
].valid
= 1;
1798 pEquation
->xor2
[i
].index
= Log2(z
);
1810 ADDR_ASSERT((xMask
== blkXMask
) && (yMask
== blkYMask
) && (zMask
== blkZMask
));
1815 ************************************************************************************************************************
1816 * Gfx10Lib::InitEquationTable
1819 * Initialize Equation table.
1823 ************************************************************************************************************************
1825 VOID
Gfx10Lib::InitEquationTable()
1827 memset(m_equationTable
, 0, sizeof(m_equationTable
));
1829 for (UINT_32 rsrcTypeIdx
= 0; rsrcTypeIdx
< MaxRsrcType
; rsrcTypeIdx
++)
1831 const AddrResourceType rsrcType
= static_cast<AddrResourceType
>(rsrcTypeIdx
+ ADDR_RSRC_TEX_2D
);
1833 for (UINT_32 swModeIdx
= 0; swModeIdx
< MaxSwModeType
; swModeIdx
++)
1835 const AddrSwizzleMode swMode
= static_cast<AddrSwizzleMode
>(swModeIdx
);
1837 for (UINT_32 elemLog2
= 0; elemLog2
< MaxElementBytesLog2
; elemLog2
++)
1839 UINT_32 equationIndex
= ADDR_INVALID_EQUATION_INDEX
;
1840 const ADDR_SW_PATINFO
* pPatInfo
= GetSwizzlePatternInfo(swMode
, rsrcType
, elemLog2
, 1);
1842 if (pPatInfo
!= NULL
)
1844 ADDR_ASSERT(IsValidSwMode(swMode
));
1846 if (pPatInfo
->maxItemCount
<= 3)
1848 ADDR_EQUATION equation
= {};
1850 ConvertSwizzlePatternToEquation(elemLog2
, rsrcType
, swMode
, pPatInfo
, &equation
);
1852 equationIndex
= m_numEquations
;
1853 ADDR_ASSERT(equationIndex
< EquationTableSize
);
1855 m_equationTable
[equationIndex
] = equation
;
1861 // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
1862 ADDR_ASSERT((elemLog2
== 3) || (elemLog2
== 4));
1863 ADDR_ASSERT(rsrcTypeIdx
== 1);
1864 ADDR_ASSERT(swMode
== ADDR_SW_64KB_D_X
);
1865 ADDR_ASSERT(m_settings
.supportRbPlus
== 1);
1869 m_equationLookupTable
[rsrcTypeIdx
][swModeIdx
][elemLog2
] = equationIndex
;
1876 ************************************************************************************************************************
1877 * Gfx10Lib::HwlGetEquationIndex
1880 * Interface function stub of GetEquationIndex
1884 ************************************************************************************************************************
1886 UINT_32
Gfx10Lib::HwlGetEquationIndex(
1887 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
1888 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
1891 UINT_32 equationIdx
= ADDR_INVALID_EQUATION_INDEX
;
1893 if ((pIn
->resourceType
== ADDR_RSRC_TEX_2D
) ||
1894 (pIn
->resourceType
== ADDR_RSRC_TEX_3D
))
1896 const UINT_32 rsrcTypeIdx
= static_cast<UINT_32
>(pIn
->resourceType
) - 1;
1897 const UINT_32 swModeIdx
= static_cast<UINT_32
>(pIn
->swizzleMode
);
1898 const UINT_32 elemLog2
= Log2(pIn
->bpp
>> 3);
1900 equationIdx
= m_equationLookupTable
[rsrcTypeIdx
][swModeIdx
][elemLog2
];
1903 if (pOut
->pMipInfo
!= NULL
)
1905 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
1907 pOut
->pMipInfo
[i
].equationIndex
= equationIdx
;
1915 ************************************************************************************************************************
1916 * Gfx10Lib::IsValidDisplaySwizzleMode
1919 * Check if a swizzle mode is supported by display engine
1922 * TRUE is swizzle mode is supported by display engine
1923 ************************************************************************************************************************
1925 BOOL_32
Gfx10Lib::IsValidDisplaySwizzleMode(
1926 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
///< [in] input structure
1929 ADDR_ASSERT(pIn
->resourceType
== ADDR_RSRC_TEX_2D
);
1931 BOOL_32 support
= FALSE
;
1933 if (m_settings
.isDcn2
)
1935 switch (pIn
->swizzleMode
)
1938 case ADDR_SW_4KB_D_X
:
1939 case ADDR_SW_64KB_D
:
1940 case ADDR_SW_64KB_D_T
:
1941 case ADDR_SW_64KB_D_X
:
1942 support
= (pIn
->bpp
== 64);
1945 case ADDR_SW_LINEAR
:
1947 case ADDR_SW_4KB_S_X
:
1948 case ADDR_SW_64KB_S
:
1949 case ADDR_SW_64KB_S_T
:
1950 case ADDR_SW_64KB_S_X
:
1951 case ADDR_SW_64KB_R_X
:
1952 support
= (pIn
->bpp
<= 64);
1961 ADDR_NOT_IMPLEMENTED();
1968 ************************************************************************************************************************
1969 * Gfx10Lib::GetMaxNumMipsInTail
1972 * Return max number of mips in tails
1975 * Max number of mips in tails
1976 ************************************************************************************************************************
1978 UINT_32
Gfx10Lib::GetMaxNumMipsInTail(
1979 UINT_32 blockSizeLog2
, ///< block size log2
1980 BOOL_32 isThin
///< is thin or thick
1983 UINT_32 effectiveLog2
= blockSizeLog2
;
1985 if (isThin
== FALSE
)
1987 effectiveLog2
-= (blockSizeLog2
- 8) / 3;
1990 return (effectiveLog2
<= 11) ? (1 + (1 << (effectiveLog2
- 9))) : (effectiveLog2
- 4);
1994 ************************************************************************************************************************
1995 * Gfx10Lib::HwlComputePipeBankXor
1998 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2002 ************************************************************************************************************************
2004 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputePipeBankXor(
2005 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT
* pIn
, ///< [in] input structure
2006 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT
* pOut
///< [out] output structure
2009 if (IsNonPrtXor(pIn
->swizzleMode
))
2011 const UINT_32 blockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
2012 const UINT_32 pipeBits
= GetPipeXorBits(blockBits
);
2013 const UINT_32 bankBits
= GetBankXorBits(blockBits
);
2015 UINT_32 pipeXor
= 0;
2016 UINT_32 bankXor
= 0;
2020 if (blockBits
== 16)
2022 const UINT_32 XorPatternLen
= 8;
2023 static const UINT_32 XorBank1b
[XorPatternLen
] = {0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80};
2024 static const UINT_32 XorBank2b
[XorPatternLen
] = {0x00, 0x80, 0x40, 0xC0, 0x80, 0x00, 0xC0, 0x40};
2025 static const UINT_32 XorBank3b
[XorPatternLen
] = {0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0};
2027 const UINT_32 index
= pIn
->surfIndex
% XorPatternLen
;
2031 bankXor
= XorBank1b
[index
];
2033 else if (bankBits
== 2)
2035 bankXor
= XorBank2b
[index
];
2039 bankXor
= XorBank3b
[index
];
2043 bankXor
>>= (2 - pipeBits
);
2049 pOut
->pipeBankXor
= bankXor
| pipeXor
;
2053 pOut
->pipeBankXor
= 0;
2060 ************************************************************************************************************************
2061 * Gfx10Lib::HwlComputeSlicePipeBankXor
2064 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2068 ************************************************************************************************************************
2070 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeSlicePipeBankXor(
2071 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT
* pIn
, ///< [in] input structure
2072 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT
* pOut
///< [out] output structure
2075 if (IsNonPrtXor(pIn
->swizzleMode
))
2077 const UINT_32 blockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
2078 const UINT_32 pipeBits
= GetPipeXorBits(blockBits
);
2079 const UINT_32 pipeXor
= ReverseBitVector(pIn
->slice
, pipeBits
);
2081 pOut
->pipeBankXor
= pIn
->basePipeBankXor
^ pipeXor
;
2085 pOut
->pipeBankXor
= 0;
2092 ************************************************************************************************************************
2093 * Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2096 * Compute sub resource offset to support swizzle pattern
2100 ************************************************************************************************************************
2102 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2103 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT
* pIn
, ///< [in] input structure
2104 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT
* pOut
///< [out] output structure
2107 ADDR_ASSERT(IsThin(pIn
->resourceType
, pIn
->swizzleMode
));
2109 pOut
->offset
= pIn
->slice
* pIn
->sliceSize
+ pIn
->macroBlockOffset
;
2115 ************************************************************************************************************************
2116 * Gfx10Lib::ValidateNonSwModeParams
2119 * Validate compute surface info params except swizzle mode
2122 * TRUE if parameters are valid, FALSE otherwise
2123 ************************************************************************************************************************
2125 BOOL_32
Gfx10Lib::ValidateNonSwModeParams(
2126 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
) const
2128 BOOL_32 valid
= TRUE
;
2130 if ((pIn
->bpp
== 0) || (pIn
->bpp
> 128) || (pIn
->width
== 0) || (pIn
->numFrags
> 8) || (pIn
->numSamples
> 16))
2132 ADDR_ASSERT_ALWAYS();
2136 if (pIn
->resourceType
>= ADDR_RSRC_MAX_TYPE
)
2138 ADDR_ASSERT_ALWAYS();
2142 const ADDR2_SURFACE_FLAGS flags
= pIn
->flags
;
2143 const AddrResourceType rsrcType
= pIn
->resourceType
;
2144 const BOOL_32 mipmap
= (pIn
->numMipLevels
> 1);
2145 const BOOL_32 msaa
= (pIn
->numFrags
> 1);
2146 const BOOL_32 display
= flags
.display
;
2147 const BOOL_32 tex3d
= IsTex3d(rsrcType
);
2148 const BOOL_32 tex2d
= IsTex2d(rsrcType
);
2149 const BOOL_32 tex1d
= IsTex1d(rsrcType
);
2150 const BOOL_32 stereo
= flags
.qbStereo
;
2152 // Resource type check
2155 if (msaa
|| display
|| stereo
)
2157 ADDR_ASSERT_ALWAYS();
2163 if ((msaa
&& mipmap
) || (stereo
&& msaa
) || (stereo
&& mipmap
))
2165 ADDR_ASSERT_ALWAYS();
2171 if (msaa
|| display
|| stereo
)
2173 ADDR_ASSERT_ALWAYS();
2179 ADDR_ASSERT_ALWAYS();
2187 ************************************************************************************************************************
2188 * Gfx10Lib::ValidateSwModeParams
2191 * Validate compute surface info related to swizzle mode
2194 * TRUE if parameters are valid, FALSE otherwise
2195 ************************************************************************************************************************
2197 BOOL_32
Gfx10Lib::ValidateSwModeParams(
2198 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
) const
2200 BOOL_32 valid
= TRUE
;
2202 if ((pIn
->swizzleMode
>= ADDR_SW_MAX_TYPE
) || (IsValidSwMode(pIn
->swizzleMode
) == FALSE
))
2204 ADDR_ASSERT_ALWAYS();
2208 const ADDR2_SURFACE_FLAGS flags
= pIn
->flags
;
2209 const AddrResourceType rsrcType
= pIn
->resourceType
;
2210 const AddrSwizzleMode swizzle
= pIn
->swizzleMode
;
2211 const BOOL_32 msaa
= (pIn
->numFrags
> 1);
2212 const BOOL_32 zbuffer
= flags
.depth
|| flags
.stencil
;
2213 const BOOL_32 color
= flags
.color
;
2214 const BOOL_32 display
= flags
.display
;
2215 const BOOL_32 tex3d
= IsTex3d(rsrcType
);
2216 const BOOL_32 tex2d
= IsTex2d(rsrcType
);
2217 const BOOL_32 tex1d
= IsTex1d(rsrcType
);
2218 const BOOL_32 thin3d
= flags
.view3dAs2dArray
;
2219 const BOOL_32 linear
= IsLinear(swizzle
);
2220 const BOOL_32 blk256B
= IsBlock256b(swizzle
);
2221 const BOOL_32 blkVar
= IsBlockVariable(swizzle
);
2222 const BOOL_32 isNonPrtXor
= IsNonPrtXor(swizzle
);
2223 const BOOL_32 prt
= flags
.prt
;
2224 const BOOL_32 fmask
= flags
.fmask
;
2227 if ((pIn
->numFrags
> 1) &&
2228 (GetBlockSize(swizzle
) < (m_pipeInterleaveBytes
* pIn
->numFrags
)))
2230 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2231 ADDR_ASSERT_ALWAYS();
2235 if (display
&& (IsValidDisplaySwizzleMode(pIn
) == FALSE
))
2237 ADDR_ASSERT_ALWAYS();
2241 if ((pIn
->bpp
== 96) && (linear
== FALSE
))
2243 ADDR_ASSERT_ALWAYS();
2247 const UINT_32 swizzleMask
= 1 << swizzle
;
2249 // Resource type check
2252 if ((swizzleMask
& Gfx10Rsrc1dSwModeMask
) == 0)
2254 ADDR_ASSERT_ALWAYS();
2260 if (((swizzleMask
& Gfx10Rsrc2dSwModeMask
) == 0) ||
2261 (prt
&& ((swizzleMask
& Gfx10Rsrc2dPrtSwModeMask
) == 0)) ||
2262 (fmask
&& ((swizzleMask
& Gfx10ZSwModeMask
) == 0)))
2264 ADDR_ASSERT_ALWAYS();
2270 if (((swizzleMask
& Gfx10Rsrc3dSwModeMask
) == 0) ||
2271 (prt
&& ((swizzleMask
& Gfx10Rsrc3dPrtSwModeMask
) == 0)) ||
2272 (thin3d
&& ((swizzleMask
& Gfx10Rsrc3dThinSwModeMask
) == 0)))
2274 ADDR_ASSERT_ALWAYS();
2279 // Swizzle type check
2282 if (zbuffer
|| msaa
|| (pIn
->bpp
== 0) || ((pIn
->bpp
% 8) != 0))
2284 ADDR_ASSERT_ALWAYS();
2288 else if (IsZOrderSwizzle(swizzle
))
2290 if ((pIn
->bpp
> 64) ||
2291 (msaa
&& (color
|| (pIn
->bpp
> 32))) ||
2292 ElemLib::IsBlockCompressed(pIn
->format
) ||
2293 ElemLib::IsMacroPixelPacked(pIn
->format
))
2295 ADDR_ASSERT_ALWAYS();
2299 else if (IsStandardSwizzle(rsrcType
, swizzle
))
2301 if (zbuffer
|| msaa
)
2303 ADDR_ASSERT_ALWAYS();
2307 else if (IsDisplaySwizzle(rsrcType
, swizzle
))
2309 if (zbuffer
|| msaa
)
2311 ADDR_ASSERT_ALWAYS();
2315 else if (IsRtOptSwizzle(swizzle
))
2319 ADDR_ASSERT_ALWAYS();
2325 ADDR_ASSERT_ALWAYS();
2332 if (zbuffer
|| tex3d
|| msaa
)
2334 ADDR_ASSERT_ALWAYS();
2340 if (m_blockVarSizeLog2
== 0)
2342 ADDR_ASSERT_ALWAYS();
2351 ************************************************************************************************************************
2352 * Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2355 * Compute surface info sanity check
2359 ************************************************************************************************************************
2361 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2362 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
///< [in] input structure
2365 return ValidateNonSwModeParams(pIn
) && ValidateSwModeParams(pIn
) ? ADDR_OK
: ADDR_INVALIDPARAMS
;
2369 ************************************************************************************************************************
2370 * Gfx10Lib::HwlGetPreferredSurfaceSetting
2373 * Internal function to get suggested surface information for cliet to use
2377 ************************************************************************************************************************
// Chooses a preferred swizzle mode for the surface described by pIn and
// reports the valid block / swizzle-mode / swizzle-type sets through pOut.
//
// Two paths are visible:
//   * FMASK (pIn->flags.fmask): only ADDR_SW_64KB_Z_X and ADDR_SW_VAR_Z_X
//     are candidates; the choice is made by comparing padded sizes.
//   * Everything else: allowedSwModeSet starts from the block types the
//     client did not forbid and is narrowed step by step (client preference,
//     maxAlign, resource type, format, MSAA, depth/stencil, display), then
//     reduced to one block type and one swizzle type before the final mode
//     is taken from the highest remaining bit.
//
// returnCode starts as ADDR_OK and is set to ADDR_INVALIDPARAMS on the
// "Invalid combination" paths.
// NOTE(review): this excerpt is missing several original lines (braces,
// else keywords, some call arguments), so branch structure described in the
// comments below is inferred from the visible statements - confirm against
// the full file.
2379 ADDR_E_RETURNCODE
Gfx10Lib::HwlGetPreferredSurfaceSetting(
2380 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT
* pIn
, ///< [in] input structure
2381 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT
* pOut
///< [out] output structure
2384 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
// ---- FMASK path ----
2386 if (pIn
->flags
.fmask
)
// 64KB is unusable when the client forbids macroThin64KB; VAR is unusable
// when the client forbids it or when m_blockVarSizeLog2 is 0.
2388 const BOOL_32 forbid64KbBlockType
= pIn
->forbiddenBlock
.macroThin64KB
? TRUE
: FALSE
;
2389 const BOOL_32 forbidVarBlockType
= ((m_blockVarSizeLog2
== 0) || (pIn
->forbiddenBlock
.var
!= 0));
2391 if (forbid64KbBlockType
&& forbidVarBlockType
)
2393 // Invalid combination...
2394 ADDR_ASSERT_ALWAYS();
2395 returnCode
= ADDR_INVALIDPARAMS
;
// FMASK output is always reported as a 2D resource with Z-type swizzle and
// XOR available; the block/mode sets mirror the two forbid flags.
2399 pOut
->resourceType
= ADDR_RSRC_TEX_2D
;
2400 pOut
->validBlockSet
.value
= 0;
2401 pOut
->validBlockSet
.macroThin64KB
= forbid64KbBlockType
? 0 : 1;
2402 pOut
->validBlockSet
.var
= forbidVarBlockType
? 0 : 1;
2403 pOut
->validSwModeSet
.value
= 0;
2404 pOut
->validSwModeSet
.sw64KB_Z_X
= forbid64KbBlockType
? 0 : 1;
2405 pOut
->validSwModeSet
.swVar_Z_X
= forbidVarBlockType
? 0 : 1;
2406 pOut
->canXor
= TRUE
;
2407 pOut
->validSwTypeSet
.value
= AddrSwSetZ
;
2408 pOut
->clientPreferredSwSet
= pOut
->validSwTypeSet
;
2410 BOOL_32 use64KbBlockType
= (forbid64KbBlockType
== FALSE
);
// Both candidates allowed: compute the padded size of each and compare.
2412 if ((forbid64KbBlockType
== FALSE
) && (forbidVarBlockType
== FALSE
))
2414 const UINT_8 maxFmaskSwizzleModeType
= 2;
// ratioLow:ratioHi weights the size comparison: 1:1 with minimizeAlign,
// 3:2 with opt4space, otherwise 2:1.
2415 const UINT_32 ratioLow
= pIn
->flags
.minimizeAlign
? 1 : (pIn
->flags
.opt4space
? 3 : 2);
2416 const UINT_32 ratioHi
= pIn
->flags
.minimizeAlign
? 1 : (pIn
->flags
.opt4space
? 2 : 1);
2417 const UINT_32 fmaskBpp
= GetFmaskBpp(pIn
->numSamples
, pIn
->numFrags
);
// Zero dimensions are treated as 1.
2418 const UINT_32 numSlices
= Max(pIn
->numSlices
, 1u);
2419 const UINT_32 width
= Max(pIn
->width
, 1u);
2420 const UINT_32 height
= Max(pIn
->height
, 1u);
// minSizeAlign is rounded up to a power of two and converted from bytes to
// elements (fmaskBpp >> 3 bytes each), with a floor of 1.
2421 const UINT_64 sizeAlignInElement
= Max(NextPow2(pIn
->minSizeAlign
) / (fmaskBpp
>> 3), 1u);
// Index 0 = 64KB candidate, index 1 = VAR candidate.
2423 AddrSwizzleMode swMode
[maxFmaskSwizzleModeType
] = {ADDR_SW_64KB_Z_X
, ADDR_SW_VAR_Z_X
};
2424 Dim3d blkDim
[maxFmaskSwizzleModeType
] = {{0}, {0}};
2425 Dim3d padDim
[maxFmaskSwizzleModeType
] = {{0}, {0}};
2426 UINT_64 padSize
[maxFmaskSwizzleModeType
] = {0};
2428 for (UINT_8 i
= 0; i
< maxFmaskSwizzleModeType
; i
++)
2430 ComputeBlockDimensionForSurf(&blkDim
[i
].w
,
2438 padSize
[i
] = ComputePadSize(&blkDim
[i
], width
, height
, numSlices
, &padDim
[i
]);
2439 padSize
[i
] = PowTwoAlign(padSize
[i
], sizeAlignInElement
);
// The comparison form depends on which candidate has the larger block:
// when VAR is at least as large as 64KB it must win with "<=" on the
// hi/low weighting, otherwise with "<" on the low/hi weighting.
2442 if (GetBlockSizeLog2(swMode
[1]) >= GetBlockSizeLog2(swMode
[0]))
2444 if ((padSize
[1] * ratioHi
) <= (padSize
[0] * ratioLow
))
2446 use64KbBlockType
= FALSE
;
2451 if ((padSize
[1] * ratioLow
) < (padSize
[0] * ratioHi
))
2453 use64KbBlockType
= FALSE
;
2457 else if (forbidVarBlockType
)
2459 use64KbBlockType
= TRUE
;
2462 if (use64KbBlockType
)
2464 pOut
->swizzleMode
= ADDR_SW_64KB_Z_X
;
2468 pOut
->swizzleMode
= ADDR_SW_VAR_Z_X
;
// ---- Non-FMASK path ----
2474 UINT_32 bpp
= pIn
->bpp
;
2475 UINT_32 width
= Max(pIn
->width
, 1u);
2476 UINT_32 height
= Max(pIn
->height
, 1u);
2478 // Set format to INVALID will skip this conversion
2479 if (pIn
->format
!= ADDR_FMT_INVALID
)
2481 ElemMode elemMode
= ADDR_UNCOMPRESSED
;
2482 UINT_32 expandX
, expandY
;
2484 // Get compression/expansion factors and element mode which indicates compression/expansion
2485 bpp
= GetElemLib()->GetBitsPerPixel(pIn
->format
,
2490 UINT_32 basePitch
= 0;
2491 GetElemLib()->AdjustSurfaceInfo(elemMode
,
// Zero counts are treated as 1; numFrags == 0 means "same as numSamples".
2500 const UINT_32 numSlices
= Max(pIn
->numSlices
, 1u);
2501 const UINT_32 numMipLevels
= Max(pIn
->numMipLevels
, 1u);
2502 const UINT_32 numSamples
= Max(pIn
->numSamples
, 1u);
2503 const UINT_32 numFrags
= (pIn
->numFrags
== 0) ? numSamples
: pIn
->numFrags
;
2504 const BOOL_32 msaa
= (numFrags
> 1) || (numSamples
> 1);
2506 // Pre sanity check on non swizzle mode parameters
// localIn is a zero-initialised surface-info input filled with the
// normalised values above so the shared validators can be reused.
2507 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn
= {};
2508 localIn
.flags
= pIn
->flags
;
2509 localIn
.resourceType
= pIn
->resourceType
;
2510 localIn
.format
= pIn
->format
;
2512 localIn
.width
= width
;
2513 localIn
.height
= height
;
2514 localIn
.numSlices
= numSlices
;
2515 localIn
.numMipLevels
= numMipLevels
;
2516 localIn
.numSamples
= numSamples
;
2517 localIn
.numFrags
= numFrags
;
2519 if (ValidateNonSwModeParams(&localIn
))
2521 // Forbid swizzle mode(s) by client setting
// Each statement ORs in the mode mask of one block type unless the client
// forbade that block type. The thin/thick variants are selected by
// comparing pOut->resourceType against ADDR_RSRC_TEX_3D.
// NOTE(review): these tests read pOut->resourceType, which in the visible
// code is only assigned from pIn->resourceType further down (original line
// 2655) - confirm that reading the output field here is intended.
2522 ADDR2_SWMODE_SET allowedSwModeSet
= {};
2523 allowedSwModeSet
.value
|= pIn
->forbiddenBlock
.linear
? 0 : Gfx10LinearSwModeMask
;
2524 allowedSwModeSet
.value
|= pIn
->forbiddenBlock
.micro
? 0 : Gfx10Blk256BSwModeMask
;
2525 allowedSwModeSet
.value
|=
2526 pIn
->forbiddenBlock
.macroThin4KB
? 0 :
2527 ((pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ? 0 : Gfx10Blk4KBSwModeMask
);
2528 allowedSwModeSet
.value
|=
2529 pIn
->forbiddenBlock
.macroThick4KB
? 0 :
2530 ((pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ? Gfx10Rsrc3dThick4KBSwModeMask
: 0);
2531 allowedSwModeSet
.value
|=
2532 pIn
->forbiddenBlock
.macroThin64KB
? 0 :
2533 ((pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ? Gfx10Rsrc3dThin64KBSwModeMask
: Gfx10Blk64KBSwModeMask
);
2534 allowedSwModeSet
.value
|=
2535 pIn
->forbiddenBlock
.macroThick64KB
? 0 :
2536 ((pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ? Gfx10Rsrc3dThick64KBSwModeMask
: 0);
2537 allowedSwModeSet
.value
|=
2538 pIn
->forbiddenBlock
.var
? 0 : (m_blockVarSizeLog2
? Gfx10BlkVarSwModeMask
: 0);
// If the client expressed a swizzle-type preference, drop every swizzle
// type (Z/S/D/R) whose preference bit is not set.
2540 if (pIn
->preferredSwSet
.value
!= 0)
2542 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_Z
? ~0 : ~Gfx10ZSwModeMask
;
2543 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_S
? ~0 : ~Gfx10StandardSwModeMask
;
2544 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_D
? ~0 : ~Gfx10DisplaySwModeMask
;
2545 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_R
? ~0 : ~Gfx10RenderSwModeMask
;
// Removes all XOR swizzle modes.
// NOTE(review): the condition guarding this statement is not visible in
// this excerpt.
2550 allowedSwModeSet
.value
&= ~Gfx10XorSwModeMask
;
// A non-zero maxAlign removes every block type whose size exceeds it.
2553 if (pIn
->maxAlign
> 0)
2555 if (pIn
->maxAlign
< (1u << m_blockVarSizeLog2
))
2557 allowedSwModeSet
.value
&= ~Gfx10BlkVarSwModeMask
;
2560 if (pIn
->maxAlign
< Size64K
)
2562 allowedSwModeSet
.value
&= ~Gfx10Blk64KBSwModeMask
;
2565 if (pIn
->maxAlign
< Size4K
)
2567 allowedSwModeSet
.value
&= ~Gfx10Blk4KBSwModeMask
;
2570 if (pIn
->maxAlign
< Size256
)
2572 allowedSwModeSet
.value
&= ~Gfx10Blk256BSwModeMask
;
2576 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2577 switch (pIn
->resourceType
)
2579 case ADDR_RSRC_TEX_1D
:
2580 allowedSwModeSet
.value
&= Gfx10Rsrc1dSwModeMask
;
2583 case ADDR_RSRC_TEX_2D
:
2584 allowedSwModeSet
.value
&= pIn
->flags
.prt
? Gfx10Rsrc2dPrtSwModeMask
: Gfx10Rsrc2dSwModeMask
;
2587 case ADDR_RSRC_TEX_3D
:
2588 allowedSwModeSet
.value
&= pIn
->flags
.prt
? Gfx10Rsrc3dPrtSwModeMask
: Gfx10Rsrc3dSwModeMask
;
// 3D surfaces viewed as 2D arrays are limited to the thin 3D modes.
2590 if (pIn
->flags
.view3dAs2dArray
)
2592 allowedSwModeSet
.value
&= Gfx10Rsrc3dThinSwModeMask
;
// Unknown resource type: assert and leave no mode allowed.
2597 ADDR_ASSERT_ALWAYS();
2598 allowedSwModeSet
.value
= 0;
// Z-type swizzle is removed for block-compressed and macro-pixel-packed
// formats, and for MSAA surfaces that are >32bpp, color, or unordered.
2602 if (ElemLib::IsBlockCompressed(pIn
->format
) ||
2603 ElemLib::IsMacroPixelPacked(pIn
->format
) ||
2605 (msaa
&& ((bpp
> 32) || pIn
->flags
.color
|| pIn
->flags
.unordered
)))
2607 allowedSwModeSet
.value
&= ~Gfx10ZSwModeMask
;
// ADDR_FMT_32_32_32 is restricted to the linear mode mask.
2610 if (pIn
->format
== ADDR_FMT_32_32_32
)
2612 allowedSwModeSet
.value
&= Gfx10LinearSwModeMask
;
2617 allowedSwModeSet
.value
&= Gfx10MsaaSwModeMask
;
// Depth / stencil surfaces keep only Z-type swizzle modes.
2620 if (pIn
->flags
.depth
|| pIn
->flags
.stencil
)
2622 allowedSwModeSet
.value
&= Gfx10ZSwModeMask
;
// Displayable surfaces: only the DCN2 restriction is implemented here; the
// mask differs for 64bpp.
2625 if (pIn
->flags
.display
)
2627 if (m_settings
.isDcn2
)
2629 allowedSwModeSet
.value
&= (bpp
== 64) ? Dcn2Bpp64SwModeMask
: Dcn2NonBpp64SwModeMask
;
2633 ADDR_NOT_IMPLEMENTED();
2637 if (allowedSwModeSet
.value
!= 0)
2640 // Post sanity check, at least AddrLib should accept the output generated by its own
// Walks every set bit of the allowed set; bit position i is the
// AddrSwizzleMode value being validated.
2641 UINT_32 validateSwModeSet
= allowedSwModeSet
.value
;
2643 for (UINT_32 i
= 0; validateSwModeSet
!= 0; i
++)
2645 if (validateSwModeSet
& 1)
2647 localIn
.swizzleMode
= static_cast<AddrSwizzleMode
>(i
);
2648 ADDR_ASSERT(ValidateSwModeParams(&localIn
));
2651 validateSwModeSet
>>= 1;
// Publish the full set of valid choices before narrowing to one mode.
2655 pOut
->resourceType
= pIn
->resourceType
;
2656 pOut
->validSwModeSet
= allowedSwModeSet
;
2657 pOut
->canXor
= (allowedSwModeSet
.value
& Gfx10XorSwModeMask
) ? TRUE
: FALSE
;
2658 pOut
->validBlockSet
= GetAllowedBlockSet(allowedSwModeSet
, pOut
->resourceType
);
2659 pOut
->validSwTypeSet
= GetAllowedSwSet(allowedSwModeSet
);
2661 pOut
->clientPreferredSwSet
= pIn
->preferredSwSet
;
// No client preference is reported back as "all swizzle types".
2663 if (pOut
->clientPreferredSwSet
.value
== 0)
2665 pOut
->clientPreferredSwSet
.value
= AddrSwSetAll
;
2668 // Apply optional restrictions
2669 if ((pIn
->flags
.depth
|| pIn
->flags
.stencil
) && msaa
&& m_configFlags
.nonPower2MemConfig
)
// NOTE(review): the condition below uses the compound assignment "&=", so
// the VAR bits are cleared from allowedSwModeSet.value as a side effect of
// the test itself, on every path. The second comment further down ("We
// should still be able to use VAR ...") suggests a plain "&" test was
// intended, leaving VAR available when it is the only remaining choice.
// As written, that path appears to continue with allowedSwModeSet.value
// == 0. Confirm and fix in the full file.
2671 if ((allowedSwModeSet
.value
&= ~Gfx10BlkVarSwModeMask
) != 0)
2673 // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
2674 // the GL2 in VAR mode, so it should be avoided.
2675 allowedSwModeSet
.value
&= ~Gfx10BlkVarSwModeMask
;
2679 // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
2680 // But we have to suffer from low performance because there is no other choice...
2681 ADDR_ASSERT_ALWAYS();
// When the client needs an addressing equation, drop modes without one;
// the element size is passed as log2(bytes per element).
2685 if (pIn
->flags
.needEquation
)
2687 FilterInvalidEqSwizzleMode(allowedSwModeSet
, pIn
->resourceType
, Log2(bpp
>> 3));
// Linear is selected only when it is the sole remaining candidate.
2690 if (allowedSwModeSet
.value
== Gfx10LinearSwModeMask
)
2692 pOut
->swizzleMode
= ADDR_SW_LINEAR
;
2696 // Always ignore linear swizzle mode if there is other choice.
2697 allowedSwModeSet
.swLinear
= 0;
2699 ADDR2_BLOCK_SET allowedBlockSet
= GetAllowedBlockSet(allowedSwModeSet
, pOut
->resourceType
);
2701 // Determine block size if there is 2 or more block type candidates
2702 if (IsPow2(allowedBlockSet
.value
) == FALSE
)
// swMode[] holds one representative swizzle mode per block type; it is
// used below to query block dimensions and block size.
2704 AddrSwizzleMode swMode
[AddrBlockMaxTiledType
] = { ADDR_SW_LINEAR
};
2706 if (m_blockVarSizeLog2
!= 0)
2708 swMode
[AddrBlockVar
] = ADDR_SW_VAR_R_X
;
2711 if (pOut
->resourceType
== ADDR_RSRC_TEX_3D
)
2713 swMode
[AddrBlockThick4KB
] = ADDR_SW_4KB_S
;
2714 swMode
[AddrBlockThin64KB
] = ADDR_SW_64KB_R_X
;
2715 swMode
[AddrBlockThick64KB
] = ADDR_SW_64KB_S
;
2719 swMode
[AddrBlockMicro
] = ADDR_SW_256B_S
;
2720 swMode
[AddrBlockThin4KB
] = ADDR_SW_4KB_S
;
2721 swMode
[AddrBlockThin64KB
] = ADDR_SW_64KB_S
;
2724 Dim3d blkDim
[AddrBlockMaxTiledType
] = {{0}, {0}, {0}, {0}, {0}, {0}};
2725 Dim3d padDim
[AddrBlockMaxTiledType
] = {{0}, {0}, {0}, {0}, {0}, {0}};
2726 UINT_64 padSize
[AddrBlockMaxTiledType
] = {0};
// Same 1:1 / 3:2 / 2:1 weighting as in the FMASK path.
2728 const UINT_32 ratioLow
= pIn
->flags
.minimizeAlign
? 1 : (pIn
->flags
.opt4space
? 3 : 2);
2729 const UINT_32 ratioHi
= pIn
->flags
.minimizeAlign
? 1 : (pIn
->flags
.opt4space
? 2 : 1);
2730 const UINT_64 sizeAlignInElement
= Max(NextPow2(pIn
->minSizeAlign
) / (bpp
>> 3), 1u);
2731 UINT_32 minSizeBlk
= AddrBlockMicro
;
2732 UINT_64 minSize
= 0;
// For each allowed block type, compute the padded surface size (scaled by
// numFrags and aligned to sizeAlignInElement) and track the best one.
2734 for (UINT_32 i
= AddrBlockMicro
; i
< AddrBlockMaxTiledType
; i
++)
2736 if (allowedBlockSet
.value
& (1 << i
))
2738 ComputeBlockDimensionForSurf(&blkDim
[i
].w
,
2746 padSize
[i
] = ComputePadSize(&blkDim
[i
], width
, height
, numSlices
, &padDim
[i
]);
2747 padSize
[i
] = PowTwoAlign(padSize
[i
] * numFrags
, sizeAlignInElement
);
2751 minSize
= padSize
[i
];
2756 // Due to the fact that VAR block size = 16KB * m_pipes, it is possible that VAR
2757 // block size < 64KB. And ratio[Hi/Low] logic implicitly requires iterating from
2758 // smaller block type to bigger block type. So we have to correct comparing logic
2759 // according to the size of existing "minimum block" and size of coming/comparing
2760 // block. The new logic can also be useful to any future change about AddrBlockType.
2761 if (GetBlockSizeLog2(swMode
[i
]) >= GetBlockSizeLog2(swMode
[minSizeBlk
]))
2763 if ((padSize
[i
] * ratioHi
) <= (minSize
* ratioLow
))
2765 minSize
= padSize
[i
];
2771 if ((padSize
[i
] * ratioLow
) < (minSize
* ratioHi
))
2773 minSize
= padSize
[i
];
// A surface no larger than one micro block always uses the micro block
// type when that type is allowed.
2781 if ((allowedBlockSet
.micro
== TRUE
) &&
2782 (width
<= blkDim
[AddrBlockMicro
].w
) &&
2783 (height
<= blkDim
[AddrBlockMicro
].h
))
2785 minSizeBlk
= AddrBlockMicro
;
// Keep only the swizzle modes of the chosen block type. The asserts record
// which block types are expected for 3D versus non-3D resources.
2788 if (minSizeBlk
== AddrBlockMicro
)
2790 ADDR_ASSERT(pOut
->resourceType
!= ADDR_RSRC_TEX_3D
);
2791 allowedSwModeSet
.value
&= Gfx10Blk256BSwModeMask
;
2793 else if (minSizeBlk
== AddrBlockThick4KB
)
2795 ADDR_ASSERT(pOut
->resourceType
== ADDR_RSRC_TEX_3D
);
2796 allowedSwModeSet
.value
&= Gfx10Rsrc3dThick4KBSwModeMask
;
2798 else if (minSizeBlk
== AddrBlockThin4KB
)
2800 ADDR_ASSERT(pOut
->resourceType
!= ADDR_RSRC_TEX_3D
);
2801 allowedSwModeSet
.value
&= Gfx10Blk4KBSwModeMask
;
2803 else if (minSizeBlk
== AddrBlockThick64KB
)
2805 ADDR_ASSERT(pOut
->resourceType
== ADDR_RSRC_TEX_3D
);
2806 allowedSwModeSet
.value
&= Gfx10Rsrc3dThick64KBSwModeMask
;
2808 else if (minSizeBlk
== AddrBlockThin64KB
)
2810 allowedSwModeSet
.value
&= (pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ?
2811 Gfx10Rsrc3dThin64KBSwModeMask
: Gfx10Blk64KBSwModeMask
;
2815 ADDR_ASSERT(minSizeBlk
== AddrBlockVar
);
2816 allowedSwModeSet
.value
&= Gfx10BlkVarSwModeMask
;
2820 // Block type should be determined.
2821 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet
, pOut
->resourceType
).value
));
2823 ADDR2_SWTYPE_SET allowedSwSet
= GetAllowedSwSet(allowedSwModeSet
);
2825 // Determine swizzle type if there is 2 or more swizzle type candidates
2826 if (IsPow2(allowedSwSet
.value
) == FALSE
)
// Block-compressed formats: prefer D, then S, then R.
2828 if (ElemLib::IsBlockCompressed(pIn
->format
))
2830 if (allowedSwSet
.sw_D
)
2832 allowedSwModeSet
.value
&= Gfx10DisplaySwModeMask
;
2834 else if (allowedSwSet
.sw_S
)
2836 allowedSwModeSet
.value
&= Gfx10StandardSwModeMask
;
2840 ADDR_ASSERT(allowedSwSet
.sw_R
);
2841 allowedSwModeSet
.value
&= Gfx10RenderSwModeMask
;
// Macro-pixel-packed formats: prefer S, then D, then R.
2844 else if (ElemLib::IsMacroPixelPacked(pIn
->format
))
2846 if (allowedSwSet
.sw_S
)
2848 allowedSwModeSet
.value
&= Gfx10StandardSwModeMask
;
2850 else if (allowedSwSet
.sw_D
)
2852 allowedSwModeSet
.value
&= Gfx10DisplaySwModeMask
;
2856 ADDR_ASSERT(allowedSwSet
.sw_R
);
2857 allowedSwModeSet
.value
&= Gfx10RenderSwModeMask
;
// 3D resources: D for color surfaces using thick 64KB blocks (the rest of
// that condition is not visible in this excerpt), otherwise S, R, then Z.
2860 else if (pIn
->resourceType
== ADDR_RSRC_TEX_3D
)
2862 if (pIn
->flags
.color
&&
2863 GetAllowedBlockSet(allowedSwModeSet
, pOut
->resourceType
).macroThick64KB
&&
2866 allowedSwModeSet
.value
&= Gfx10DisplaySwModeMask
;
2868 else if (allowedSwSet
.sw_S
)
2870 allowedSwModeSet
.value
&= Gfx10StandardSwModeMask
;
2872 else if (allowedSwSet
.sw_R
)
2874 allowedSwModeSet
.value
&= Gfx10RenderSwModeMask
;
2878 ADDR_ASSERT(allowedSwSet
.sw_Z
);
2879 allowedSwModeSet
.value
&= Gfx10ZSwModeMask
;
// Everything else: prefer R, then D, then S, then Z.
2884 if (allowedSwSet
.sw_R
)
2886 allowedSwModeSet
.value
&= Gfx10RenderSwModeMask
;
2888 else if (allowedSwSet
.sw_D
)
2890 allowedSwModeSet
.value
&= Gfx10DisplaySwModeMask
;
2892 else if (allowedSwSet
.sw_S
)
2894 allowedSwModeSet
.value
&= Gfx10StandardSwModeMask
;
2898 ADDR_ASSERT(allowedSwSet
.sw_Z
);
2899 allowedSwModeSet
.value
&= Gfx10ZSwModeMask
;
2904 // Swizzle type should be determined.
2905 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet
).value
));
2907 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
2908 // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
2909 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
2910 pOut
->swizzleMode
= static_cast<AddrSwizzleMode
>(Log2NonPow2(allowedSwModeSet
.value
));
2915 // Invalid combination...
2916 ADDR_ASSERT_ALWAYS();
2917 returnCode
= ADDR_INVALIDPARAMS
;
2922 // Invalid combination...
2923 ADDR_ASSERT_ALWAYS();
2924 returnCode
= ADDR_INVALIDPARAMS
;
2932 ************************************************************************************************************************
2933 * Gfx10Lib::ComputeStereoInfo
2936 * Compute height alignment and right eye pipeBankXor for stereo surface
2941 ************************************************************************************************************************
// Computes the stereo adjustments for a surface: scales *pAlignY by the
// extra height alignment the swizzle equation requires and, when needed,
// writes a right-eye XOR value to *pRightXor.
//
// Only non-PRT XOR swizzle modes are processed. The surface's equation is
// looked up in m_equationLookupTable; if no equation exists the function
// reports ADDR_INVALIDPARAMS.
// NOTE(review): yMax and yPos are used below but their declarations, and
// the statement that updates yPos, are not visible in this excerpt; the
// initial values of *pAlignY and *pRightXor are likewise not shown.
2943 ADDR_E_RETURNCODE
Gfx10Lib::ComputeStereoInfo(
2944 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< Compute surface info
2945 UINT_32 blkHeight
, ///< Block height
2946 UINT_32
* pAlignY
, ///< Stereo requested additional alignment in Y
2947 UINT_32
* pRightXor
///< Right eye xor
2950 ADDR_E_RETURNCODE ret
= ADDR_OK
;
2955 if (IsNonPrtXor(pIn
->swizzleMode
))
2957 const UINT_32 blkSizeLog2
= GetBlockSizeLog2(pIn
->swizzleMode
);
// Element size as log2(bytes per element).
2958 const UINT_32 elemLog2
= Log2(pIn
->bpp
>> 3);
// The lookup table's first index is the resource type minus one.
2959 const UINT_32 rsrcType
= static_cast<UINT_32
>(pIn
->resourceType
) - 1;
2960 const UINT_32 swMode
= static_cast<UINT_32
>(pIn
->swizzleMode
);
2961 const UINT_32 eqIndex
= m_equationLookupTable
[rsrcType
][swMode
][elemLog2
];
2963 if (eqIndex
!= ADDR_INVALID_EQUATION_INDEX
)
// Scan the equation's address bits from the pipe-interleave bit up to the
// block size, looking at the xor1 term of each bit and tracking the largest
// y-channel (channel == 1) index seen in yMax.
2968 for (UINT_32 i
= m_pipeInterleaveLog2
; i
< blkSizeLog2
; i
++)
2970 if (m_equationTable
[eqIndex
].xor1
[i
].value
== 0)
2975 ADDR_ASSERT(m_equationTable
[eqIndex
].xor1
[i
].valid
== 1);
2977 if ((m_equationTable
[eqIndex
].xor1
[i
].channel
== 1) &&
2978 (m_equationTable
[eqIndex
].xor1
[i
].index
> yMax
))
2980 yMax
= m_equationTable
[eqIndex
].xor1
[i
].index
;
// Height alignment implied by the highest y bit that feeds the XOR.
2985 const UINT_32 additionalAlign
= 1 << yMax
;
// Only applies when that alignment is at least one block high.
2987 if (additionalAlign
>= blkHeight
)
2989 *pAlignY
*= (additionalAlign
/ blkHeight
);
2991 const UINT_32 alignedHeight
= PowTwoAlign(pIn
->height
, additionalAlign
);
// If bit yMax of the aligned height is set, emit a right-eye XOR with the
// bit at position (yPos - m_pipeInterleaveLog2).
2993 if ((alignedHeight
>> yMax
) & 1)
2995 *pRightXor
= 1 << (yPos
- m_pipeInterleaveLog2
);
// No equation available for this resource type / swizzle mode / element size.
3001 ret
= ADDR_INVALIDPARAMS
;
3009 ************************************************************************************************************************
3010 * Gfx10Lib::HwlComputeSurfaceInfoTiled
3013 * Internal function to calculate alignment for tiled surface
3017 ************************************************************************************************************************
// Dispatches surface-info computation for a tiled surface: swizzle modes
// for which IsBlock256b() is true go to ComputeSurfaceInfoMicroTiled(), the
// other assignment below goes to ComputeSurfaceInfoMacroTiled(). The callee's
// return code is stored in ret.
3019 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeSurfaceInfoTiled(
3020 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
3021 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
3024 ADDR_E_RETURNCODE ret
;
3026 if (IsBlock256b(pIn
->swizzleMode
))
3028 ret
= ComputeSurfaceInfoMicroTiled(pIn
, pOut
);
3032 ret
= ComputeSurfaceInfoMacroTiled(pIn
, pOut
);
3039 ************************************************************************************************************************
3040 * Gfx10Lib::ComputeSurfaceInfoMicroTiled
3043 * Internal function to calculate alignment for micro tiled surface
3047 ************************************************************************************************************************
// Fills pOut with pitch, height, slice/surface size and per-mip layout for
// a micro-tiled surface.
//
// Block dimensions come from ComputeBlockDimensionForSurf(); pitch and
// height are the input dimensions aligned up to the block dimensions and
// the base alignment equals the block size. With more than one mip level,
// mips are laid out from the smallest level to mip 0 and each mip's offset
// is the running total of the sizes placed before it.
3049 ADDR_E_RETURNCODE
Gfx10Lib::ComputeSurfaceInfoMicroTiled(
3050 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
3051 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
3054 ADDR_E_RETURNCODE ret
= ComputeBlockDimensionForSurf(&pOut
->blockWidth
,
// Mip-chain fields are reset; no mip is reported as being in a tail
// (firstMipIdInTail == numMipLevels).
3064 pOut
->mipChainPitch
= 0;
3065 pOut
->mipChainHeight
= 0;
3066 pOut
->mipChainSlice
= 0;
3067 pOut
->epitchIsHeight
= FALSE
;
3068 pOut
->mipChainInTail
= FALSE
;
3069 pOut
->firstMipIdInTail
= pIn
->numMipLevels
;
3071 const UINT_32 blockSize
= GetBlockSize(pIn
->swizzleMode
);
3073 pOut
->pitch
= PowTwoAlign(pIn
->width
, pOut
->blockWidth
);
3074 pOut
->height
= PowTwoAlign(pIn
->height
, pOut
->blockHeight
);
3075 pOut
->numSlices
= pIn
->numSlices
;
3076 pOut
->baseAlign
= blockSize
;
// ---- Mipmapped surface ----
3078 if (pIn
->numMipLevels
> 1)
3080 const UINT_32 mip0Width
= pIn
->width
;
3081 const UINT_32 mip0Height
= pIn
->height
;
3082 UINT_64 mipSliceSize
= 0;
// Iterate from the last (smallest) mip down to mip 0; a signed index is
// used so the loop can terminate after i == 0.
3084 for (INT_32 i
= static_cast<INT_32
>(pIn
->numMipLevels
) - 1; i
>= 0; i
--)
3086 UINT_32 mipWidth
, mipHeight
;
3088 GetMipSize(mip0Width
, mip0Height
, 1, i
, &mipWidth
, &mipHeight
);
3090 const UINT_32 mipActualWidth
= PowTwoAlign(mipWidth
, pOut
->blockWidth
);
3091 const UINT_32 mipActualHeight
= PowTwoAlign(mipHeight
, pOut
->blockHeight
);
// Per-mip info is optional; offset and macroBlockOffset are both the bytes
// accumulated so far within one slice.
3093 if (pOut
->pMipInfo
!= NULL
)
3095 pOut
->pMipInfo
[i
].pitch
= mipActualWidth
;
3096 pOut
->pMipInfo
[i
].height
= mipActualHeight
;
3097 pOut
->pMipInfo
[i
].depth
= 1;
3098 pOut
->pMipInfo
[i
].offset
= mipSliceSize
;
3099 pOut
->pMipInfo
[i
].mipTailOffset
= 0;
3100 pOut
->pMipInfo
[i
].macroBlockOffset
= mipSliceSize
;
// NOTE(review): mipActualWidth and mipActualHeight are UINT_32, so this
// product appears to be evaluated in 32 bits before being added to the
// UINT_64 accumulator, whereas the single-mip path below casts the pitch
// to UINT_64 first. Possible overflow for very large mips - confirm the
// type of pIn->bpp and consider the same cast here.
3103 mipSliceSize
+= mipActualWidth
* mipActualHeight
* (pIn
->bpp
>> 3);
3106 pOut
->sliceSize
= mipSliceSize
;
3107 pOut
->surfSize
= mipSliceSize
* pOut
->numSlices
;
// ---- Single mip level ----
// Slice size is pitch * height * bytes-per-element, computed in 64 bits.
3111 pOut
->sliceSize
= static_cast<UINT_64
>(pOut
->pitch
) * pOut
->height
* (pIn
->bpp
>> 3);
3112 pOut
->surfSize
= pOut
->sliceSize
* pOut
->numSlices
;
3114 if (pOut
->pMipInfo
!= NULL
)
3116 pOut
->pMipInfo
[0].pitch
= pOut
->pitch
;
3117 pOut
->pMipInfo
[0].height
= pOut
->height
;
3118 pOut
->pMipInfo
[0].depth
= 1;
3119 pOut
->pMipInfo
[0].offset
= 0;
3120 pOut
->pMipInfo
[0].mipTailOffset
= 0;
3121 pOut
->pMipInfo
[0].macroBlockOffset
= 0;
3131 ************************************************************************************************************************
3132 * Gfx10Lib::ComputeSurfaceInfoMacroTiled
3135 * Internal function to calculate alignment for macro tiled surface
3139 ************************************************************************************************************************
// Fills pOut with pitch, height, slice count, sizes and per-mip layout for
// a macro-tiled surface.
//
// Visible steps:
//   1. Block dimensions from ComputeBlockDimensionForSurf().
//   2. For qbStereo surfaces, the height alignment is multiplied by the
//      value ComputeStereoInfo() returns and the right-eye swizzle is stored.
//   3. Pitch / height / numSlices are aligned up to the block dimensions.
//   4. With more than one mip level: find the first mip that fits in the mip
//      tail, size every mip outside the tail, then assign offsets - tail
//      first, followed by the larger mips in order of decreasing mip index.
//      Mips inside the tail get an offset and x/y coordinates within the
//      tail block.
//   5. With a single mip level: sliceSize also accounts for numFrags.
// NOTE(review): several original lines (braces, else keywords, some
// declarations such as alignY and offset) are not visible in this excerpt.
3141 ADDR_E_RETURNCODE
Gfx10Lib::ComputeSurfaceInfoMacroTiled(
3142 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
3143 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
3146 ADDR_E_RETURNCODE returnCode
= ComputeBlockDimensionForSurf(&pOut
->blockWidth
,
3154 if (returnCode
== ADDR_OK
)
3156 UINT_32 heightAlign
= pOut
->blockHeight
;
// Stereo surfaces may need a larger height alignment and a right-eye XOR.
3158 if (pIn
->flags
.qbStereo
)
3160 UINT_32 rightXor
= 0;
3163 returnCode
= ComputeStereoInfo(pIn
, heightAlign
, &alignY
, &rightXor
);
3165 if (returnCode
== ADDR_OK
)
3167 pOut
->pStereoInfo
->rightSwizzle
= rightXor
;
3169 heightAlign
*= alignY
;
3173 if (returnCode
== ADDR_OK
)
3175 // Mip chain dimension and epitch has no meaning in GFX10, set to default value
3176 pOut
->mipChainPitch
= 0;
3177 pOut
->mipChainHeight
= 0;
3178 pOut
->mipChainSlice
= 0;
3179 pOut
->epitchIsHeight
= FALSE
;
3180 pOut
->mipChainInTail
= FALSE
;
3181 pOut
->firstMipIdInTail
= pIn
->numMipLevels
;
3183 const UINT_32 blockSizeLog2
= GetBlockSizeLog2(pIn
->swizzleMode
);
3184 const UINT_32 blockSize
= 1 << blockSizeLog2
;
3186 pOut
->pitch
= PowTwoAlign(pIn
->width
, pOut
->blockWidth
);
3187 pOut
->height
= PowTwoAlign(pIn
->height
, heightAlign
);
3188 pOut
->numSlices
= PowTwoAlign(pIn
->numSlices
, pOut
->blockSlices
);
3189 pOut
->baseAlign
= blockSize
;
// ---- Mipmapped surface ----
3191 if (pIn
->numMipLevels
> 1)
3193 const Dim3d tailMaxDim
= GetMipTailDim(pIn
->resourceType
,
3198 const UINT_32 mip0Width
= pIn
->width
;
3199 const UINT_32 mip0Height
= pIn
->height
;
3200 const BOOL_32 isThin
= IsThin(pIn
->resourceType
, pIn
->swizzleMode
);
// Thin blocks do not mip along depth, so mip 0 depth is treated as 1.
3201 const UINT_32 mip0Depth
= isThin
? 1 : pIn
->numSlices
;
3202 const UINT_32 maxMipsInTail
= GetMaxNumMipsInTail(blockSizeLog2
, isThin
);
// index = log2(bytes per element); used to index the Block256_* tables.
3203 const UINT_32 index
= Log2(pIn
->bpp
>> 3);
3204 UINT_32 firstMipInTail
= pIn
->numMipLevels
;
3205 UINT_64 mipChainSliceSize
= 0;
3206 UINT_64 mipSize
[MaxMipLevels
];
3207 UINT_64 mipSliceSize
[MaxMipLevels
];
3209 Dim3d fixedTailMaxDim
= tailMaxDim
;
// With dsMipmapHtileFix, Z-order swizzles with index <= 1 use a reduced
// tail size for the "is in tail" test: the tail dimensions are divided by
// the ratio between this element size's 256B block and the index-2 entry.
3211 if (m_settings
.dsMipmapHtileFix
&& IsZOrderSwizzle(pIn
->swizzleMode
) && (index
<= 1))
3213 fixedTailMaxDim
.w
/= Block256_2d
[index
].w
/ Block256_2d
[2].w
;
3214 fixedTailMaxDim
.h
/= Block256_2d
[index
].h
/ Block256_2d
[2].h
;
// Pass 1: walk mips from 0 upward, accumulate the per-slice chain size and
// record per-mip sizes; a mip that fits in the tail contributes one block
// (divided by blockSlices) to the slice size.
3217 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
3219 UINT_32 mipWidth
, mipHeight
, mipDepth
;
3221 GetMipSize(mip0Width
, mip0Height
, mip0Depth
, i
, &mipWidth
, &mipHeight
, &mipDepth
);
3223 if (IsInMipTail(fixedTailMaxDim
, maxMipsInTail
, mipWidth
, mipHeight
, pIn
->numMipLevels
- i
))
3226 mipChainSliceSize
+= blockSize
/ pOut
->blockSlices
;
3231 const UINT_32 pitch
= PowTwoAlign(mipWidth
, pOut
->blockWidth
);
3232 const UINT_32 height
= PowTwoAlign(mipHeight
, pOut
->blockHeight
);
3233 const UINT_32 depth
= PowTwoAlign(mipDepth
, pOut
->blockSlices
);
3234 const UINT_64 sliceSize
= static_cast<UINT_64
>(pitch
) * height
* (pIn
->bpp
>> 3);
// mipSize is the whole mip (all aligned depth slices); mipSliceSize is
// the size of one block-deep layer of it.
3236 mipSize
[i
] = sliceSize
* depth
;
3237 mipSliceSize
[i
] = sliceSize
* pOut
->blockSlices
;
3238 mipChainSliceSize
+= sliceSize
;
3240 if (pOut
->pMipInfo
!= NULL
)
3242 pOut
->pMipInfo
[i
].pitch
= pitch
;
3243 pOut
->pMipInfo
[i
].height
= height
;
3244 pOut
->pMipInfo
[i
].depth
= depth
;
3249 pOut
->sliceSize
= mipChainSliceSize
;
3250 pOut
->surfSize
= mipChainSliceSize
* pOut
->numSlices
;
// The whole chain is in the tail when even mip 0 fits.
3251 pOut
->mipChainInTail
= (firstMipInTail
== 0) ? TRUE
: FALSE
;
3252 pOut
->firstMipIdInTail
= firstMipInTail
;
// Pass 2 (only when the caller wants per-mip info): assign offsets.
3254 if (pOut
->pMipInfo
!= NULL
)
3257 UINT_64 macroBlkOffset
= 0;
3258 UINT_32 tailMaxDepth
= 0;
// If a tail exists it occupies the start of the surface; the running
// offsets begin after it.
3260 if (firstMipInTail
!= pIn
->numMipLevels
)
3262 UINT_32 mipWidth
, mipHeight
;
3264 GetMipSize(mip0Width
, mip0Height
, mip0Depth
, firstMipInTail
,
3265 &mipWidth
, &mipHeight
, &tailMaxDepth
);
3267 offset
= blockSize
* PowTwoAlign(tailMaxDepth
, pOut
->blockSlices
) / pOut
->blockSlices
;
3268 macroBlkOffset
= blockSize
;
// Mips outside the tail: from the last non-tail mip back to mip 0, each
// placed after the ones already laid out.
3271 for (INT_32 i
= firstMipInTail
- 1; i
>= 0; i
--)
3273 pOut
->pMipInfo
[i
].offset
= offset
;
3274 pOut
->pMipInfo
[i
].macroBlockOffset
= macroBlkOffset
;
3275 pOut
->pMipInfo
[i
].mipTailOffset
= 0;
3277 offset
+= mipSize
[i
];
3278 macroBlkOffset
+= mipSliceSize
[i
];
// Mips inside the tail start from the tail's maximum dimensions.
3281 UINT_32 pitch
= tailMaxDim
.w
;
3282 UINT_32 height
= tailMaxDim
.h
;
3283 UINT_32 depth
= isThin
? 1 : PowTwoAlign(tailMaxDepth
, Block256_3d
[index
].d
);
// tailMaxDepth is converted to a count of 256B-block layers (1 for thin).
3285 tailMaxDepth
= isThin
? 1 : (depth
/ Block256_3d
[index
].d
);
3287 for (UINT_32 i
= firstMipInTail
; i
< pIn
->numMipLevels
; i
++)
// m counts down from maxMipsInTail - 1 as i moves through the tail.
// mipOffset is this mip's byte offset inside the tail block: 16 << m for
// m > 6, otherwise m << 8 (m * 256).
3289 const UINT_32 m
= maxMipsInTail
- 1 - (i
- firstMipInTail
);
3290 const UINT_32 mipOffset
= (m
> 6) ? (16 << m
) : (m
<< 8);
3292 pOut
->pMipInfo
[i
].offset
= mipOffset
* tailMaxDepth
;
3293 pOut
->pMipInfo
[i
].mipTailOffset
= mipOffset
;
3294 pOut
->pMipInfo
[i
].macroBlockOffset
= 0;
3296 pOut
->pMipInfo
[i
].pitch
= pitch
;
3297 pOut
->pMipInfo
[i
].height
= height
;
3298 pOut
->pMipInfo
[i
].depth
= depth
;
// De-interleave mipOffset into tile coordinates: bits 9, 11, 13, 15, 17, 19
// form mipX and bits 8, 10, 12, 14, 16, 18 form mipY.
3300 UINT_32 mipX
= ((mipOffset
>> 9) & 1) |
3301 ((mipOffset
>> 10) & 2) |
3302 ((mipOffset
>> 11) & 4) |
3303 ((mipOffset
>> 12) & 8) |
3304 ((mipOffset
>> 13) & 16) |
3305 ((mipOffset
>> 14) & 32);
3306 UINT_32 mipY
= ((mipOffset
>> 8) & 1) |
3307 ((mipOffset
>> 9) & 2) |
3308 ((mipOffset
>> 10) & 4) |
3309 ((mipOffset
>> 11) & 8) |
3310 ((mipOffset
>> 12) & 16) |
3311 ((mipOffset
>> 13) & 32);
// Odd block-size exponents need the x/y coordinates adjusted; only part
// of that adjustment is visible in this excerpt.
3313 if (blockSizeLog2
& 1)
3315 const UINT_32 temp
= mipX
;
3321 mipY
= (mipY
<< 1) | (mipX
& 1);
// Coordinates are scaled by the 256B block dimensions for this element
// size: the first group below uses the 2D table, the second the 3D table.
3328 pOut
->pMipInfo
[i
].mipTailCoordX
= mipX
* Block256_2d
[index
].w
;
3329 pOut
->pMipInfo
[i
].mipTailCoordY
= mipY
* Block256_2d
[index
].h
;
3330 pOut
->pMipInfo
[i
].mipTailCoordZ
= 0;
// Next mip is half the size, but never smaller than one 256B block.
3332 pitch
= Max(pitch
>> 1, Block256_2d
[index
].w
);
3333 height
= Max(height
>> 1, Block256_2d
[index
].h
);
3338 pOut
->pMipInfo
[i
].mipTailCoordX
= mipX
* Block256_3d
[index
].w
;
3339 pOut
->pMipInfo
[i
].mipTailCoordY
= mipY
* Block256_3d
[index
].h
;
3340 pOut
->pMipInfo
[i
].mipTailCoordZ
= 0;
3342 pitch
= Max(pitch
>> 1, Block256_3d
[index
].w
);
3343 height
= Max(height
>> 1, Block256_3d
[index
].h
);
3344 depth
= PowTwoAlign(Max(depth
>> 1, 1u), Block256_3d
[index
].d
);
// ---- Single mip level ----
// Slice size is pitch * height * bytes-per-element * numFrags, in 64 bits.
3351 pOut
->sliceSize
= static_cast<UINT_64
>(pOut
->pitch
) * pOut
->height
* (pIn
->bpp
>> 3) * pIn
->numFrags
;
3352 pOut
->surfSize
= pOut
->sliceSize
* pOut
->numSlices
;
3354 if (pOut
->pMipInfo
!= NULL
)
3356 pOut
->pMipInfo
[0].pitch
= pOut
->pitch
;
3357 pOut
->pMipInfo
[0].height
= pOut
->height
;
// Depth is the aligned slice count for 3D textures, otherwise 1.
3358 pOut
->pMipInfo
[0].depth
= IsTex3d(pIn
->resourceType
)? pOut
->numSlices
: 1;
3359 pOut
->pMipInfo
[0].offset
= 0;
3360 pOut
->pMipInfo
[0].mipTailOffset
= 0;
3361 pOut
->pMipInfo
[0].macroBlockOffset
= 0;
3362 pOut
->pMipInfo
[0].mipTailCoordX
= 0;
3363 pOut
->pMipInfo
[0].mipTailCoordY
= 0;
3364 pOut
->pMipInfo
[0].mipTailCoordZ
= 0;
3374 ************************************************************************************************************************
3375 * Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3378 * Internal function to calculate address from coord for tiled swizzle surface
3382 ************************************************************************************************************************
// Dispatches address-from-coordinate computation for a tiled surface:
// swizzle modes for which IsBlock256b() is true go to
// ComputeSurfaceAddrFromCoordMicroTiled(), the other assignment below goes to
// ComputeSurfaceAddrFromCoordMacroTiled(). The callee's return code is
// stored in ret.
3384 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
3385 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
3386 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
* pOut
///< [out] output structure
3389 ADDR_E_RETURNCODE ret
;
3391 if (IsBlock256b(pIn
->swizzleMode
))
3393 ret
= ComputeSurfaceAddrFromCoordMicroTiled(pIn
, pOut
);
3397 ret
= ComputeSurfaceAddrFromCoordMacroTiled(pIn
, pOut
);
3404 ************************************************************************************************************************
3405 * Gfx10Lib::ComputeOffsetFromEquation
3408 * Compute offset from equation
3412 ************************************************************************************************************************
// Evaluates an addressing equation at coordinate (x, y, z).
//
// For each of the pEq->numBits equation bits, up to three terms are
// considered - addr[i], xor1[i] and xor2[i]. Every valid term selects one
// bit of a coordinate (channel 0 = x, 1 = y, otherwise z, asserted to be
// channel 2) at position .index, and that bit is XORed into v.
// NOTE(review): the declaration of v, how v is folded into the result, and
// the return statement are not visible in this excerpt.
3414 UINT_32
Gfx10Lib::ComputeOffsetFromEquation(
3415 const ADDR_EQUATION
* pEq
, ///< Equation
3416 UINT_32 x
, ///< x coord in bytes
3417 UINT_32 y
, ///< y coord in pixel
3418 UINT_32 z
///< z coord in slice
3423 for (UINT_32 i
= 0; i
< pEq
->numBits
; i
++)
// Primary address term for bit i.
3427 if (pEq
->addr
[i
].valid
)
3429 if (pEq
->addr
[i
].channel
== 0)
3431 v
^= (x
>> pEq
->addr
[i
].index
) & 1;
3433 else if (pEq
->addr
[i
].channel
== 1)
3435 v
^= (y
>> pEq
->addr
[i
].index
) & 1;
3439 ADDR_ASSERT(pEq
->addr
[i
].channel
== 2);
3440 v
^= (z
>> pEq
->addr
[i
].index
) & 1;
// First XOR term for bit i.
3444 if (pEq
->xor1
[i
].valid
)
3446 if (pEq
->xor1
[i
].channel
== 0)
3448 v
^= (x
>> pEq
->xor1
[i
].index
) & 1;
3450 else if (pEq
->xor1
[i
].channel
== 1)
3452 v
^= (y
>> pEq
->xor1
[i
].index
) & 1;
3456 ADDR_ASSERT(pEq
->xor1
[i
].channel
== 2);
3457 v
^= (z
>> pEq
->xor1
[i
].index
) & 1;
// Second XOR term for bit i.
3461 if (pEq
->xor2
[i
].valid
)
3463 if (pEq
->xor2
[i
].channel
== 0)
3465 v
^= (x
>> pEq
->xor2
[i
].index
) & 1;
3467 else if (pEq
->xor2
[i
].channel
== 1)
3469 v
^= (y
>> pEq
->xor2
[i
].index
) & 1;
3473 ADDR_ASSERT(pEq
->xor2
[i
].channel
== 2);
3474 v
^= (z
>> pEq
->xor2
[i
].index
) & 1;
3485 ************************************************************************************************************************
3486 * Gfx10Lib::ComputeOffsetFromSwizzlePattern
3489 * Compute offset from swizzle pattern
3493 ************************************************************************************************************************
// Evaluates a swizzle pattern at coordinate (x, y, z) and sample s.
//
// pPattern is reinterpreted as an array of ADDR_BIT_SETTING, one entry per
// output bit. For each of the numBits entries, the x, y, z and s members
// are examined in turn; a non-zero member is copied into a 16-bit mask for
// further processing.
// NOTE(review): the statements that consume each mask, accumulate the
// result and return it are not visible in this excerpt.
3495 UINT_32
Gfx10Lib::ComputeOffsetFromSwizzlePattern(
3496 const UINT_64
* pPattern
, ///< Swizzle pattern
3497 UINT_32 numBits
, ///< Number of bits in pattern
3498 UINT_32 x
, ///< x coord in pixel
3499 UINT_32 y
, ///< y coord in pixel
3500 UINT_32 z
, ///< z coord in slice
3501 UINT_32 s
///< sample id
// View the 64-bit pattern words as per-bit x/y/z/s settings.
3505 const ADDR_BIT_SETTING
* pSwizzlePattern
= reinterpret_cast<const ADDR_BIT_SETTING
*>(pPattern
);
3507 for (UINT_32 i
= 0; i
< numBits
; i
++)
// x contribution to bit i.
3511 if (pSwizzlePattern
[i
].x
!= 0)
3513 UINT_16 mask
= pSwizzlePattern
[i
].x
;
// y contribution to bit i.
3528 if (pSwizzlePattern
[i
].y
!= 0)
3530 UINT_16 mask
= pSwizzlePattern
[i
].y
;
// z contribution to bit i.
3545 if (pSwizzlePattern
[i
].z
!= 0)
3547 UINT_16 mask
= pSwizzlePattern
[i
].z
;
// Sample-index contribution to bit i.
3562 if (pSwizzlePattern
[i
].s
!= 0)
3564 UINT_16 mask
= pSwizzlePattern
[i
].s
;
3586 ************************************************************************************************************************
3587 * Gfx10Lib::GetSwizzlePatternInfo
3590 * Get swizzle pattern
3593 * Swizzle pattern information
3594 ************************************************************************************************************************
// Selects the swizzle pattern table matching a swizzle mode, resource type and fragment count,
// and returns a pointer to one ADDR_SW_PATINFO entry of that table, or NULL when no table was
// selected. Wherever both variants exist, m_settings.supportRbPlus chooses the *_RBPLUS table
// over the plain one.
3596 const ADDR_SW_PATINFO
* Gfx10Lib::GetSwizzlePatternInfo(
3597 AddrSwizzleMode swizzleMode
, ///< Swizzle mode
3598 AddrResourceType resourceType
, ///< Resource type
3599 UINT_32 elemLog2
, ///< Element size in bytes log2
3600 UINT_32 numFrag
///< Number of fragments
// Entry to return from the selected table: XOR swizzle modes use m_colorBaseIndex + elemLog2,
// every other mode uses elemLog2 directly.
3603 const UINT_32 index
= IsXor(swizzleMode
) ? (m_colorBaseIndex
+ elemLog2
) : elemLog2
;
3604 const ADDR_SW_PATINFO
* patInfo
= NULL
;
// Single-bit mask for the swizzle mode, tested against Gfx10Rsrc3dSwModeMask and
// Gfx10Rsrc2dSwModeMask below.
3605 const UINT_32 swizzleMask
= 1 << swizzleMode
;
// Pattern tables are only looked up for non-linear swizzle modes.
3607 if (IsLinear(swizzleMode
) == FALSE
)
// Variable-size block modes: a table is only chosen when m_blockVarSizeLog2 is non-zero, RB+
// support is asserted, and only *_RBPLUS tables are referenced.
3609 if (IsBlockVariable(swizzleMode
))
3611 if (m_blockVarSizeLog2
!= 0)
3613 ADDR_ASSERT(m_settings
.supportRbPlus
);
// VAR_R_X tables, one per fragment count (1xaa / 2xaa / 4xaa / 8xaa).
3615 if (IsRtOptSwizzle(swizzleMode
))
3619 patInfo
= SW_VAR_R_X_1xaa_RBPLUS_PATINFO
;
3621 else if (numFrag
== 2)
3623 patInfo
= SW_VAR_R_X_2xaa_RBPLUS_PATINFO
;
3625 else if (numFrag
== 4)
3627 patInfo
= SW_VAR_R_X_4xaa_RBPLUS_PATINFO
;
3631 ADDR_ASSERT(numFrag
== 8);
3632 patInfo
= SW_VAR_R_X_8xaa_RBPLUS_PATINFO
;
// VAR_Z_X tables, one per fragment count (1xaa / 2xaa / 4xaa / 8xaa).
3635 else if (IsZOrderSwizzle(swizzleMode
))
3639 patInfo
= SW_VAR_Z_X_1xaa_RBPLUS_PATINFO
;
3641 else if (numFrag
== 2)
3643 patInfo
= SW_VAR_Z_X_2xaa_RBPLUS_PATINFO
;
3645 else if (numFrag
== 4)
3647 patInfo
= SW_VAR_Z_X_4xaa_RBPLUS_PATINFO
;
3651 ADDR_ASSERT(numFrag
== 8);
3652 patInfo
= SW_VAR_Z_X_8xaa_RBPLUS_PATINFO
;
// 3D resources: exactly one fragment is asserted, and the mode must be part of
// Gfx10Rsrc3dSwModeMask for a table to be chosen.
3657 else if (resourceType
== ADDR_RSRC_TEX_3D
)
3659 ADDR_ASSERT(numFrag
== 1);
3661 if ((swizzleMask
& Gfx10Rsrc3dSwModeMask
) != 0)
// R_X and Z_X use the 64K 1xaa tables.
3663 if (IsRtOptSwizzle(swizzleMode
))
3665 patInfo
= m_settings
.supportRbPlus
? SW_64K_R_X_1xaa_RBPLUS_PATINFO
: SW_64K_R_X_1xaa_PATINFO
;
3667 else if (IsZOrderSwizzle(swizzleMode
))
3669 patInfo
= m_settings
.supportRbPlus
? SW_64K_Z_X_1xaa_RBPLUS_PATINFO
: SW_64K_Z_X_1xaa_PATINFO
;
// Display swizzle on 3D: only ADDR_SW_64KB_D_X is expected (asserted); uses the D3 tables.
3671 else if (IsDisplaySwizzle(resourceType
, swizzleMode
))
3673 ADDR_ASSERT(swizzleMode
== ADDR_SW_64KB_D_X
);
3674 patInfo
= m_settings
.supportRbPlus
? SW_64K_D3_X_RBPLUS_PATINFO
: SW_64K_D3_X_PATINFO
;
// Standard swizzle on 3D (asserted): S3 tables, split into 4KB and 64KB block variants.
3678 ADDR_ASSERT(IsStandardSwizzle(resourceType
, swizzleMode
));
3680 if (IsBlock4kb(swizzleMode
))
3682 if (swizzleMode
== ADDR_SW_4KB_S
)
3684 patInfo
= m_settings
.supportRbPlus
? SW_4K_S3_RBPLUS_PATINFO
: SW_4K_S3_PATINFO
;
3688 ADDR_ASSERT(swizzleMode
== ADDR_SW_4KB_S_X
);
3689 patInfo
= m_settings
.supportRbPlus
? SW_4K_S3_X_RBPLUS_PATINFO
: SW_4K_S3_X_PATINFO
;
3694 if (swizzleMode
== ADDR_SW_64KB_S
)
3696 patInfo
= m_settings
.supportRbPlus
? SW_64K_S3_RBPLUS_PATINFO
: SW_64K_S3_PATINFO
;
3698 else if (swizzleMode
== ADDR_SW_64KB_S_X
)
3700 patInfo
= m_settings
.supportRbPlus
? SW_64K_S3_X_RBPLUS_PATINFO
: SW_64K_S3_X_PATINFO
;
3704 ADDR_ASSERT(swizzleMode
== ADDR_SW_64KB_S_T
);
3705 patInfo
= m_settings
.supportRbPlus
? SW_64K_S3_T_RBPLUS_PATINFO
: SW_64K_S3_T_PATINFO
;
// Modes contained in Gfx10Rsrc2dSwModeMask: tables are chosen by block size first
// (256B, 4KB, then the 64K tables) and by swizzle family second.
3713 if ((swizzleMask
& Gfx10Rsrc2dSwModeMask
) != 0)
// 256B blocks: S or D (D is asserted as the only alternative to S).
3715 if (IsBlock256b(swizzleMode
))
3717 if (swizzleMode
== ADDR_SW_256B_S
)
3719 patInfo
= m_settings
.supportRbPlus
? SW_256_S_RBPLUS_PATINFO
: SW_256_S_PATINFO
;
3723 ADDR_ASSERT(swizzleMode
== ADDR_SW_256B_D
);
3724 patInfo
= m_settings
.supportRbPlus
? SW_256_D_RBPLUS_PATINFO
: SW_256_D_PATINFO
;
// 4KB blocks: standard (S / S_X) or display (D / D_X) tables.
3727 else if (IsBlock4kb(swizzleMode
))
3729 if (IsStandardSwizzle(resourceType
, swizzleMode
))
3731 if (swizzleMode
== ADDR_SW_4KB_S
)
3733 patInfo
= m_settings
.supportRbPlus
? SW_4K_S_RBPLUS_PATINFO
: SW_4K_S_PATINFO
;
3737 ADDR_ASSERT(swizzleMode
== ADDR_SW_4KB_S_X
);
3738 patInfo
= m_settings
.supportRbPlus
? SW_4K_S_X_RBPLUS_PATINFO
: SW_4K_S_X_PATINFO
;
3743 if (swizzleMode
== ADDR_SW_4KB_D
)
3745 patInfo
= m_settings
.supportRbPlus
? SW_4K_D_RBPLUS_PATINFO
: SW_4K_D_PATINFO
;
3749 ADDR_ASSERT(swizzleMode
== ADDR_SW_4KB_D_X
);
3750 patInfo
= m_settings
.supportRbPlus
? SW_4K_D_X_RBPLUS_PATINFO
: SW_4K_D_X_PATINFO
;
// 64K R_X tables, one per fragment count (1xaa / 2xaa / 4xaa / 8xaa, the last one asserted).
3756 if (IsRtOptSwizzle(swizzleMode
))
3760 patInfo
= m_settings
.supportRbPlus
? SW_64K_R_X_1xaa_RBPLUS_PATINFO
: SW_64K_R_X_1xaa_PATINFO
;
3762 else if (numFrag
== 2)
3764 patInfo
= m_settings
.supportRbPlus
? SW_64K_R_X_2xaa_RBPLUS_PATINFO
: SW_64K_R_X_2xaa_PATINFO
;
3766 else if (numFrag
== 4)
3768 patInfo
= m_settings
.supportRbPlus
? SW_64K_R_X_4xaa_RBPLUS_PATINFO
: SW_64K_R_X_4xaa_PATINFO
;
3772 ADDR_ASSERT(numFrag
== 8);
3773 patInfo
= m_settings
.supportRbPlus
? SW_64K_R_X_8xaa_RBPLUS_PATINFO
: SW_64K_R_X_8xaa_PATINFO
;
// 64K Z_X tables, one per fragment count (1xaa / 2xaa / 4xaa / 8xaa, the last one asserted).
3776 else if (IsZOrderSwizzle(swizzleMode
))
3780 patInfo
= m_settings
.supportRbPlus
? SW_64K_Z_X_1xaa_RBPLUS_PATINFO
: SW_64K_Z_X_1xaa_PATINFO
;
3782 else if (numFrag
== 2)
3784 patInfo
= m_settings
.supportRbPlus
? SW_64K_Z_X_2xaa_RBPLUS_PATINFO
: SW_64K_Z_X_2xaa_PATINFO
;
3786 else if (numFrag
== 4)
3788 patInfo
= m_settings
.supportRbPlus
? SW_64K_Z_X_4xaa_RBPLUS_PATINFO
: SW_64K_Z_X_4xaa_PATINFO
;
3792 ADDR_ASSERT(numFrag
== 8);
3793 patInfo
= m_settings
.supportRbPlus
? SW_64K_Z_X_8xaa_RBPLUS_PATINFO
: SW_64K_Z_X_8xaa_PATINFO
;
// 64K display tables: D, D_X, or D_T (asserted as the remaining possibility).
3796 else if (IsDisplaySwizzle(resourceType
, swizzleMode
))
3798 if (swizzleMode
== ADDR_SW_64KB_D
)
3800 patInfo
= m_settings
.supportRbPlus
? SW_64K_D_RBPLUS_PATINFO
: SW_64K_D_PATINFO
;
3802 else if (swizzleMode
== ADDR_SW_64KB_D_X
)
3804 patInfo
= m_settings
.supportRbPlus
? SW_64K_D_X_RBPLUS_PATINFO
: SW_64K_D_X_PATINFO
;
3808 ADDR_ASSERT(swizzleMode
== ADDR_SW_64KB_D_T
);
3809 patInfo
= m_settings
.supportRbPlus
? SW_64K_D_T_RBPLUS_PATINFO
: SW_64K_D_T_PATINFO
;
// 64K standard tables: S, S_X, or S_T (asserted as the remaining possibility).
3814 if (swizzleMode
== ADDR_SW_64KB_S
)
3816 patInfo
= m_settings
.supportRbPlus
? SW_64K_S_RBPLUS_PATINFO
: SW_64K_S_PATINFO
;
3818 else if (swizzleMode
== ADDR_SW_64KB_S_X
)
3820 patInfo
= m_settings
.supportRbPlus
? SW_64K_S_X_RBPLUS_PATINFO
: SW_64K_S_X_PATINFO
;
3824 ADDR_ASSERT(swizzleMode
== ADDR_SW_64KB_S_T
);
3825 patInfo
= m_settings
.supportRbPlus
? SW_64K_S_T_RBPLUS_PATINFO
: SW_64K_S_T_PATINFO
;
// Hand back the index-th entry of the selected table; NULL when no table was selected above.
3833 return (patInfo
!= NULL
) ? &patInfo
[index
] : NULL
;
3837 ************************************************************************************************************************
3838 * Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
3841 * Internal function to calculate address from coord for micro tiled swizzle surface
3845 ************************************************************************************************************************
// Internal helper that calculates the address of a coordinate on a micro tiled swizzle surface.
// It rebuilds a surface-info query from the address query, runs ComputeSurfaceInfoMicroTiled to
// obtain block dimensions and per-mip data, then combines slice, mip and block terms into
// pOut->addr. ADDR_INVALIDPARAMS is one of the possible return codes.
3847 ADDR_E_RETURNCODE
Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
3848 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
3849 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
* pOut
///< [out] output structure
// Zero-initialized surface-info request/response plus local storage for per-mip results.
3852 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn
= {0};
3853 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut
= {0};
3854 ADDR2_MIP_INFO mipInfo
[MaxMipLevels
];
// Copy the surface description from the address query; dimensions and counts are clamped to
// at least 1 through Max(..., 1u).
3856 localIn
.swizzleMode
= pIn
->swizzleMode
;
3857 localIn
.flags
= pIn
->flags
;
3858 localIn
.resourceType
= pIn
->resourceType
;
3859 localIn
.bpp
= pIn
->bpp
;
3860 localIn
.width
= Max(pIn
->unalignedWidth
, 1u);
3861 localIn
.height
= Max(pIn
->unalignedHeight
, 1u);
3862 localIn
.numSlices
= Max(pIn
->numSlices
, 1u);
3863 localIn
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
3864 localIn
.numSamples
= Max(pIn
->numSamples
, 1u);
3865 localIn
.numFrags
= Max(pIn
->numFrags
, 1u);
// Point the output at the local array so the per-mip entries read below are available.
3866 localOut
.pMipInfo
= mipInfo
;
3868 ADDR_E_RETURNCODE ret
= ComputeSurfaceInfoMicroTiled(&localIn
, &localOut
);
// Equation lookup key: (resourceType - 1, swizzle mode, log2 of bytes per element).
3872 const UINT_32 elemLog2
= Log2(pIn
->bpp
>> 3);
3873 const UINT_32 rsrcType
= static_cast<UINT_32
>(pIn
->resourceType
) - 1;
3874 const UINT_32 swMode
= static_cast<UINT_32
>(pIn
->swizzleMode
);
3875 const UINT_32 eqIndex
= m_equationLookupTable
[rsrcType
][swMode
][elemLog2
];
// The address is only computed when the table holds a valid equation index.
3877 if (eqIndex
!= ADDR_INVALID_EQUATION_INDEX
)
// pb: pitch of the requested mip measured in blocks; (xb, yb): block coordinates of the pixel;
// blockIndex linearizes them row by row.
3879 const UINT_32 pb
= mipInfo
[pIn
->mipId
].pitch
/ localOut
.blockWidth
;
3880 const UINT_32 yb
= pIn
->y
/ localOut
.blockHeight
;
3881 const UINT_32 xb
= pIn
->x
/ localOut
.blockWidth
;
3882 const UINT_32 blockIndex
= yb
* pb
+ xb
;
// Each block accounts for 256 bytes of address space (blockIndex * blockSize below).
3883 const UINT_32 blockSize
= 256;
// Offset inside the block, evaluated with the looked-up addressing equation.
3884 const UINT_32 blk256Offset
= ComputeOffsetFromEquation(&m_equationTable
[eqIndex
],
// Address terms: slice offset, macro block offset of the mip, then the block's base offset.
3888 pOut
->addr
= localOut
.sliceSize
* pIn
->slice
+
3889 mipInfo
[pIn
->mipId
].macroBlockOffset
+
3890 (blockIndex
* blockSize
) +
// NOTE(review): presumably the failure path for an invalid equation index - confirm against the full source.
3895 ret
= ADDR_INVALIDPARAMS
;
3903 ************************************************************************************************************************
3904 * Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
3907 * Internal function to calculate address from coord for macro tiled swizzle surface
3911 ************************************************************************************************************************
// Internal helper that calculates the address of a coordinate on a macro tiled swizzle surface.
// It rebuilds a surface-info query from the address query, runs ComputeSurfaceInfoMacroTiled,
// and then derives the in-block offset either from a swizzle pattern table (more than one
// fragment) or from an addressing equation. ADDR_INVALIDPARAMS is one of the possible return codes.
3913 ADDR_E_RETURNCODE
Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
3914 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
3915 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
* pOut
///< [out] output structure
// Zero-initialized surface-info request/response plus local storage for per-mip results.
3918 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn
= {0};
3919 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut
= {0};
3920 ADDR2_MIP_INFO mipInfo
[MaxMipLevels
];
// Copy the surface description from the address query; dimensions and counts are clamped to
// at least 1 through Max(..., 1u).
3922 localIn
.swizzleMode
= pIn
->swizzleMode
;
3923 localIn
.flags
= pIn
->flags
;
3924 localIn
.resourceType
= pIn
->resourceType
;
3925 localIn
.bpp
= pIn
->bpp
;
3926 localIn
.width
= Max(pIn
->unalignedWidth
, 1u);
3927 localIn
.height
= Max(pIn
->unalignedHeight
, 1u);
3928 localIn
.numSlices
= Max(pIn
->numSlices
, 1u);
3929 localIn
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
3930 localIn
.numSamples
= Max(pIn
->numSamples
, 1u);
3931 localIn
.numFrags
= Max(pIn
->numFrags
, 1u);
// Point the output at the local array so the per-mip entries read below are available.
3932 localOut
.pMipInfo
= mipInfo
;
3934 ADDR_E_RETURNCODE ret
= ComputeSurfaceInfoMacroTiled(&localIn
, &localOut
);
3938 const UINT_32 elemLog2
= Log2(pIn
->bpp
>> 3);
// blkMask covers every bit below the block size.
3939 const UINT_32 blkSizeLog2
= GetBlockSizeLog2(pIn
->swizzleMode
);
3940 const UINT_32 blkMask
= (1 << blkSizeLog2
) - 1;
// pipeMask: the low m_pipesLog2 bits. bankMask: GetBankXorBits(blkSizeLog2) bits placed above
// the pipe bits and ColumnBits.
3941 const UINT_32 pipeMask
= (1 << m_pipesLog2
) - 1;
3942 const UINT_32 bankMask
= ((1 << GetBankXorBits(blkSizeLog2
)) - 1) << (m_pipesLog2
+ ColumnBits
);
// XOR swizzle modes: keep only the pipe/bank bits of the caller's pipeBankXor, shift them up by
// m_pipeInterleaveLog2 and confine the result to the block. Other modes use 0.
3943 const UINT_32 pipeBankXor
= IsXor(pIn
->swizzleMode
) ?
3944 (((pIn
->pipeBankXor
& (pipeMask
| bankMask
)) << m_pipeInterleaveLog2
) & blkMask
) : 0;
// More than one fragment: the in-block offset comes from a swizzle pattern table.
3946 if (localIn
.numFrags
> 1)
3948 const ADDR_SW_PATINFO
* pPatInfo
= GetSwizzlePatternInfo(pIn
->swizzleMode
,
3953 if (pPatInfo
!= NULL
)
// pb: surface pitch measured in blocks; (xb, yb): block coordinates of the pixel; blkIdx
// linearizes them row by row.
3955 const UINT_32 pb
= localOut
.pitch
/ localOut
.blockWidth
;
3956 const UINT_32 yb
= pIn
->y
/ localOut
.blockHeight
;
3957 const UINT_32 xb
= pIn
->x
/ localOut
.blockWidth
;
3958 const UINT_64 blkIdx
= yb
* pb
+ xb
;
// Expand the pattern info into a local array of 20 bit settings, which is then evaluated by
// ComputeOffsetFromSwizzlePattern.
3960 ADDR_BIT_SETTING fullSwizzlePattern
[20];
3961 GetSwizzlePatternFromPatternInfo(pPatInfo
, fullSwizzlePattern
);
3963 const UINT_32 blkOffset
=
3964 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64
*>(fullSwizzlePattern
),
// Address = slice offset + block base + in-block offset XORed with the pipe/bank bits.
3971 pOut
->addr
= (localOut
.sliceSize
* pIn
->slice
) +
3972 (blkIdx
<< blkSizeLog2
) +
3973 (blkOffset
^ pipeBankXor
);
// NOTE(review): presumably the failure path when no pattern info was found - confirm against the full source.
3977 ret
= ADDR_INVALIDPARAMS
;
// Equation lookup key: 1 for 3D resources and 0 otherwise, then swizzle mode and element size log2.
3982 const UINT_32 rsrcIdx
= (pIn
->resourceType
== ADDR_RSRC_TEX_3D
) ? 1 : 0;
3983 const UINT_32 swMode
= static_cast<UINT_32
>(pIn
->swizzleMode
);
3984 const UINT_32 eqIndex
= m_equationLookupTable
[rsrcIdx
][swMode
][elemLog2
];
3986 if (eqIndex
!= ADDR_INVALID_EQUATION_INDEX
)
// A non-zero mipTailOffset for the requested mip is treated as "inside the mip tail".
3988 const BOOL_32 inTail
= (mipInfo
[pIn
->mipId
].mipTailOffset
!= 0) ? TRUE
: FALSE
;
// Thin surfaces address slice by slice; otherwise slices are grouped per block, so the slice
// size is scaled by blockSlices and the slice id divided by it.
3989 const BOOL_32 isThin
= IsThin(pIn
->resourceType
, pIn
->swizzleMode
);
3990 const UINT_64 sliceSize
= isThin
? localOut
.sliceSize
: (localOut
.sliceSize
* localOut
.blockSlices
);
3991 const UINT_32 sliceId
= isThin
? pIn
->slice
: (pIn
->slice
/ localOut
.blockSlices
);
// Inside the mip tail the mip's tail coordinates are added to the input coordinates.
3992 const UINT_32 x
= inTail
? (pIn
->x
+ mipInfo
[pIn
->mipId
].mipTailCoordX
) : pIn
->x
;
3993 const UINT_32 y
= inTail
? (pIn
->y
+ mipInfo
[pIn
->mipId
].mipTailCoordY
) : pIn
->y
;
3994 const UINT_32 z
= inTail
? (pIn
->slice
+ mipInfo
[pIn
->mipId
].mipTailCoordZ
) : pIn
->slice
;
// Block coordinates are derived from the unadjusted pIn->x / pIn->y and the mip's own pitch.
3995 const UINT_32 pb
= mipInfo
[pIn
->mipId
].pitch
/ localOut
.blockWidth
;
3996 const UINT_32 yb
= pIn
->y
/ localOut
.blockHeight
;
3997 const UINT_32 xb
= pIn
->x
/ localOut
.blockWidth
;
3998 const UINT_64 blkIdx
= yb
* pb
+ xb
;
// Offset inside the block, evaluated with the looked-up addressing equation.
3999 const UINT_32 blkOffset
= ComputeOffsetFromEquation(&m_equationTable
[eqIndex
],
// Address = slice offset + macro block offset of the mip + block base + in-block offset XORed
// with the pipe/bank bits.
4003 pOut
->addr
= sliceSize
* sliceId
+
4004 mipInfo
[pIn
->mipId
].macroBlockOffset
+
4005 (blkIdx
<< blkSizeLog2
) +
4006 (blkOffset
^ pipeBankXor
);
// NOTE(review): presumably the failure path for an invalid equation index - confirm against the full source.
4010 ret
= ADDR_INVALIDPARAMS
;
4019 ************************************************************************************************************************
4020 * Gfx10Lib::HwlComputeMaxBaseAlignments
4023 * Gets maximum alignments
4025 * maximum alignments
4026 ************************************************************************************************************************
4028 UINT_32
Gfx10Lib::HwlComputeMaxBaseAlignments() const
4030 return m_blockVarSizeLog2
? Max(Size64K
, 1u << m_blockVarSizeLog2
) : Size64K
;
4034 ************************************************************************************************************************
4035 * Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4038 * Gets maximum alignments for metadata
4040 * maximum alignments for metadata
4041 ************************************************************************************************************************
// Gets the maximum base alignment for metadata.
// For each metadata kind (Htile, Cmask, 2D Dcc, 3D Dcc) the function sweeps a list of candidate
// swizzle modes together with element-size and fragment-count exponents, keeps the largest
// GetMetaBlkSize result per kind, and returns the maximum over all four kinds.
4043 UINT_32
Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
// Candidate swizzle modes for Htile/Cmask; the Z_X entry is the variable-size mode when
// m_blockVarSizeLog2 is non-zero and the 64KB mode otherwise.
4047 const AddrSwizzleMode ValidSwizzleModeForXmask
[] =
4050 m_blockVarSizeLog2
? ADDR_SW_VAR_Z_X
: ADDR_SW_64KB_Z_X
,
4053 UINT_32 maxBaseAlignHtile
= 0;
4054 UINT_32 maxBaseAlignCmask
= 0;
// Sweep: every candidate mode x bppLog2 in [0, 3) x numFragLog2 in [0, 4).
4056 for (UINT_32 swIdx
= 0; swIdx
< sizeof(ValidSwizzleModeForXmask
) / sizeof(ValidSwizzleModeForXmask
[0]); swIdx
++)
4058 for (UINT_32 bppLog2
= 0; bppLog2
< 3; bppLog2
++)
4060 for (UINT_32 numFragLog2
= 0; numFragLog2
< 4; numFragLog2
++)
4062 // Max base alignment for Htile
4063 const UINT_32 metaBlkSizeHtile
= GetMetaBlkSize(Gfx10DataDepthStencil
,
4065 ValidSwizzleModeForXmask
[swIdx
],
4071 maxBaseAlignHtile
= Max(maxBaseAlignHtile
, metaBlkSizeHtile
);
4075 // Max base alignment for Cmask
4076 const UINT_32 metaBlkSizeCmask
= GetMetaBlkSize(Gfx10DataFmask
,
4078 ValidSwizzleModeForXmask
[swIdx
],
4084 maxBaseAlignCmask
= Max(maxBaseAlignCmask
, metaBlkSizeCmask
);
4087 // Max base alignment for 2D Dcc
// The R_X entry is the variable-size mode when m_blockVarSizeLog2 is non-zero, 64KB otherwise.
4088 const AddrSwizzleMode ValidSwizzleModeForDcc2D
[] =
4093 m_blockVarSizeLog2
? ADDR_SW_VAR_R_X
: ADDR_SW_64KB_R_X
,
4096 UINT_32 maxBaseAlignDcc2D
= 0;
// Sweep: every candidate mode x bppLog2 in [0, MaxNumOfBpp) x numFragLog2 in [0, 4).
4098 for (UINT_32 swIdx
= 0; swIdx
< sizeof(ValidSwizzleModeForDcc2D
) / sizeof(ValidSwizzleModeForDcc2D
[0]); swIdx
++)
4100 for (UINT_32 bppLog2
= 0; bppLog2
< MaxNumOfBpp
; bppLog2
++)
4102 for (UINT_32 numFragLog2
= 0; numFragLog2
< 4; numFragLog2
++)
4104 const UINT_32 metaBlkSize2D
= GetMetaBlkSize(Gfx10DataColor
,
4106 ValidSwizzleModeForDcc2D
[swIdx
],
4112 maxBaseAlignDcc2D
= Max(maxBaseAlignDcc2D
, metaBlkSize2D
);
4117 // Max base alignment for 3D Dcc
// The R_X entry is the variable-size mode when m_blockVarSizeLog2 is non-zero, 64KB otherwise.
4118 const AddrSwizzleMode ValidSwizzleModeForDcc3D
[] =
4124 m_blockVarSizeLog2
? ADDR_SW_VAR_R_X
: ADDR_SW_64KB_R_X
,
4127 UINT_32 maxBaseAlignDcc3D
= 0;
// Sweep: every candidate mode x bppLog2 in [0, MaxNumOfBpp); no fragment-count loop here.
4129 for (UINT_32 swIdx
= 0; swIdx
< sizeof(ValidSwizzleModeForDcc3D
) / sizeof(ValidSwizzleModeForDcc3D
[0]); swIdx
++)
4131 for (UINT_32 bppLog2
= 0; bppLog2
< MaxNumOfBpp
; bppLog2
++)
4133 const UINT_32 metaBlkSize3D
= GetMetaBlkSize(Gfx10DataColor
,
4135 ValidSwizzleModeForDcc3D
[swIdx
],
4141 maxBaseAlignDcc3D
= Max(maxBaseAlignDcc3D
, metaBlkSize3D
);
// Overall result: the largest of the four per-kind maxima.
4145 return Max(Max(maxBaseAlignHtile
, maxBaseAlignCmask
), Max(maxBaseAlignDcc2D
, maxBaseAlignDcc3D
));
4149 ************************************************************************************************************************
4150 * Gfx10Lib::GetMetaElementSizeLog2
4153 * Gets meta data element size log2
4155 * Meta data element size log2
4156 ************************************************************************************************************************
// Gets the log2 of the meta data element size for a data surface type.
// The result starts at 0 and is selected per data type: color, depth/stencil, or Fmask
// (Fmask is asserted as the only remaining possibility).
4158 INT_32
Gfx10Lib::GetMetaElementSizeLog2(
4159 Gfx10DataType dataType
) ///< Data surface type
4161 INT_32 elemSizeLog2
= 0;
// Color surfaces.
4163 if (dataType
== Gfx10DataColor
)
// Depth/stencil surfaces.
4167 else if (dataType
== Gfx10DataDepthStencil
)
// Anything else must be Fmask.
4173 ADDR_ASSERT(dataType
== Gfx10DataFmask
);
4177 return elemSizeLog2
;
4181 ************************************************************************************************************************
4182 * Gfx10Lib::GetMetaCacheSizeLog2
4185 * Gets meta data cache line size log2
4187 * Meta data cache line size log2
4188 ************************************************************************************************************************
// Gets the log2 of the meta data cache line size for a data surface type.
// The result starts at 0 and is selected per data type: color, depth/stencil, or Fmask
// (Fmask is asserted as the only remaining possibility).
4190 INT_32
Gfx10Lib::GetMetaCacheSizeLog2(
4191 Gfx10DataType dataType
) ///< Data surface type
4193 INT_32 cacheSizeLog2
= 0;
// Color surfaces.
4195 if (dataType
== Gfx10DataColor
)
// Depth/stencil surfaces.
4199 else if (dataType
== Gfx10DataDepthStencil
)
// Anything else must be Fmask.
4205 ADDR_ASSERT(dataType
== Gfx10DataFmask
);
4208 return cacheSizeLog2
;
4212 ************************************************************************************************************************
4213 * Gfx10Lib::HwlComputeSurfaceInfoLinear
4216 * Internal function to calculate alignment for linear surface
4220 ************************************************************************************************************************
4222 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeSurfaceInfoLinear(
4223 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
4224 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
4227 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
4229 if (IsTex1d(pIn
->resourceType
) && (pIn
->height
> 1))
4231 returnCode
= ADDR_INVALIDPARAMS
;
4235 const UINT_32 elementBytes
= pIn
->bpp
>> 3;
4236 const UINT_32 pitchAlign
= (pIn
->swizzleMode
== ADDR_SW_LINEAR_GENERAL
) ? 1 : (256 / elementBytes
);
4237 const UINT_32 mipDepth
= (pIn
->resourceType
== ADDR_RSRC_TEX_3D
) ? pIn
->numSlices
: 1;
4238 UINT_32 pitch
= PowTwoAlign(pIn
->width
, pitchAlign
);
4239 UINT_32 actualHeight
= pIn
->height
;
4240 UINT_64 sliceSize
= 0;
4242 if (pIn
->numMipLevels
> 1)
4244 for (INT_32 i
= static_cast<INT_32
>(pIn
->numMipLevels
) - 1; i
>= 0; i
--)
4246 UINT_32 mipWidth
, mipHeight
;
4248 GetMipSize(pIn
->width
, pIn
->height
, 1, i
, &mipWidth
, &mipHeight
);
4250 const UINT_32 mipActualWidth
= PowTwoAlign(mipWidth
, pitchAlign
);
4252 if (pOut
->pMipInfo
!= NULL
)
4254 pOut
->pMipInfo
[i
].pitch
= mipActualWidth
;
4255 pOut
->pMipInfo
[i
].height
= mipHeight
;
4256 pOut
->pMipInfo
[i
].depth
= mipDepth
;
4257 pOut
->pMipInfo
[i
].offset
= sliceSize
;
4258 pOut
->pMipInfo
[i
].mipTailOffset
= 0;
4259 pOut
->pMipInfo
[i
].macroBlockOffset
= sliceSize
;
4262 sliceSize
+= static_cast<UINT_64
>(mipActualWidth
) * mipHeight
* elementBytes
;
4267 returnCode
= ApplyCustomizedPitchHeight(pIn
, elementBytes
, pitchAlign
, &pitch
, &actualHeight
);
4269 if (returnCode
== ADDR_OK
)
4271 sliceSize
= static_cast<UINT_64
>(pitch
) * actualHeight
* elementBytes
;
4273 if (pOut
->pMipInfo
!= NULL
)
4275 pOut
->pMipInfo
[0].pitch
= pitch
;
4276 pOut
->pMipInfo
[0].height
= actualHeight
;
4277 pOut
->pMipInfo
[0].depth
= mipDepth
;
4278 pOut
->pMipInfo
[0].offset
= 0;
4279 pOut
->pMipInfo
[0].mipTailOffset
= 0;
4280 pOut
->pMipInfo
[0].macroBlockOffset
= 0;
4285 if (returnCode
== ADDR_OK
)
4287 pOut
->pitch
= pitch
;
4288 pOut
->height
= actualHeight
;
4289 pOut
->numSlices
= pIn
->numSlices
;
4290 pOut
->sliceSize
= sliceSize
;
4291 pOut
->surfSize
= sliceSize
* pOut
->numSlices
;
4292 pOut
->baseAlign
= (pIn
->swizzleMode
== ADDR_SW_LINEAR_GENERAL
) ? elementBytes
: 256;
4293 pOut
->blockWidth
= pitchAlign
;
4294 pOut
->blockHeight
= 1;
4295 pOut
->blockSlices
= 1;
4297 // Following members are useless on GFX10
4298 pOut
->mipChainPitch
= 0;
4299 pOut
->mipChainHeight
= 0;
4300 pOut
->mipChainSlice
= 0;
4301 pOut
->epitchIsHeight
= FALSE
;
4303 // Post calculation validate
4304 ADDR_ASSERT(pOut
->sliceSize
> 0);