2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
28 ************************************************************************************************************************
29 * @file gfx10addrlib.cpp
30 * @brief Contains the implementation for the Gfx10Lib class.
31 ************************************************************************************************************************
34 #include "gfx10addrlib.h"
35 #include "gfx10_gb_reg.h"
37 #include "amdgpu_asic_addr.h"
39 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
40 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
45 ************************************************************************************************************************
49 * Creates a Gfx10Lib object.
52 * Returns a Gfx10Lib object pointer.
53 ************************************************************************************************************************
// Factory entry point for the GFX10 address library: forwards pClient to
// V2::Gfx10Lib::CreateObj and hands back whatever that call returns.
// NOTE(review): the embedded numbering jumps from 55 to 57, so the brace lines of this
// function appear to be missing from this listing -- confirm against the upstream file.
55 Addr::Lib
* Gfx10HwlInit(const Client
* pClient
)
57 return V2::Gfx10Lib::CreateObj(pClient
);
63 ////////////////////////////////////////////////////////////////////////////////////////////////////
64 // Static Const Member
65 ////////////////////////////////////////////////////////////////////////////////////////////////////
// Static table with ADDR_SW_MAX_TYPE rows, one per swizzle mode (the trailing comment on each
// row names the mode; rows marked Reserved are all zero). Each row holds 13 flag values whose
// column order is given by the header comment on the opening-brace line.
// The Gfx10Lib constructor copies this table into m_swizzleModeTable.
67 const SwizzleModeFlags
Gfx10Lib::SwizzleModeTable
[ADDR_SW_MAX_TYPE
] =
68 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
69 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
70 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
71 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
72 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
74 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
75 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
76 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
77 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
79 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
80 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
81 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
82 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
84 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
85 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
86 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
87 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
89 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
90 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
91 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
92 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
94 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
95 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_X
96 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_X
97 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
99 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
100 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
101 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
102 {0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_64KB_R_X
104 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_Z_X
105 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
106 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
107 {0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_VAR_R_X
108 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
// Three static lookup tables of Dim3d triples, five entries each.
// Block256_3d: the product of each triple is 256 >> index (256, 128, 64, 32, 16).
// HwlComputeDccInfo indexes it with elemLog2 to pick the thick compress block dimensions.
111 const Dim3d
Gfx10Lib::Block256_3d
[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
// Block64K_Log2_3d: each triple sums to 16 - index.
// NOTE(review): presumably log2 block dimensions indexed by elemLog2 -- confirm against callers.
113 const Dim3d
Gfx10Lib::Block64K_Log2_3d
[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
// Block4K_Log2_3d: each triple sums to 12 - index.
// NOTE(review): presumably log2 block dimensions indexed by elemLog2 -- confirm against callers.
114 const Dim3d
Gfx10Lib::Block4K_Log2_3d
[] = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
117 ************************************************************************************************************************
123 ************************************************************************************************************************
// Constructor. The visible statements set m_class to AI_ADDRLIB, zero-fill m_settings and
// copy the static SwizzleModeTable into the per-instance m_swizzleModeTable.
// NOTE(review): the embedded numbering jumps from 125 to 132, so the initializer list and
// opening brace are not visible in this listing -- confirm against the upstream file.
125 Gfx10Lib::Gfx10Lib(const Client
* pClient
)
132 m_class
= AI_ADDRLIB
;
133 memset(&m_settings
, 0, sizeof(m_settings
));
134 memcpy(m_swizzleModeTable
, SwizzleModeTable
, sizeof(SwizzleModeTable
));
138 ************************************************************************************************************************
139 * Gfx10Lib::~Gfx10Lib
143 ************************************************************************************************************************
// Destructor. No body statements are visible in this listing.
145 Gfx10Lib::~Gfx10Lib()
150 ************************************************************************************************************************
151 * Gfx10Lib::HwlComputeHtileInfo
154 * Interface function stub of AddrComputeHtileInfo
158 ************************************************************************************************************************
// Fills pOut with HTILE sizing derived from pIn: pitch and height padded to whole meta blocks,
// base alignment, meta block dimensions, per-slice size, total htileBytes and, when
// pOut->pMipInfo is non-NULL, one record per mip level.
// ret starts as ADDR_OK and is set to ADDR_INVALIDPARAMS when the request fails the check below;
// presumably ret is the value returned (the return statement is not visible in this listing).
// NOTE(review): the embedded line numbers skip in several places, so braces, else keywords and
// some argument lines appear to have been dropped -- confirm against the upstream file.
160 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeHtileInfo(
161 const ADDR2_COMPUTE_HTILE_INFO_INPUT
* pIn
, ///< [in] input structure
162 ADDR2_COMPUTE_HTILE_INFO_OUTPUT
* pOut
///< [out] output structure
165 ADDR_E_RETURNCODE ret
= ADDR_OK
;
// Accept only ADDR_SW_64KB_Z_X, or ADDR_SW_VAR_Z_X when m_blockVarSizeLog2 is nonzero;
// pipeAligned must also be TRUE.
167 if (((pIn
->swizzleMode
!= ADDR_SW_64KB_Z_X
) &&
168 ((pIn
->swizzleMode
!= ADDR_SW_VAR_Z_X
) || (m_blockVarSizeLog2
== 0))) ||
169 (pIn
->hTileFlags
.pipeAligned
!= TRUE
))
171 ret
= ADDR_INVALIDPARAMS
;
// Meta block byte size for depth-stencil data; the remaining GetMetaBlkSize arguments and the
// declaration of metaBlk are not visible in this listing.
176 const UINT_32 metaBlkSize
= GetMetaBlkSize(Gfx10DataDepthStencil
,
// Pad the unaligned surface dimensions up to whole meta blocks.
184 pOut
->pitch
= PowTwoAlign(pIn
->unalignedWidth
, metaBlk
.w
);
185 pOut
->height
= PowTwoAlign(pIn
->unalignedHeight
, metaBlk
.h
);
// Base alignment is the larger of one meta block and 1 << (m_pipesLog2 + 11) bytes.
186 pOut
->baseAlign
= Max(metaBlkSize
, 1u << (m_pipesLog2
+ 11u));
187 pOut
->metaBlkWidth
= metaBlk
.w
;
188 pOut
->metaBlkHeight
= metaBlk
.h
;
// Mip-chain path.
190 if (pIn
->numMipLevels
> 1)
192 ADDR_ASSERT(pIn
->firstMipIdInTail
<= pIn
->numMipLevels
);
// The running offset starts at metaBlkSize when some mips live in the tail
// (firstMipIdInTail != numMipLevels) and at 0 otherwise.
194 UINT_32 offset
= (pIn
->firstMipIdInTail
== pIn
->numMipLevels
) ? 0 : metaBlkSize
;
// Walk the non-tail mips from index firstMipIdInTail - 1 down to 0, so each lower-numbered
// mip is placed after the ones already visited.
196 for (INT_32 i
= static_cast<INT_32
>(pIn
->firstMipIdInTail
) - 1; i
>=0; i
--)
198 UINT_32 mipWidth
, mipHeight
;
200 GetMipSize(pIn
->unalignedWidth
, pIn
->unalignedHeight
, 1, i
, &mipWidth
, &mipHeight
);
202 mipWidth
= PowTwoAlign(mipWidth
, metaBlk
.w
);
203 mipHeight
= PowTwoAlign(mipHeight
, metaBlk
.h
);
// Size of this mip in whole meta blocks, then in bytes.
205 const UINT_32 pitchInM
= mipWidth
/ metaBlk
.w
;
206 const UINT_32 heightInM
= mipHeight
/ metaBlk
.h
;
207 const UINT_32 mipSliceSize
= pitchInM
* heightInM
* metaBlkSize
;
209 if (pOut
->pMipInfo
!= NULL
)
211 pOut
->pMipInfo
[i
].inMiptail
= FALSE
;
212 pOut
->pMipInfo
[i
].offset
= offset
;
213 pOut
->pMipInfo
[i
].sliceSize
= mipSliceSize
;
216 offset
+= mipSliceSize
;
// The accumulated offset is the per-slice size of the whole chain.
219 pOut
->sliceSize
= offset
;
220 pOut
->metaBlkNumPerSlice
= offset
/ metaBlkSize
;
221 pOut
->htileBytes
= pOut
->sliceSize
* pIn
->numSlices
;
// Mips from firstMipIdInTail upward are flagged as tail mips with offset 0 and sliceSize 0.
223 if (pOut
->pMipInfo
!= NULL
)
225 for (UINT_32 i
= pIn
->firstMipIdInTail
; i
< pIn
->numMipLevels
; i
++)
227 pOut
->pMipInfo
[i
].inMiptail
= TRUE
;
228 pOut
->pMipInfo
[i
].offset
= 0;
229 pOut
->pMipInfo
[i
].sliceSize
= 0;
// When a tail exists, its first mip is then given sliceSize metaBlkSize.
232 if (pIn
->firstMipIdInTail
!= pIn
->numMipLevels
)
234 pOut
->pMipInfo
[pIn
->firstMipIdInTail
].sliceSize
= metaBlkSize
;
// Single-level path: sizes come straight from the padded pitch and height.
// NOTE(review): presumably the else branch of the numMipLevels test above; the else line is
// not visible in this listing.
240 const UINT_32 pitchInM
= pOut
->pitch
/ metaBlk
.w
;
241 const UINT_32 heightInM
= pOut
->height
/ metaBlk
.h
;
243 pOut
->metaBlkNumPerSlice
= pitchInM
* heightInM
;
244 pOut
->sliceSize
= pOut
->metaBlkNumPerSlice
* metaBlkSize
;
245 pOut
->htileBytes
= pOut
->sliceSize
* pIn
->numSlices
;
247 if (pOut
->pMipInfo
!= NULL
)
249 pOut
->pMipInfo
[0].inMiptail
= FALSE
;
250 pOut
->pMipInfo
[0].offset
= 0;
251 pOut
->pMipInfo
[0].sliceSize
= pOut
->sliceSize
;
260 ************************************************************************************************************************
261 * Gfx10Lib::HwlComputeCmaskInfo
264 * Interface function stub of AddrComputeCmaskInfo
268 ************************************************************************************************************************
// Fills pOut with CMASK sizing derived from pIn: pitch and height padded to whole meta blocks,
// base alignment (one meta block), meta block dimensions, meta blocks per slice, per-slice size,
// total cmaskBytes and, when pOut->pMipInfo is non-NULL, one record per mip level.
// ret starts as ADDR_OK and is set to ADDR_INVALIDPARAMS when the request fails the check below;
// presumably ret is the value returned (the return statement is not visible in this listing).
// NOTE(review): the embedded line numbers skip in several places, so braces, else keywords and
// some argument lines appear to have been dropped -- confirm against the upstream file.
270 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeCmaskInfo(
271 const ADDR2_COMPUTE_CMASK_INFO_INPUT
* pIn
, ///< [in] input structure
272 ADDR2_COMPUTE_CMASK_INFO_OUTPUT
* pOut
///< [out] output structure
275 ADDR_E_RETURNCODE ret
= ADDR_OK
;
// Accept only 2D resources that are pipe aligned and use ADDR_SW_64KB_Z_X, or
// ADDR_SW_VAR_Z_X when m_blockVarSizeLog2 is nonzero.
277 if ((pIn
->resourceType
!= ADDR_RSRC_TEX_2D
) ||
278 (pIn
->cMaskFlags
.pipeAligned
!= TRUE
) ||
279 ((pIn
->swizzleMode
!= ADDR_SW_64KB_Z_X
) &&
280 ((pIn
->swizzleMode
!= ADDR_SW_VAR_Z_X
) || (m_blockVarSizeLog2
== 0))))
282 ret
= ADDR_INVALIDPARAMS
;
// Meta block byte size for Fmask-type data; the remaining GetMetaBlkSize arguments and the
// declaration of metaBlk are not visible in this listing.
287 const UINT_32 metaBlkSize
= GetMetaBlkSize(Gfx10DataFmask
,
// Pad the unaligned surface dimensions up to whole meta blocks.
295 pOut
->pitch
= PowTwoAlign(pIn
->unalignedWidth
, metaBlk
.w
);
296 pOut
->height
= PowTwoAlign(pIn
->unalignedHeight
, metaBlk
.h
);
297 pOut
->baseAlign
= metaBlkSize
;
298 pOut
->metaBlkWidth
= metaBlk
.w
;
299 pOut
->metaBlkHeight
= metaBlk
.h
;
// Mip-chain path.
301 if (pIn
->numMipLevels
> 1)
303 ADDR_ASSERT(pIn
->firstMipIdInTail
<= pIn
->numMipLevels
);
// The running block count starts at 1 when some mips live in the tail
// (firstMipIdInTail != numMipLevels) and at 0 otherwise.
305 UINT_32 metaBlkPerSlice
= (pIn
->firstMipIdInTail
== pIn
->numMipLevels
) ? 0 : 1;
// Walk the non-tail mips from index firstMipIdInTail - 1 down to 0, accumulating meta blocks.
307 for (INT_32 i
= static_cast<INT_32
>(pIn
->firstMipIdInTail
) - 1; i
>= 0; i
--)
309 UINT_32 mipWidth
, mipHeight
;
311 GetMipSize(pIn
->unalignedWidth
, pIn
->unalignedHeight
, 1, i
, &mipWidth
, &mipHeight
);
313 mipWidth
= PowTwoAlign(mipWidth
, metaBlk
.w
);
314 mipHeight
= PowTwoAlign(mipHeight
, metaBlk
.h
);
316 const UINT_32 pitchInM
= mipWidth
/ metaBlk
.w
;
317 const UINT_32 heightInM
= mipHeight
/ metaBlk
.h
;
// The byte offset of this mip is the number of meta blocks placed so far times metaBlkSize.
319 if (pOut
->pMipInfo
!= NULL
)
321 pOut
->pMipInfo
[i
].inMiptail
= FALSE
;
322 pOut
->pMipInfo
[i
].offset
= metaBlkPerSlice
* metaBlkSize
;
323 pOut
->pMipInfo
[i
].sliceSize
= pitchInM
* heightInM
* metaBlkSize
;
326 metaBlkPerSlice
+= pitchInM
* heightInM
;
329 pOut
->metaBlkNumPerSlice
= metaBlkPerSlice
;
// Mips from firstMipIdInTail upward are flagged as tail mips with offset 0 and sliceSize 0.
331 if (pOut
->pMipInfo
!= NULL
)
333 for (UINT_32 i
= pIn
->firstMipIdInTail
; i
< pIn
->numMipLevels
; i
++)
335 pOut
->pMipInfo
[i
].inMiptail
= TRUE
;
336 pOut
->pMipInfo
[i
].offset
= 0;
337 pOut
->pMipInfo
[i
].sliceSize
= 0;
// When a tail exists, its first mip is then given sliceSize metaBlkSize.
340 if (pIn
->firstMipIdInTail
!= pIn
->numMipLevels
)
342 pOut
->pMipInfo
[pIn
->firstMipIdInTail
].sliceSize
= metaBlkSize
;
// Single-level path: block count comes straight from the padded pitch and height.
// NOTE(review): presumably the else branch of the numMipLevels test above; the else line is
// not visible in this listing.
348 const UINT_32 pitchInM
= pOut
->pitch
/ metaBlk
.w
;
349 const UINT_32 heightInM
= pOut
->height
/ metaBlk
.h
;
351 pOut
->metaBlkNumPerSlice
= pitchInM
* heightInM
;
353 if (pOut
->pMipInfo
!= NULL
)
355 pOut
->pMipInfo
[0].inMiptail
= FALSE
;
356 pOut
->pMipInfo
[0].offset
= 0;
357 pOut
->pMipInfo
[0].sliceSize
= pOut
->metaBlkNumPerSlice
* metaBlkSize
;
// Totals are derived from metaBlkNumPerSlice.
361 pOut
->sliceSize
= pOut
->metaBlkNumPerSlice
* metaBlkSize
;
362 pOut
->cmaskBytes
= pOut
->sliceSize
* pIn
->numSlices
;
369 ************************************************************************************************************************
370 * Gfx10Lib::HwlComputeDccInfo
373 * Interface function to compute DCC key info
377 ************************************************************************************************************************
// Fills pOut with DCC key sizing derived from pIn: compress block dimensions, meta block
// dimensions, base alignment, padded pitch/height/depth, per-slice size, total dccRamSize and,
// when pOut->pMipInfo is non-NULL, one record per mip level.
// ret starts as ADDR_OK and is set to ADDR_INVALIDPARAMS by either rejection below;
// presumably ret is the value returned (the return statement is not visible in this listing).
// NOTE(review): the embedded line numbers skip in several places, so braces, else keywords and
// some argument lines appear to have been dropped -- confirm against the upstream file.
379 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeDccInfo(
380 const ADDR2_COMPUTE_DCCINFO_INPUT
* pIn
, ///< [in] input structure
381 ADDR2_COMPUTE_DCCINFO_OUTPUT
* pOut
///< [out] output structure
384 ADDR_E_RETURNCODE ret
= ADDR_OK
;
// Only ADDR_SW_64KB_Z_X and ADDR_SW_64KB_R_X pass this first check.
386 if (pIn
->swizzleMode
!= ADDR_SW_64KB_Z_X
&& pIn
->swizzleMode
!= ADDR_SW_64KB_R_X
)
388 // Hardware does not support DCC for this swizzle mode.
389 ret
= ADDR_INVALIDPARAMS
;
391 else if (m_settings
.dccUnsup3DSwDis
&& IsTex3d(pIn
->resourceType
) && IsDisplaySwizzle(pIn
->swizzleMode
))
393 // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
394 ret
= ADDR_INVALIDPARAMS
;
// NOTE(review): the first check above lets ADDR_SW_64KB_Z_X through, while this assert expects
// an RtOpt swizzle mode; the ADDR_SW_64KB_Z_X row of SwizzleModeTable has RtOpt 0 -- confirm
// whether Z_X is meant to reach this point.
398 // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
399 ADDR_ASSERT(IsRtOptSwizzle(pIn
->swizzleMode
));
// Element size in bytes (bpp / 8) and fragment count, both as log2.
402 const UINT_32 elemLog2
= Log2(pIn
->bpp
>> 3);
403 const UINT_32 numFragLog2
= Log2(pIn
->numFrags
);
// Meta block byte size for color data; several GetMetaBlkSize arguments and the declaration of
// metaBlk are not visible in this listing.
404 const UINT_32 metaBlkSize
= GetMetaBlkSize(Gfx10DataColor
,
409 pIn
->dccKeyFlags
.pipeAligned
,
411 const BOOL_32 isThick
= IsThick(pIn
->resourceType
, pIn
->swizzleMode
);
// Compress block dimensions come from Block256_3d for thick surfaces and Block256_2d otherwise;
// the depth is 1 for non-thick surfaces.
413 pOut
->compressBlkWidth
= isThick
? Block256_3d
[elemLog2
].w
: Block256_2d
[elemLog2
].w
;
414 pOut
->compressBlkHeight
= isThick
? Block256_3d
[elemLog2
].h
: Block256_2d
[elemLog2
].h
;
415 pOut
->compressBlkDepth
= isThick
? Block256_3d
[elemLog2
].d
: 1;
417 pOut
->dccRamBaseAlign
= metaBlkSize
;
418 pOut
->metaBlkWidth
= metaBlk
.w
;
419 pOut
->metaBlkHeight
= metaBlk
.h
;
420 pOut
->metaBlkDepth
= metaBlk
.d
;
// Pad the unaligned dimensions and the slice count up to whole meta blocks.
422 pOut
->pitch
= PowTwoAlign(pIn
->unalignedWidth
, metaBlk
.w
);
423 pOut
->height
= PowTwoAlign(pIn
->unalignedHeight
, metaBlk
.h
);
424 pOut
->depth
= PowTwoAlign(pIn
->numSlices
, metaBlk
.d
);
// Mip-chain path.
426 if (pIn
->numMipLevels
> 1)
428 ADDR_ASSERT(pIn
->firstMipIdInTail
<= pIn
->numMipLevels
);
// The running offset starts at metaBlkSize when some mips live in the tail
// (firstMipIdInTail != numMipLevels) and at 0 otherwise.
430 UINT_32 offset
= (pIn
->firstMipIdInTail
== pIn
->numMipLevels
) ? 0 : metaBlkSize
;
// Walk the non-tail mips from index firstMipIdInTail - 1 down to 0, accumulating byte offsets.
432 for (INT_32 i
= static_cast<INT_32
>(pIn
->firstMipIdInTail
) - 1; i
>= 0; i
--)
434 UINT_32 mipWidth
, mipHeight
;
436 GetMipSize(pIn
->unalignedWidth
, pIn
->unalignedHeight
, 1, i
, &mipWidth
, &mipHeight
);
438 mipWidth
= PowTwoAlign(mipWidth
, metaBlk
.w
);
439 mipHeight
= PowTwoAlign(mipHeight
, metaBlk
.h
);
441 const UINT_32 pitchInM
= mipWidth
/ metaBlk
.w
;
442 const UINT_32 heightInM
= mipHeight
/ metaBlk
.h
;
443 const UINT_32 mipSliceSize
= pitchInM
* heightInM
* metaBlkSize
;
445 if (pOut
->pMipInfo
!= NULL
)
447 pOut
->pMipInfo
[i
].inMiptail
= FALSE
;
448 pOut
->pMipInfo
[i
].offset
= offset
;
449 pOut
->pMipInfo
[i
].sliceSize
= mipSliceSize
;
452 offset
+= mipSliceSize
;
// The accumulated offset is the per-slice size; the total multiplies it by the number of
// meta-block-deep layers (depth / metaBlk.d).
455 pOut
->dccRamSliceSize
= offset
;
456 pOut
->metaBlkNumPerSlice
= offset
/ metaBlkSize
;
457 pOut
->dccRamSize
= pOut
->dccRamSliceSize
* (pOut
->depth
/ metaBlk
.d
);
// Mips from firstMipIdInTail upward are flagged as tail mips with offset 0 and sliceSize 0.
459 if (pOut
->pMipInfo
!= NULL
)
461 for (UINT_32 i
= pIn
->firstMipIdInTail
; i
< pIn
->numMipLevels
; i
++)
463 pOut
->pMipInfo
[i
].inMiptail
= TRUE
;
464 pOut
->pMipInfo
[i
].offset
= 0;
465 pOut
->pMipInfo
[i
].sliceSize
= 0;
// When a tail exists, its first mip is then given sliceSize metaBlkSize.
468 if (pIn
->firstMipIdInTail
!= pIn
->numMipLevels
)
470 pOut
->pMipInfo
[pIn
->firstMipIdInTail
].sliceSize
= metaBlkSize
;
// Single-level path: sizes come straight from the padded pitch and height.
// NOTE(review): presumably the else branch of the numMipLevels test above; the else line is
// not visible in this listing.
476 const UINT_32 pitchInM
= pOut
->pitch
/ metaBlk
.w
;
477 const UINT_32 heightInM
= pOut
->height
/ metaBlk
.h
;
479 pOut
->metaBlkNumPerSlice
= pitchInM
* heightInM
;
480 pOut
->dccRamSliceSize
= pOut
->metaBlkNumPerSlice
* metaBlkSize
;
481 pOut
->dccRamSize
= pOut
->dccRamSliceSize
* (pOut
->depth
/ metaBlk
.d
);
483 if (pOut
->pMipInfo
!= NULL
)
485 pOut
->pMipInfo
[0].inMiptail
= FALSE
;
486 pOut
->pMipInfo
[0].offset
= 0;
487 pOut
->pMipInfo
[0].sliceSize
= pOut
->dccRamSliceSize
;
496 ************************************************************************************************************************
497 * Gfx10Lib::HwlComputeCmaskAddrFromCoord
500 * Interface function stub of AddrComputeCmaskAddrFromCoord
504 ************************************************************************************************************************
// Computes the CMASK byte address and bit position for the coordinate in pIn.
// It first derives the CMASK layout through ComputeCmaskInfo (dimensions and slice count are
// clamped to at least 1), and only when that succeeds writes pOut->addr and pOut->bitPosition.
// presumably returnCode is the value returned (the return statement is not visible here).
// NOTE(review): the embedded line numbers skip in several places, so braces and some argument
// lines appear to have been dropped from this listing -- confirm against the upstream file.
506 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeCmaskAddrFromCoord(
507 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
508 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT
* pOut
) ///< [out] output structure
510 // Only support pipe aligned CMask
511 ADDR_ASSERT(pIn
->cMaskFlags
.pipeAligned
== TRUE
);
// Build the layout query from the caller input.
513 ADDR2_COMPUTE_CMASK_INFO_INPUT input
= {};
514 input
.size
= sizeof(input
);
515 input
.cMaskFlags
= pIn
->cMaskFlags
;
516 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
517 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
518 input
.numSlices
= Max(pIn
->numSlices
, 1u);
519 input
.swizzleMode
= pIn
->swizzleMode
;
520 input
.resourceType
= pIn
->resourceType
;
522 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output
= {};
523 output
.size
= sizeof(output
);
525 ADDR_E_RETURNCODE returnCode
= ComputeCmaskInfo(&input
, &output
);
527 if (returnCode
== ADDR_OK
)
// Pattern lookup index: m_xmaskBaseIndex plus log2 of the Fmask element size in bytes.
529 const UINT_32 fmaskBpp
= GetFmaskBpp(pIn
->numSamples
, pIn
->numFrags
);
530 const UINT_32 fmaskElemLog2
= Log2(fmaskBpp
>> 3);
531 const UINT_32 pipeMask
= (1 << m_pipesLog2
) - 1;
532 const UINT_32 index
= m_xmaskBaseIndex
+ fmaskElemLog2
;
// ADDR_SW_VAR_Z_X uses the VAR table; otherwise the RB+ or plain 64K table is chosen by
// m_settings.supportRbPlus.
533 const UINT_8
* patIdxTable
= (pIn
->swizzleMode
== ADDR_SW_VAR_Z_X
) ? CMASK_VAR_RBPLUS_PATIDX
:
534 (m_settings
.supportRbPlus
? CMASK_64K_RBPLUS_PATIDX
: CMASK_64K_PATIDX
);
// blkOffset is computed with one extra bit (see the nibble comment below): its low bit feeds
// bitPosition and the remaining bits are the byte offset inside the meta block.
536 const UINT_32 blkSizeLog2
= Log2(output
.metaBlkWidth
) + Log2(output
.metaBlkHeight
) - 7;
537 const UINT_32 blkMask
= (1 << blkSizeLog2
) - 1;
538 const UINT_32 blkOffset
= ComputeOffsetFromSwizzlePattern(CMASK_SW_PATTERN
[patIdxTable
[index
]],
539 blkSizeLog2
+ 1, // +1 for nibble offset
// Row-major index of the meta block that contains (x, y).
544 const UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
545 const UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
546 const UINT_32 pb
= output
.pitch
/ output
.metaBlkWidth
;
547 const UINT_32 blkIndex
= (yb
* pb
) + xb
;
548 const UINT_32 pipeXor
= ((pIn
->pipeXor
& pipeMask
) << m_pipeInterleaveLog2
) & blkMask
;
// Address = slice offset + meta block offset + in-block byte offset XORed with pipeXor.
// The slice size is widened to UINT_64 before the multiply, matching the HTILE and DCC
// address functions, so the slice offset is not truncated should both operands be 32-bit.
550 pOut
->addr
= (static_cast<UINT_64
>(output
.sliceSize
) * pIn
->slice
) +
551 (blkIndex
* (1 << blkSizeLog2
)) +
552 ((blkOffset
>> 1) ^ pipeXor
);
// The low bit of blkOffset selects bit position 0 or 4 within the addressed byte.
553 pOut
->bitPosition
= (blkOffset
& 1) << 2;
560 ************************************************************************************************************************
561 * Gfx10Lib::HwlComputeHtileAddrFromCoord
564 * Interface function stub of AddrComputeHtileAddrFromCoord
568 ************************************************************************************************************************
// Computes the HTILE byte address for the coordinate in pIn.
// Requests with more than one mip level get ADDR_NOTIMPLEMENTED. Otherwise the HTILE layout is
// derived through ComputeHtileInfo (dimensions and slice count clamped to at least 1, a single
// mip level) and, when that succeeds, pOut->addr is written.
// presumably returnCode is the value returned (the return statement is not visible here).
// NOTE(review): the embedded line numbers skip in several places, so braces, an else keyword and
// some argument lines appear to have been dropped -- confirm against the upstream file.
570 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeHtileAddrFromCoord(
571 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
572 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT
* pOut
) ///< [out] output structure
574 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
// Mip-mapped HTILE addressing is not implemented here.
576 if (pIn
->numMipLevels
> 1)
578 returnCode
= ADDR_NOTIMPLEMENTED
;
// Build the layout query from the caller input.
582 ADDR2_COMPUTE_HTILE_INFO_INPUT input
= {0};
583 input
.size
= sizeof(input
);
584 input
.hTileFlags
= pIn
->hTileFlags
;
585 input
.depthFlags
= pIn
->depthflags
;
586 input
.swizzleMode
= pIn
->swizzleMode
;
587 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
588 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
589 input
.numSlices
= Max(pIn
->numSlices
, 1u);
590 input
.numMipLevels
= 1;
592 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output
= {0};
593 output
.size
= sizeof(output
);
595 returnCode
= ComputeHtileInfo(&input
, &output
);
597 if (returnCode
== ADDR_OK
)
// Pattern lookup index: m_xmaskBaseIndex plus log2 of the sample count; the table is chosen
// by m_settings.supportRbPlus.
599 const UINT_32 numSampleLog2
= Log2(pIn
->numSamples
);
600 const UINT_32 pipeMask
= (1 << m_pipesLog2
) - 1;
601 const UINT_32 index
= m_xmaskBaseIndex
+ numSampleLog2
;
602 const UINT_8
* patIdxTable
= m_settings
.supportRbPlus
? HTILE_RBPLUS_PATIDX
: HTILE_PATIDX
;
// blkOffset is computed with one extra bit; it is shifted right by one before use below.
604 const UINT_32 blkSizeLog2
= Log2(output
.metaBlkWidth
) + Log2(output
.metaBlkHeight
) - 4;
605 const UINT_32 blkMask
= (1 << blkSizeLog2
) - 1;
606 const UINT_32 blkOffset
= ComputeOffsetFromSwizzlePattern(HTILE_SW_PATTERN
[patIdxTable
[index
]],
607 blkSizeLog2
+ 1, // +1 for nibble offset
// Row-major index of the meta block that contains (x, y).
612 const UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
613 const UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
614 const UINT_32 pb
= output
.pitch
/ output
.metaBlkWidth
;
615 const UINT_32 blkIndex
= (yb
* pb
) + xb
;
616 const UINT_32 pipeXor
= ((pIn
->pipeXor
& pipeMask
) << m_pipeInterleaveLog2
) & blkMask
;
// Address = slice offset (widened to UINT_64 before the multiply) + meta block offset +
// in-block byte offset XORed with pipeXor.
618 pOut
->addr
= (static_cast<UINT_64
>(output
.sliceSize
) * pIn
->slice
) +
619 (blkIndex
* (1 << blkSizeLog2
)) +
620 ((blkOffset
>> 1) ^ pipeXor
);
628 ************************************************************************************************************************
629 * Gfx10Lib::HwlComputeHtileCoordFromAddr
632 * Interface function stub of AddrComputeHtileCoordFromAddr
636 ************************************************************************************************************************
// Reverse HTILE lookup (address to coordinate). The only visible statement invokes
// ADDR_NOT_IMPLEMENTED(), so neither pIn nor pOut is used by the visible code.
// NOTE(review): the return statement is not visible in this listing.
638 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeHtileCoordFromAddr(
639 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT
* pIn
, ///< [in] input structure
640 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT
* pOut
) ///< [out] output structure
642 ADDR_NOT_IMPLEMENTED();
648 ************************************************************************************************************************
649 * Gfx10Lib::HwlComputeDccAddrFromCoord
652 * Interface function stub of AddrComputeDccAddrFromCoord
656 ************************************************************************************************************************
// Computes the DCC key byte address for the coordinate in pIn.
// The visible rejection conditions return ADDR_NOTSUPPORTED unless the resource is a 2D texture
// in ADDR_SW_64KB_R_X, not a linear DCC key, with at most one fragment and one mip level.
// Otherwise a swizzle pattern is selected from the pipe and packer configuration and
// pOut->addr is written. presumably returnCode is the value returned (not visible here).
// NOTE(review): the embedded line numbers skip in several places, so braces, else keywords, the
// last rejection condition and some argument lines appear to have been dropped -- confirm
// against the upstream file.
658 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeDccAddrFromCoord(
659 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
660 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT
* pOut
) ///< [out] output structure
662 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
664 if ((pIn
->resourceType
!= ADDR_RSRC_TEX_2D
) ||
665 (pIn
->swizzleMode
!= ADDR_SW_64KB_R_X
) ||
666 (pIn
->dccKeyFlags
.linear
== TRUE
) ||
667 (pIn
->numFrags
> 1) ||
668 (pIn
->numMipLevels
> 1) ||
671 returnCode
= ADDR_NOTSUPPORTED
;
// Pattern lookup index starts at m_dccBaseIndex plus log2 of the element size in bytes and is
// then advanced in multiples of MaxNumOfBpp depending on the configuration.
675 const UINT_32 elemLog2
= Log2(pIn
->bpp
>> 3);
676 const UINT_32 numPipeLog2
= m_pipesLog2
;
677 const UINT_32 pipeMask
= (1 << numPipeLog2
) - 1;
678 UINT_32 index
= m_dccBaseIndex
+ elemLog2
;
679 const UINT_8
* patIdxTable
;
// RB+ parts use the RBPLUS table.
681 if (m_settings
.supportRbPlus
)
683 patIdxTable
= DCC_64K_R_X_RBPLUS_PATIDX
;
685 if (pIn
->dccKeyFlags
.pipeAligned
)
687 index
+= MaxNumOfBpp
;
// With fewer than four packers (m_numPkrLog2 < 2) the group is chosen by m_pipesLog2 alone.
689 if (m_numPkrLog2
< 2)
691 index
+= m_pipesLog2
* MaxNumOfBpp
;
// NOTE(review): presumably the else branch of the m_numPkrLog2 test; the else line is not
// visible in this listing.
695 // 4 groups for "m_numPkrLog2 < 2" case
696 index
+= 4 * MaxNumOfBpp
;
698 const UINT_32 dccPipePerPkr
= 3;
700 index
+= (m_numPkrLog2
- 2) * dccPipePerPkr
* MaxNumOfBpp
+
701 (m_pipesLog2
- m_numPkrLog2
) * MaxNumOfBpp
;
// NOTE(review): presumably the non-RB+ branch; the else line is not visible in this listing.
707 patIdxTable
= DCC_64K_R_X_PATIDX
;
709 if (pIn
->dccKeyFlags
.pipeAligned
)
711 index
+= (numPipeLog2
+ UnalignedDccType
) * MaxNumOfBpp
;
715 index
+= Min(numPipeLog2
, UnalignedDccType
- 1) * MaxNumOfBpp
;
// blkOffset is computed with one extra bit; it is shifted right by one before use below.
719 const UINT_32 blkSizeLog2
= Log2(pIn
->metaBlkWidth
) + Log2(pIn
->metaBlkHeight
) + elemLog2
- 8;
720 const UINT_32 blkMask
= (1 << blkSizeLog2
) - 1;
721 const UINT_32 blkOffset
= ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN
[patIdxTable
[index
]],
722 blkSizeLog2
+ 1, // +1 for nibble offset
// Row-major index of the meta block that contains (x, y); layout values come from pIn here.
727 const UINT_32 xb
= pIn
->x
/ pIn
->metaBlkWidth
;
728 const UINT_32 yb
= pIn
->y
/ pIn
->metaBlkHeight
;
729 const UINT_32 pb
= pIn
->pitch
/ pIn
->metaBlkWidth
;
730 const UINT_32 blkIndex
= (yb
* pb
) + xb
;
731 const UINT_32 pipeXor
= ((pIn
->pipeXor
& pipeMask
) << m_pipeInterleaveLog2
) & blkMask
;
// Address = slice offset (widened to UINT_64 before the multiply) + meta block offset +
// in-block byte offset XORed with pipeXor.
733 pOut
->addr
= (static_cast<UINT_64
>(pIn
->dccRamSliceSize
) * pIn
->slice
) +
734 (blkIndex
* (1 << blkSizeLog2
)) +
735 ((blkOffset
>> 1) ^ pipeXor
);
742 ************************************************************************************************************************
743 * Gfx10Lib::HwlInitGlobalParams
746 * Initializes global parameters
749 * TRUE if all settings are valid
751 ************************************************************************************************************************
// Decodes the GB_ADDR_CONFIG register value from pCreateIn into member settings: pipe count,
// pipe interleave size, maximum compressed fragments, the base indices used for swizzle
// pattern lookups and, on RB+ parts, packer and VAR block parameters.
// valid starts as TRUE; presumably it is the value returned (not visible in this listing).
// NOTE(review): the embedded line numbers skip throughout this function, so the bodies of the
// NUM_PIPES cases, break statements, default labels, braces and else keywords appear to have
// been dropped -- confirm against the upstream file.
753 BOOL_32
Gfx10Lib::HwlInitGlobalParams(
754 const ADDR_CREATE_INPUT
* pCreateIn
) ///< [in] create input
756 BOOL_32 valid
= TRUE
;
757 GB_ADDR_CONFIG_gfx10 gbAddrConfig
;
759 gbAddrConfig
.u32All
= pCreateIn
->regValue
.gbAddrConfig
;
// Pipe count field; the per-case assignments are not visible in this listing.
761 // These values are copied from CModel code
762 switch (gbAddrConfig
.bits
.NUM_PIPES
)
764 case ADDR_CONFIG_1_PIPE
:
768 case ADDR_CONFIG_2_PIPE
:
772 case ADDR_CONFIG_4_PIPE
:
776 case ADDR_CONFIG_8_PIPE
:
780 case ADDR_CONFIG_16_PIPE
:
784 case ADDR_CONFIG_32_PIPE
:
788 case ADDR_CONFIG_64_PIPE
:
793 ADDR_ASSERT_ALWAYS();
// Pipe interleave field: record both the byte size and its log2 (256B -> 8 ... 2KB -> 11).
798 switch (gbAddrConfig
.bits
.PIPE_INTERLEAVE_SIZE
)
800 case ADDR_CONFIG_PIPE_INTERLEAVE_256B
:
801 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_256B
;
802 m_pipeInterleaveLog2
= 8;
804 case ADDR_CONFIG_PIPE_INTERLEAVE_512B
:
805 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_512B
;
806 m_pipeInterleaveLog2
= 9;
808 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB
:
809 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_1KB
;
810 m_pipeInterleaveLog2
= 10;
812 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB
:
813 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_2KB
;
814 m_pipeInterleaveLog2
= 11;
817 ADDR_ASSERT_ALWAYS();
822 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
823 // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
824 // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
825 ADDR_ASSERT(m_pipeInterleaveBytes
== ADDR_PIPEINTERLEAVE_256B
);
// Maximum compressed fragments field, stored as log2 (1 -> 0 ... 8 -> 3).
827 switch (gbAddrConfig
.bits
.MAX_COMPRESSED_FRAGS
)
829 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS
:
831 m_maxCompFragLog2
= 0;
833 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS
:
835 m_maxCompFragLog2
= 1;
837 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS
:
839 m_maxCompFragLog2
= 2;
841 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS
:
843 m_maxCompFragLog2
= 3;
846 ADDR_ASSERT_ALWAYS();
// Advance the pattern base indices: one MaxNumOfAA group for the unaligned case, then one
// group per pipe-count step (MaxNumOfAA for xmask, MaxNumOfBpp for color).
852 // Skip unaligned case
853 m_xmaskBaseIndex
+= MaxNumOfAA
;
855 m_xmaskBaseIndex
+= m_pipesLog2
* MaxNumOfAA
;
856 m_colorBaseIndex
+= m_pipesLog2
* MaxNumOfBpp
;
// RB+ parts: read the packer count, derive m_numSaLog2 as one less (floored at 0), and add
// further offsets to the base indices when m_numPkrLog2 >= 2.
858 if (m_settings
.supportRbPlus
)
860 m_numPkrLog2
= gbAddrConfig
.bits
.NUM_PKRS
;
861 m_numSaLog2
= (m_numPkrLog2
> 0) ? (m_numPkrLog2
- 1) : 0;
863 ADDR_ASSERT((m_numPkrLog2
<= m_pipesLog2
) && ((m_pipesLog2
- m_numPkrLog2
) <= 2));
// Compile-time check that the HTILE and CMASK RB+ index tables have the same length.
865 ADDR_C_ASSERT(sizeof(HTILE_RBPLUS_PATIDX
) / sizeof(HTILE_RBPLUS_PATIDX
[0]) ==
866 sizeof(CMASK_64K_RBPLUS_PATIDX
) / sizeof(CMASK_64K_RBPLUS_PATIDX
[0]));
868 if (m_numPkrLog2
>= 2)
870 m_colorBaseIndex
+= (2 * m_numPkrLog2
- 2) * MaxNumOfBpp
;
871 m_xmaskBaseIndex
+= (m_numPkrLog2
- 1) * 3 * MaxNumOfAA
;
// NOTE(review): presumably the non-RB+ branch; the else line and the tail of this expression
// are not visible in this listing.
876 const UINT_32 numPipeType
= static_cast<UINT_32
>(ADDR_CONFIG_64_PIPE
) -
877 static_cast<UINT_32
>(ADDR_CONFIG_1_PIPE
) +
// Compile-time checks on the non-RB+ index table lengths.
880 ADDR_C_ASSERT(sizeof(HTILE_PATIDX
) / sizeof(HTILE_PATIDX
[0]) == (numPipeType
+ 1) * MaxNumOfAA
);
882 ADDR_C_ASSERT(sizeof(HTILE_PATIDX
) / sizeof(HTILE_PATIDX
[0]) ==
883 sizeof(CMASK_64K_PATIDX
) / sizeof(CMASK_64K_PATIDX
[0]));
887 if (m_settings
.supportRbPlus
)
889 // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
890 // corresponding SW_64KB_* mode
891 m_blockVarSizeLog2
= m_pipesLog2
+ 14;
903 ************************************************************************************************************************
904 * Gfx10Lib::HwlConvertChipFamily
907 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
910 ************************************************************************************************************************
// Maps the incoming chip family and revision to a ChipFamily value and sets per-chip flags in
// m_settings and m_configFlags. The visible code initialises family to ADDR_CHIP_FAMILY_NAVI;
// presumably family is the value returned (the return statement is not visible here).
// NOTE(review): the embedded line numbers skip in several places, so the switch and case lines
// that select between the assignments below appear to have been dropped -- confirm against the
// upstream file before relying on which flags apply to which chip.
912 ChipFamily
Gfx10Lib::HwlConvertChipFamily(
913 UINT_32 chipFamily
, ///< [in] chip family defined in atiid.h
914 UINT_32 chipRevision
) ///< [in] chip revision defined in "asic_family"_id.h
916 ChipFamily family
= ADDR_CHIP_FAMILY_NAVI
;
918 m_settings
.dccUnsup3DSwDis
= 1;
923 m_settings
.isDcn2
= 1;
// Sienna Cichlid revisions enable RB+ support and clear the 3D display-swizzle DCC restriction.
925 if (ASICREV_IS_SIENNA_CICHLID(chipRevision
))
927 m_settings
.supportRbPlus
= 1;
928 m_settings
.dccUnsup3DSwDis
= 0;
932 ADDR_ASSERT(!"Unknown chip family");
// dsMipmapHtileFix is set, then cleared again for Navi10 revisions.
936 m_settings
.dsMipmapHtileFix
= 1;
938 if (ASICREV_IS_NAVI10_P(chipRevision
))
940 m_settings
.dsMipmapHtileFix
= 0;
943 m_configFlags
.use32bppFor422Fmt
= TRUE
;
949 ************************************************************************************************************************
950 * Gfx10Lib::GetBlk256SizeLog2
957 ************************************************************************************************************************
// Writes log2 block dimensions into pBlock for the given resource type, swizzle mode and
// element size. Both paths start from blockBits = 8 - elemLog2.
// Thin path: Z-order swizzles also subtract numSamplesLog2; the bits are split between width
// and height with width taking the odd bit.
// Thick path: the bits are split three ways, with depth taking the first leftover bit and
// width the second.
// NOTE(review): the embedded line numbers skip here; the thin-path assignment of pBlock->d,
// the else keyword and the braces are not visible in this listing -- confirm against upstream.
959 void Gfx10Lib::GetBlk256SizeLog2(
960 AddrResourceType resourceType
, ///< [in] Resource type
961 AddrSwizzleMode swizzleMode
, ///< [in] Swizzle mode
962 UINT_32 elemLog2
, ///< [in] element size log2
963 UINT_32 numSamplesLog2
, ///< [in] number of samples
964 Dim3d
* pBlock
///< [out] block size
967 if (IsThin(resourceType
, swizzleMode
))
969 UINT_32 blockBits
= 8 - elemLog2
;
971 if (IsZOrderSwizzle(swizzleMode
))
973 blockBits
-= numSamplesLog2
;
// w = ceil(blockBits / 2), h = floor(blockBits / 2).
976 pBlock
->w
= (blockBits
>> 1) + (blockBits
& 1);
977 pBlock
->h
= (blockBits
>> 1);
982 ADDR_ASSERT(IsThick(resourceType
, swizzleMode
));
984 UINT_32 blockBits
= 8 - elemLog2
;
// d gets an extra bit when blockBits % 3 is 1 or 2; w gets one only when it is 2.
986 pBlock
->d
= (blockBits
/ 3) + (((blockBits
% 3) > 0) ? 1 : 0);
987 pBlock
->w
= (blockBits
/ 3) + (((blockBits
% 3) > 1) ? 1 : 0);
988 pBlock
->h
= (blockBits
/ 3);
993 ************************************************************************************************************************
994 * Gfx10Lib::GetCompressedBlockSizeLog2
997 * Get compress block size
1001 ************************************************************************************************************************
// Writes the log2 compress block dimensions into pBlock. For color data this simply delegates
// to GetBlk256SizeLog2; any other data type must be depth-stencil or Fmask (asserted).
// NOTE(review): the embedded line numbers skip after the assert, so the assignments made for
// the depth-stencil / Fmask case are not visible in this listing -- confirm against upstream.
1003 void Gfx10Lib::GetCompressedBlockSizeLog2(
1004 Gfx10DataType dataType
, ///< [in] Data type
1005 AddrResourceType resourceType
, ///< [in] Resource type
1006 AddrSwizzleMode swizzleMode
, ///< [in] Swizzle mode
1007 UINT_32 elemLog2
, ///< [in] element size log2
1008 UINT_32 numSamplesLog2
, ///< [in] number of samples
1009 Dim3d
* pBlock
///< [out] block size
1012 if (dataType
== Gfx10DataColor
)
1014 GetBlk256SizeLog2(resourceType
, swizzleMode
, elemLog2
, numSamplesLog2
, pBlock
);
1018 ADDR_ASSERT((dataType
== Gfx10DataDepthStencil
) || (dataType
== Gfx10DataFmask
));
1026 ************************************************************************************************************************
1027 * Gfx10Lib::GetMetaOverlapLog2
1030 * Get meta block overlap
1034 ************************************************************************************************************************
// Computes the meta block overlap (log2) for the given data type, resource type, swizzle mode,
// element size and sample count. The starting value is the effective pipe count (log2) minus
// the larger of the compress block size and the 256B micro block size (both log2 sums of
// w + h + d); the final visible statement clamps the result to be non-negative.
// NOTE(review): the embedded line numbers skip in several places, so the declarations of
// compBlock and microBlock, the bodies of the two if statements, the braces and the return
// statement are not visible in this listing -- confirm against the upstream file.
1036 INT_32
Gfx10Lib::GetMetaOverlapLog2(
1037 Gfx10DataType dataType
, ///< [in] Data type
1038 AddrResourceType resourceType
, ///< [in] Resource type
1039 AddrSwizzleMode swizzleMode
, ///< [in] Swizzle mode
1040 UINT_32 elemLog2
, ///< [in] element size log2
1041 UINT_32 numSamplesLog2
///< [in] number of samples
// Both helpers fill their Dim3d out-parameter with log2 dimensions. The second call passes the
// address of microBlock, the variable read just below.
1047 GetCompressedBlockSizeLog2(dataType
, resourceType
, swizzleMode
, elemLog2
, numSamplesLog2
, &compBlock
);
1048 GetBlk256SizeLog2(resourceType
, swizzleMode
, elemLog2
, numSamplesLog2
, &microBlock
);
1050 const INT_32 compSizeLog2
= compBlock
.w
+ compBlock
.h
+ compBlock
.d
;
1051 const INT_32 blk256SizeLog2
= microBlock
.w
+ microBlock
.h
+ microBlock
.d
;
1052 const INT_32 maxSizeLog2
= Max(compSizeLog2
, blk256SizeLog2
);
1053 const INT_32 numPipesLog2
= GetEffectiveNumPipes();
1054 INT_32 overlap
= numPipesLog2
- maxSizeLog2
;
// Adjustment for RB+ parts with more than two pipes; the adjustment itself is not visible.
1056 if ((numPipesLog2
> 1) && m_settings
.supportRbPlus
)
1061 // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1062 if ((elemLog2
== 4) && (numSamplesLog2
== 3))
// Never report a negative overlap.
1066 overlap
= Max(overlap
, 0);
1071 ************************************************************************************************************************
1072 * Gfx10Lib::Get3DMetaOverlapLog2
1075 * Get 3d meta block overlap
1079 ************************************************************************************************************************
// Computes the 3D meta block overlap (log2). The starting value is the effective pipe count
// (log2) minus the log2 width of the 256B micro block, queried with zero samples.
// NOTE(review): the embedded line numbers skip in several places, so the declaration of
// microBlock, the bodies of both if statements, the braces and the return statement are not
// visible in this listing -- confirm against the upstream file.
1081 INT_32
Gfx10Lib::Get3DMetaOverlapLog2(
1082 AddrResourceType resourceType
, ///< [in] Resource type
1083 AddrSwizzleMode swizzleMode
, ///< [in] Swizzle mode
1084 UINT_32 elemLog2
///< [in] element size log2
// The helper fills microBlock, whose width is read on the next statement.
1088 GetBlk256SizeLog2(resourceType
, swizzleMode
, elemLog2
, 0, &microBlock
);
1090 INT_32 overlap
= GetEffectiveNumPipes() - static_cast<INT_32
>(microBlock
.w
);
// Adjustment for RB+ parts; the adjustment itself is not visible in this listing.
1092 if (m_settings
.supportRbPlus
)
// A negative overlap or a standard swizzle takes a separate path whose body is not visible.
1097 if ((overlap
< 0) || (IsStandardSwizzle(resourceType
, swizzleMode
) == TRUE
))
1105 ************************************************************************************************************************
1106 * Gfx10Lib::GetPipeRotateAmount
1109 * Get pipe rotate amount
1112 * Pipe rotate amount
1113 ************************************************************************************************************************
1116 INT_32
Gfx10Lib::GetPipeRotateAmount(
1117 AddrResourceType resourceType
, ///< [in] Resource type
1118 AddrSwizzleMode swizzleMode
///< [in] Swizzle mode
1123 if (m_settings
.supportRbPlus
&& (m_pipesLog2
>= (m_numSaLog2
+ 1)) && (m_pipesLog2
> 1))
1125 amount
= ((m_pipesLog2
== (m_numSaLog2
+ 1)) && IsRbAligned(resourceType
, swizzleMode
)) ?
1126 1 : m_pipesLog2
- (m_numSaLog2
+ 1);
1133 ************************************************************************************************************************
1134 * Gfx10Lib::GetMetaBlkSize
1137 * Get metadata block size
1141 ************************************************************************************************************************
1143 UINT_32
Gfx10Lib::GetMetaBlkSize(
1144 Gfx10DataType dataType
, ///< [in] Data type
1145 AddrResourceType resourceType
, ///< [in] Resource type
1146 AddrSwizzleMode swizzleMode
, ///< [in] Swizzle mode
1147 UINT_32 elemLog2
, ///< [in] element size log2
1148 UINT_32 numSamplesLog2
, ///< [in] number of samples
1149 BOOL_32 pipeAlign
, ///< [in] pipe align
1150 Dim3d
* pBlock
///< [out] block size
1153 INT_32 metablkSizeLog2
;
1154 const INT_32 metaElemSizeLog2
= GetMetaElementSizeLog2(dataType
);
1155 const INT_32 metaCacheSizeLog2
= GetMetaCacheSizeLog2(dataType
);
1156 const INT_32 compBlkSizeLog2
= (dataType
== Gfx10DataColor
) ? 8 : 6 + numSamplesLog2
+ elemLog2
;
1157 const INT_32 metaBlkSamplesLog2
= (dataType
== Gfx10DataDepthStencil
) ?
1158 numSamplesLog2
: Min(numSamplesLog2
, m_maxCompFragLog2
);
1159 const INT_32 dataBlkSizeLog2
= GetBlockSizeLog2(swizzleMode
);
1160 INT_32 numPipesLog2
= m_pipesLog2
;
1162 if (IsThin(resourceType
, swizzleMode
))
1164 if ((pipeAlign
== FALSE
) ||
1165 (IsStandardSwizzle(resourceType
, swizzleMode
) == TRUE
) ||
1166 (IsDisplaySwizzle(resourceType
, swizzleMode
) == TRUE
))
1170 metablkSizeLog2
= Max(static_cast<INT_32
>(m_pipeInterleaveLog2
) + numPipesLog2
, 12);
1171 metablkSizeLog2
= Min(metablkSizeLog2
, dataBlkSizeLog2
);
1175 metablkSizeLog2
= Min(dataBlkSizeLog2
, 12);
1180 if (m_settings
.supportRbPlus
&& (m_pipesLog2
== m_numSaLog2
+ 1) && (m_pipesLog2
> 1))
1185 INT_32 pipeRotateLog2
= GetPipeRotateAmount(resourceType
, swizzleMode
);
1187 if (numPipesLog2
>= 4)
1189 INT_32 overlapLog2
= GetMetaOverlapLog2(dataType
, resourceType
, swizzleMode
, elemLog2
, numSamplesLog2
);
1191 // In 16Bpe 8xaa, we have an extra overlap bit
1192 if ((pipeRotateLog2
> 0) &&
1194 (numSamplesLog2
== 3) &&
1195 (IsZOrderSwizzle(swizzleMode
) || (GetEffectiveNumPipes() > 3)))
1200 metablkSizeLog2
= metaCacheSizeLog2
+ overlapLog2
+ numPipesLog2
;
1201 metablkSizeLog2
= Max(metablkSizeLog2
, static_cast<INT_32
>(m_pipeInterleaveLog2
) + numPipesLog2
);
1203 if (m_settings
.supportRbPlus
&&
1204 IsRtOptSwizzle(swizzleMode
) &&
1205 (numPipesLog2
== 6) &&
1206 (numSamplesLog2
== 3) &&
1207 (m_maxCompFragLog2
== 3) &&
1208 (metablkSizeLog2
< 15))
1210 metablkSizeLog2
= 15;
1215 metablkSizeLog2
= Max(static_cast<INT_32
>(m_pipeInterleaveLog2
) + numPipesLog2
, 12);
1218 if (dataType
== Gfx10DataDepthStencil
)
1220 // For htile surfaces, pad meta block size to 2K * num_pipes
1221 metablkSizeLog2
= Max(metablkSizeLog2
, 11 + numPipesLog2
);
1224 const INT_32 compFragLog2
= Min(m_maxCompFragLog2
, numSamplesLog2
);
1226 if (IsRtOptSwizzle(swizzleMode
) && (compFragLog2
> 1) && (pipeRotateLog2
>= 1))
1228 const INT_32 tmp
= 8 + m_pipesLog2
+ Max(pipeRotateLog2
, compFragLog2
- 1);
1230 metablkSizeLog2
= Max(metablkSizeLog2
, tmp
);
1234 const INT_32 metablkBitsLog2
=
1235 metablkSizeLog2
+ compBlkSizeLog2
- elemLog2
- metaBlkSamplesLog2
- metaElemSizeLog2
;
1236 pBlock
->w
= 1 << ((metablkBitsLog2
>> 1) + (metablkBitsLog2
& 1));
1237 pBlock
->h
= 1 << (metablkBitsLog2
>> 1);
1242 ADDR_ASSERT(IsThick(resourceType
, swizzleMode
));
1246 if (m_settings
.supportRbPlus
&&
1247 (m_pipesLog2
== m_numSaLog2
+ 1) &&
1248 (m_pipesLog2
> 1) &&
1249 IsRbAligned(resourceType
, swizzleMode
))
1254 const INT_32 overlapLog2
= Get3DMetaOverlapLog2(resourceType
, swizzleMode
, elemLog2
);
1256 metablkSizeLog2
= metaCacheSizeLog2
+ overlapLog2
+ numPipesLog2
;
1257 metablkSizeLog2
= Max(metablkSizeLog2
, static_cast<INT_32
>(m_pipeInterleaveLog2
) + numPipesLog2
);
1258 metablkSizeLog2
= Max(metablkSizeLog2
, 12);
1262 metablkSizeLog2
= 12;
1265 const INT_32 metablkBitsLog2
=
1266 metablkSizeLog2
+ compBlkSizeLog2
- elemLog2
- metaBlkSamplesLog2
- metaElemSizeLog2
;
1267 pBlock
->w
= 1 << ((metablkBitsLog2
/ 3) + (((metablkBitsLog2
% 3) > 0) ? 1 : 0));
1268 pBlock
->h
= 1 << ((metablkBitsLog2
/ 3) + (((metablkBitsLog2
% 3) > 1) ? 1 : 0));
1269 pBlock
->d
= 1 << (metablkBitsLog2
/ 3);
1272 return (1 << static_cast<UINT_32
>(metablkSizeLog2
));
1276 ************************************************************************************************************************
1277 * Gfx10Lib::ConvertSwizzlePatternToEquation
1280 * Convert swizzle pattern to equation.
1284 ************************************************************************************************************************
1286 VOID
Gfx10Lib::ConvertSwizzlePatternToEquation(
1287 UINT_32 elemLog2
, ///< [in] element bytes log2
1288 AddrResourceType rsrcType
, ///< [in] resource type
1289 AddrSwizzleMode swMode
, ///< [in] swizzle mode
1290 const ADDR_SW_PATINFO
* pPatInfo
, ///< [in] swizzle pattern infor
1291 ADDR_EQUATION
* pEquation
) ///< [out] equation converted from swizzle pattern
1294 ADDR_BIT_SETTING fullSwizzlePattern
[20];
1295 GetSwizzlePatternFromPatternInfo(pPatInfo
, fullSwizzlePattern
);
1297 const ADDR_BIT_SETTING
* pSwizzle
= fullSwizzlePattern
;
1298 const UINT_32 blockSizeLog2
= GetBlockSizeLog2(swMode
);
1300 pEquation
->numBits
= blockSizeLog2
;
1301 pEquation
->stackedDepthSlices
= FALSE
;
1303 for (UINT_32 i
= 0; i
< elemLog2
; i
++)
1305 pEquation
->addr
[i
].channel
= 0;
1306 pEquation
->addr
[i
].valid
= 1;
1307 pEquation
->addr
[i
].index
= i
;
1310 if (IsXor(swMode
) == FALSE
)
1312 for (UINT_32 i
= elemLog2
; i
< blockSizeLog2
; i
++)
1314 ADDR_ASSERT(IsPow2(pSwizzle
[i
].value
));
1316 if (pSwizzle
[i
].x
!= 0)
1318 ADDR_ASSERT(IsPow2(static_cast<UINT_32
>(pSwizzle
[i
].x
)));
1320 pEquation
->addr
[i
].channel
= 0;
1321 pEquation
->addr
[i
].valid
= 1;
1322 pEquation
->addr
[i
].index
= Log2(pSwizzle
[i
].x
) + elemLog2
;
1324 else if (pSwizzle
[i
].y
!= 0)
1326 ADDR_ASSERT(IsPow2(static_cast<UINT_32
>(pSwizzle
[i
].y
)));
1328 pEquation
->addr
[i
].channel
= 1;
1329 pEquation
->addr
[i
].valid
= 1;
1330 pEquation
->addr
[i
].index
= Log2(pSwizzle
[i
].y
);
1334 ADDR_ASSERT(pSwizzle
[i
].z
!= 0);
1335 ADDR_ASSERT(IsPow2(static_cast<UINT_32
>(pSwizzle
[i
].z
)));
1337 pEquation
->addr
[i
].channel
= 2;
1338 pEquation
->addr
[i
].valid
= 1;
1339 pEquation
->addr
[i
].index
= Log2(pSwizzle
[i
].z
);
1342 pEquation
->xor1
[i
].value
= 0;
1343 pEquation
->xor2
[i
].value
= 0;
1346 else if (IsThin(rsrcType
, swMode
))
1349 ComputeThinBlockDimension(&dim
.w
, &dim
.h
, &dim
.d
, 8u << elemLog2
, 0, rsrcType
, swMode
);
1351 const UINT_32 blkXLog2
= Log2(dim
.w
);
1352 const UINT_32 blkYLog2
= Log2(dim
.h
);
1353 const UINT_32 blkXMask
= dim
.w
- 1;
1354 const UINT_32 blkYMask
= dim
.h
- 1;
1356 ADDR_BIT_SETTING swizzle
[ADDR_MAX_EQUATION_BIT
];
1359 UINT_32 bMask
= (1 << elemLog2
) - 1;
1361 for (UINT_32 i
= elemLog2
; i
< blockSizeLog2
; i
++)
1363 if (IsPow2(pSwizzle
[i
].value
))
1365 if (pSwizzle
[i
].x
!= 0)
1367 ADDR_ASSERT((xMask
& pSwizzle
[i
].x
) == 0);
1368 xMask
|= pSwizzle
[i
].x
;
1370 const UINT_32 xLog2
= Log2(pSwizzle
[i
].x
);
1372 ADDR_ASSERT(xLog2
< blkXLog2
);
1374 pEquation
->addr
[i
].channel
= 0;
1375 pEquation
->addr
[i
].valid
= 1;
1376 pEquation
->addr
[i
].index
= xLog2
+ elemLog2
;
1380 ADDR_ASSERT(pSwizzle
[i
].y
!= 0);
1381 ADDR_ASSERT((yMask
& pSwizzle
[i
].y
) == 0);
1382 yMask
|= pSwizzle
[i
].y
;
1384 pEquation
->addr
[i
].channel
= 1;
1385 pEquation
->addr
[i
].valid
= 1;
1386 pEquation
->addr
[i
].index
= Log2(pSwizzle
[i
].y
);
1388 ADDR_ASSERT(pEquation
->addr
[i
].index
< blkYLog2
);
1391 swizzle
[i
].value
= 0;
1396 if (pSwizzle
[i
].z
!= 0)
1398 ADDR_ASSERT(IsPow2(static_cast<UINT_32
>(pSwizzle
[i
].z
)));
1400 pEquation
->xor2
[i
].channel
= 2;
1401 pEquation
->xor2
[i
].valid
= 1;
1402 pEquation
->xor2
[i
].index
= Log2(pSwizzle
[i
].z
);
1405 swizzle
[i
].x
= pSwizzle
[i
].x
;
1406 swizzle
[i
].y
= pSwizzle
[i
].y
;
1407 swizzle
[i
].z
= swizzle
[i
].s
= 0;
1409 ADDR_ASSERT(IsPow2(swizzle
[i
].value
) == FALSE
);
1411 const UINT_32 xHi
= swizzle
[i
].x
& (~blkXMask
);
1415 ADDR_ASSERT(IsPow2(xHi
));
1416 ADDR_ASSERT(pEquation
->xor1
[i
].value
== 0);
1418 pEquation
->xor1
[i
].channel
= 0;
1419 pEquation
->xor1
[i
].valid
= 1;
1420 pEquation
->xor1
[i
].index
= Log2(xHi
) + elemLog2
;
1422 swizzle
[i
].x
&= blkXMask
;
1425 const UINT_32 yHi
= swizzle
[i
].y
& (~blkYMask
);
1429 ADDR_ASSERT(IsPow2(yHi
));
1433 ADDR_ASSERT(pEquation
->xor1
[i
].value
== 0);
1434 pEquation
->xor1
[i
].channel
= 1;
1435 pEquation
->xor1
[i
].valid
= 1;
1436 pEquation
->xor1
[i
].index
= Log2(yHi
);
1440 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1441 pEquation
->xor2
[i
].channel
= 1;
1442 pEquation
->xor2
[i
].valid
= 1;
1443 pEquation
->xor2
[i
].index
= Log2(yHi
);
1446 swizzle
[i
].y
&= blkYMask
;
1449 if (swizzle
[i
].value
== 0)
1456 const UINT_32 pipeIntMask
= (1 << m_pipeInterleaveLog2
) - 1;
1457 const UINT_32 blockMask
= (1 << blockSizeLog2
) - 1;
1459 ADDR_ASSERT((bMask
& pipeIntMask
) == pipeIntMask
);
1461 while (bMask
!= blockMask
)
1463 for (UINT_32 i
= m_pipeInterleaveLog2
; i
< blockSizeLog2
; i
++)
1465 if ((bMask
& (1 << i
)) == 0)
1467 if (IsPow2(swizzle
[i
].value
))
1469 if (swizzle
[i
].x
!= 0)
1471 ADDR_ASSERT((xMask
& swizzle
[i
].x
) == 0);
1472 xMask
|= swizzle
[i
].x
;
1474 const UINT_32 xLog2
= Log2(swizzle
[i
].x
);
1476 ADDR_ASSERT(xLog2
< blkXLog2
);
1478 pEquation
->addr
[i
].channel
= 0;
1479 pEquation
->addr
[i
].valid
= 1;
1480 pEquation
->addr
[i
].index
= xLog2
+ elemLog2
;
1484 ADDR_ASSERT(swizzle
[i
].y
!= 0);
1485 ADDR_ASSERT((yMask
& swizzle
[i
].y
) == 0);
1486 yMask
|= swizzle
[i
].y
;
1488 pEquation
->addr
[i
].channel
= 1;
1489 pEquation
->addr
[i
].valid
= 1;
1490 pEquation
->addr
[i
].index
= Log2(swizzle
[i
].y
);
1492 ADDR_ASSERT(pEquation
->addr
[i
].index
< blkYLog2
);
1495 swizzle
[i
].value
= 0;
1500 const UINT_32 x
= swizzle
[i
].x
& xMask
;
1501 const UINT_32 y
= swizzle
[i
].y
& yMask
;
1505 ADDR_ASSERT(IsPow2(x
));
1507 if (pEquation
->xor1
[i
].value
== 0)
1509 pEquation
->xor1
[i
].channel
= 0;
1510 pEquation
->xor1
[i
].valid
= 1;
1511 pEquation
->xor1
[i
].index
= Log2(x
) + elemLog2
;
1515 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1516 pEquation
->xor2
[i
].channel
= 0;
1517 pEquation
->xor2
[i
].valid
= 1;
1518 pEquation
->xor2
[i
].index
= Log2(x
) + elemLog2
;
1524 ADDR_ASSERT(IsPow2(y
));
1526 if (pEquation
->xor1
[i
].value
== 0)
1528 pEquation
->xor1
[i
].channel
= 1;
1529 pEquation
->xor1
[i
].valid
= 1;
1530 pEquation
->xor1
[i
].index
= Log2(y
);
1534 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1535 pEquation
->xor2
[i
].channel
= 1;
1536 pEquation
->xor2
[i
].valid
= 1;
1537 pEquation
->xor2
[i
].index
= Log2(y
);
1548 ADDR_ASSERT((xMask
== blkXMask
) && (yMask
== blkYMask
));
1552 const UINT_32 blkXLog2
= (blockSizeLog2
== 12) ? Block4K_Log2_3d
[elemLog2
].w
: Block64K_Log2_3d
[elemLog2
].w
;
1553 const UINT_32 blkYLog2
= (blockSizeLog2
== 12) ? Block4K_Log2_3d
[elemLog2
].h
: Block64K_Log2_3d
[elemLog2
].h
;
1554 const UINT_32 blkZLog2
= (blockSizeLog2
== 12) ? Block4K_Log2_3d
[elemLog2
].d
: Block64K_Log2_3d
[elemLog2
].d
;
1555 const UINT_32 blkXMask
= (1 << blkXLog2
) - 1;
1556 const UINT_32 blkYMask
= (1 << blkYLog2
) - 1;
1557 const UINT_32 blkZMask
= (1 << blkZLog2
) - 1;
1559 ADDR_BIT_SETTING swizzle
[ADDR_MAX_EQUATION_BIT
];
1563 UINT_32 bMask
= (1 << elemLog2
) - 1;
1565 for (UINT_32 i
= elemLog2
; i
< blockSizeLog2
; i
++)
1567 if (IsPow2(pSwizzle
[i
].value
))
1569 if (pSwizzle
[i
].x
!= 0)
1571 ADDR_ASSERT((xMask
& pSwizzle
[i
].x
) == 0);
1572 xMask
|= pSwizzle
[i
].x
;
1574 const UINT_32 xLog2
= Log2(pSwizzle
[i
].x
);
1576 ADDR_ASSERT(xLog2
< blkXLog2
);
1578 pEquation
->addr
[i
].channel
= 0;
1579 pEquation
->addr
[i
].valid
= 1;
1580 pEquation
->addr
[i
].index
= xLog2
+ elemLog2
;
1582 else if (pSwizzle
[i
].y
!= 0)
1584 ADDR_ASSERT((yMask
& pSwizzle
[i
].y
) == 0);
1585 yMask
|= pSwizzle
[i
].y
;
1587 pEquation
->addr
[i
].channel
= 1;
1588 pEquation
->addr
[i
].valid
= 1;
1589 pEquation
->addr
[i
].index
= Log2(pSwizzle
[i
].y
);
1591 ADDR_ASSERT(pEquation
->addr
[i
].index
< blkYLog2
);
1595 ADDR_ASSERT(pSwizzle
[i
].z
!= 0);
1596 ADDR_ASSERT((zMask
& pSwizzle
[i
].z
) == 0);
1597 zMask
|= pSwizzle
[i
].z
;
1599 pEquation
->addr
[i
].channel
= 2;
1600 pEquation
->addr
[i
].valid
= 1;
1601 pEquation
->addr
[i
].index
= Log2(pSwizzle
[i
].z
);
1603 ADDR_ASSERT(pEquation
->addr
[i
].index
< blkZLog2
);
1606 swizzle
[i
].value
= 0;
1611 swizzle
[i
].x
= pSwizzle
[i
].x
;
1612 swizzle
[i
].y
= pSwizzle
[i
].y
;
1613 swizzle
[i
].z
= pSwizzle
[i
].z
;
1616 ADDR_ASSERT(IsPow2(swizzle
[i
].value
) == FALSE
);
1618 const UINT_32 xHi
= swizzle
[i
].x
& (~blkXMask
);
1619 const UINT_32 yHi
= swizzle
[i
].y
& (~blkYMask
);
1620 const UINT_32 zHi
= swizzle
[i
].z
& (~blkZMask
);
1622 ADDR_ASSERT((xHi
== 0) || (yHi
== 0) || (zHi
== 0));
1626 ADDR_ASSERT(IsPow2(xHi
));
1627 ADDR_ASSERT(pEquation
->xor1
[i
].value
== 0);
1629 pEquation
->xor1
[i
].channel
= 0;
1630 pEquation
->xor1
[i
].valid
= 1;
1631 pEquation
->xor1
[i
].index
= Log2(xHi
) + elemLog2
;
1633 swizzle
[i
].x
&= blkXMask
;
1638 ADDR_ASSERT(IsPow2(yHi
));
1640 if (pEquation
->xor1
[i
].value
== 0)
1642 pEquation
->xor1
[i
].channel
= 1;
1643 pEquation
->xor1
[i
].valid
= 1;
1644 pEquation
->xor1
[i
].index
= Log2(yHi
);
1648 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1649 pEquation
->xor2
[i
].channel
= 1;
1650 pEquation
->xor2
[i
].valid
= 1;
1651 pEquation
->xor2
[i
].index
= Log2(yHi
);
1654 swizzle
[i
].y
&= blkYMask
;
1659 ADDR_ASSERT(IsPow2(zHi
));
1661 if (pEquation
->xor1
[i
].value
== 0)
1663 pEquation
->xor1
[i
].channel
= 2;
1664 pEquation
->xor1
[i
].valid
= 1;
1665 pEquation
->xor1
[i
].index
= Log2(zHi
);
1669 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1670 pEquation
->xor2
[i
].channel
= 2;
1671 pEquation
->xor2
[i
].valid
= 1;
1672 pEquation
->xor2
[i
].index
= Log2(zHi
);
1675 swizzle
[i
].z
&= blkZMask
;
1678 if (swizzle
[i
].value
== 0)
1685 const UINT_32 pipeIntMask
= (1 << m_pipeInterleaveLog2
) - 1;
1686 const UINT_32 blockMask
= (1 << blockSizeLog2
) - 1;
1688 ADDR_ASSERT((bMask
& pipeIntMask
) == pipeIntMask
);
1690 while (bMask
!= blockMask
)
1692 for (UINT_32 i
= m_pipeInterleaveLog2
; i
< blockSizeLog2
; i
++)
1694 if ((bMask
& (1 << i
)) == 0)
1696 if (IsPow2(swizzle
[i
].value
))
1698 if (swizzle
[i
].x
!= 0)
1700 ADDR_ASSERT((xMask
& swizzle
[i
].x
) == 0);
1701 xMask
|= swizzle
[i
].x
;
1703 const UINT_32 xLog2
= Log2(swizzle
[i
].x
);
1705 ADDR_ASSERT(xLog2
< blkXLog2
);
1707 pEquation
->addr
[i
].channel
= 0;
1708 pEquation
->addr
[i
].valid
= 1;
1709 pEquation
->addr
[i
].index
= xLog2
+ elemLog2
;
1711 else if (swizzle
[i
].y
!= 0)
1713 ADDR_ASSERT((yMask
& swizzle
[i
].y
) == 0);
1714 yMask
|= swizzle
[i
].y
;
1716 pEquation
->addr
[i
].channel
= 1;
1717 pEquation
->addr
[i
].valid
= 1;
1718 pEquation
->addr
[i
].index
= Log2(swizzle
[i
].y
);
1720 ADDR_ASSERT(pEquation
->addr
[i
].index
< blkYLog2
);
1724 ADDR_ASSERT(swizzle
[i
].z
!= 0);
1725 ADDR_ASSERT((zMask
& swizzle
[i
].z
) == 0);
1726 zMask
|= swizzle
[i
].z
;
1728 pEquation
->addr
[i
].channel
= 2;
1729 pEquation
->addr
[i
].valid
= 1;
1730 pEquation
->addr
[i
].index
= Log2(swizzle
[i
].z
);
1732 ADDR_ASSERT(pEquation
->addr
[i
].index
< blkZLog2
);
1735 swizzle
[i
].value
= 0;
1740 const UINT_32 x
= swizzle
[i
].x
& xMask
;
1741 const UINT_32 y
= swizzle
[i
].y
& yMask
;
1742 const UINT_32 z
= swizzle
[i
].z
& zMask
;
1746 ADDR_ASSERT(IsPow2(x
));
1748 if (pEquation
->xor1
[i
].value
== 0)
1750 pEquation
->xor1
[i
].channel
= 0;
1751 pEquation
->xor1
[i
].valid
= 1;
1752 pEquation
->xor1
[i
].index
= Log2(x
) + elemLog2
;
1756 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1757 pEquation
->xor2
[i
].channel
= 0;
1758 pEquation
->xor2
[i
].valid
= 1;
1759 pEquation
->xor2
[i
].index
= Log2(x
) + elemLog2
;
1765 ADDR_ASSERT(IsPow2(y
));
1767 if (pEquation
->xor1
[i
].value
== 0)
1769 pEquation
->xor1
[i
].channel
= 1;
1770 pEquation
->xor1
[i
].valid
= 1;
1771 pEquation
->xor1
[i
].index
= Log2(y
);
1775 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1776 pEquation
->xor2
[i
].channel
= 1;
1777 pEquation
->xor2
[i
].valid
= 1;
1778 pEquation
->xor2
[i
].index
= Log2(y
);
1784 ADDR_ASSERT(IsPow2(z
));
1786 if (pEquation
->xor1
[i
].value
== 0)
1788 pEquation
->xor1
[i
].channel
= 2;
1789 pEquation
->xor1
[i
].valid
= 1;
1790 pEquation
->xor1
[i
].index
= Log2(z
);
1794 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1795 pEquation
->xor2
[i
].channel
= 2;
1796 pEquation
->xor2
[i
].valid
= 1;
1797 pEquation
->xor2
[i
].index
= Log2(z
);
1809 ADDR_ASSERT((xMask
== blkXMask
) && (yMask
== blkYMask
) && (zMask
== blkZMask
));
1814 ************************************************************************************************************************
1815 * Gfx10Lib::InitEquationTable
1818 * Initialize Equation table.
1822 ************************************************************************************************************************
1824 VOID
Gfx10Lib::InitEquationTable()
1826 memset(m_equationTable
, 0, sizeof(m_equationTable
));
1828 for (UINT_32 rsrcTypeIdx
= 0; rsrcTypeIdx
< MaxRsrcType
; rsrcTypeIdx
++)
1830 const AddrResourceType rsrcType
= static_cast<AddrResourceType
>(rsrcTypeIdx
+ ADDR_RSRC_TEX_2D
);
1832 for (UINT_32 swModeIdx
= 0; swModeIdx
< MaxSwModeType
; swModeIdx
++)
1834 const AddrSwizzleMode swMode
= static_cast<AddrSwizzleMode
>(swModeIdx
);
1836 for (UINT_32 elemLog2
= 0; elemLog2
< MaxElementBytesLog2
; elemLog2
++)
1838 UINT_32 equationIndex
= ADDR_INVALID_EQUATION_INDEX
;
1839 const ADDR_SW_PATINFO
* pPatInfo
= GetSwizzlePatternInfo(swMode
, rsrcType
, elemLog2
, 1);
1841 if (pPatInfo
!= NULL
)
1843 ADDR_ASSERT(IsValidSwMode(swMode
));
1845 if (pPatInfo
->maxItemCount
<= 3)
1847 ADDR_EQUATION equation
= {};
1849 ConvertSwizzlePatternToEquation(elemLog2
, rsrcType
, swMode
, pPatInfo
, &equation
);
1851 equationIndex
= m_numEquations
;
1852 ADDR_ASSERT(equationIndex
< EquationTableSize
);
1854 m_equationTable
[equationIndex
] = equation
;
1860 // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
1861 ADDR_ASSERT((elemLog2
== 3) || (elemLog2
== 4));
1862 ADDR_ASSERT(rsrcTypeIdx
== 1);
1863 ADDR_ASSERT(swMode
== ADDR_SW_64KB_D_X
);
1864 ADDR_ASSERT(m_settings
.supportRbPlus
== 1);
1868 m_equationLookupTable
[rsrcTypeIdx
][swModeIdx
][elemLog2
] = equationIndex
;
1875 ************************************************************************************************************************
1876 * Gfx10Lib::HwlGetEquationIndex
1879 * Interface function stub of GetEquationIndex
1883 ************************************************************************************************************************
1885 UINT_32
Gfx10Lib::HwlGetEquationIndex(
1886 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
1887 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
1890 UINT_32 equationIdx
= ADDR_INVALID_EQUATION_INDEX
;
1892 if ((pIn
->resourceType
== ADDR_RSRC_TEX_2D
) ||
1893 (pIn
->resourceType
== ADDR_RSRC_TEX_3D
))
1895 const UINT_32 rsrcTypeIdx
= static_cast<UINT_32
>(pIn
->resourceType
) - 1;
1896 const UINT_32 swModeIdx
= static_cast<UINT_32
>(pIn
->swizzleMode
);
1897 const UINT_32 elemLog2
= Log2(pIn
->bpp
>> 3);
1899 equationIdx
= m_equationLookupTable
[rsrcTypeIdx
][swModeIdx
][elemLog2
];
1902 if (pOut
->pMipInfo
!= NULL
)
1904 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
1906 pOut
->pMipInfo
[i
].equationIndex
= equationIdx
;
1914 ************************************************************************************************************************
1915 * Gfx10Lib::IsValidDisplaySwizzleMode
1918 * Check if a swizzle mode is supported by display engine
1921 * TRUE is swizzle mode is supported by display engine
1922 ************************************************************************************************************************
1924 BOOL_32
Gfx10Lib::IsValidDisplaySwizzleMode(
1925 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
///< [in] input structure
1928 ADDR_ASSERT(pIn
->resourceType
== ADDR_RSRC_TEX_2D
);
1930 BOOL_32 support
= FALSE
;
1932 if (m_settings
.isDcn2
)
1934 switch (pIn
->swizzleMode
)
1937 case ADDR_SW_4KB_D_X
:
1938 case ADDR_SW_64KB_D
:
1939 case ADDR_SW_64KB_D_T
:
1940 case ADDR_SW_64KB_D_X
:
1941 support
= (pIn
->bpp
== 64);
1944 case ADDR_SW_LINEAR
:
1946 case ADDR_SW_4KB_S_X
:
1947 case ADDR_SW_64KB_S
:
1948 case ADDR_SW_64KB_S_T
:
1949 case ADDR_SW_64KB_S_X
:
1950 case ADDR_SW_64KB_R_X
:
1951 support
= (pIn
->bpp
<= 64);
1960 ADDR_NOT_IMPLEMENTED();
1967 ************************************************************************************************************************
1968 * Gfx10Lib::GetMaxNumMipsInTail
1971 * Return max number of mips in tails
1974 * Max number of mips in tails
1975 ************************************************************************************************************************
1977 UINT_32
Gfx10Lib::GetMaxNumMipsInTail(
1978 UINT_32 blockSizeLog2
, ///< block size log2
1979 BOOL_32 isThin
///< is thin or thick
1982 UINT_32 effectiveLog2
= blockSizeLog2
;
1984 if (isThin
== FALSE
)
1986 effectiveLog2
-= (blockSizeLog2
- 8) / 3;
1989 return (effectiveLog2
<= 11) ? (1 + (1 << (effectiveLog2
- 9))) : (effectiveLog2
- 4);
1993 ************************************************************************************************************************
1994 * Gfx10Lib::HwlComputePipeBankXor
1997 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2001 ************************************************************************************************************************
2003 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputePipeBankXor(
2004 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT
* pIn
, ///< [in] input structure
2005 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT
* pOut
///< [out] output structure
2008 if (IsNonPrtXor(pIn
->swizzleMode
))
2010 const UINT_32 blockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
2011 const UINT_32 pipeBits
= GetPipeXorBits(blockBits
);
2012 const UINT_32 bankBits
= GetBankXorBits(blockBits
);
2014 UINT_32 pipeXor
= 0;
2015 UINT_32 bankXor
= 0;
2019 if (blockBits
== 16)
2021 const UINT_32 XorPatternLen
= 8;
2022 static const UINT_32 XorBank1b
[XorPatternLen
] = {0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80};
2023 static const UINT_32 XorBank2b
[XorPatternLen
] = {0x00, 0x80, 0x40, 0xC0, 0x80, 0x00, 0xC0, 0x40};
2024 static const UINT_32 XorBank3b
[XorPatternLen
] = {0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0};
2026 const UINT_32 index
= pIn
->surfIndex
% XorPatternLen
;
2030 bankXor
= XorBank1b
[index
];
2032 else if (bankBits
== 2)
2034 bankXor
= XorBank2b
[index
];
2038 bankXor
= XorBank3b
[index
];
2042 bankXor
>>= (2 - pipeBits
);
2048 pOut
->pipeBankXor
= bankXor
| pipeXor
;
2052 pOut
->pipeBankXor
= 0;
2059 ************************************************************************************************************************
2060 * Gfx10Lib::HwlComputeSlicePipeBankXor
2063 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2067 ************************************************************************************************************************
2069 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeSlicePipeBankXor(
2070 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT
* pIn
, ///< [in] input structure
2071 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT
* pOut
///< [out] output structure
2074 if (IsNonPrtXor(pIn
->swizzleMode
))
2076 const UINT_32 blockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
2077 const UINT_32 pipeBits
= GetPipeXorBits(blockBits
);
2078 const UINT_32 pipeXor
= ReverseBitVector(pIn
->slice
, pipeBits
);
2080 pOut
->pipeBankXor
= pIn
->basePipeBankXor
^ pipeXor
;
2084 pOut
->pipeBankXor
= 0;
2091 ************************************************************************************************************************
2092 * Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2095 * Compute sub resource offset to support swizzle pattern
2099 ************************************************************************************************************************
2101 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2102 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT
* pIn
, ///< [in] input structure
2103 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT
* pOut
///< [out] output structure
2106 ADDR_ASSERT(IsThin(pIn
->resourceType
, pIn
->swizzleMode
));
2108 pOut
->offset
= pIn
->slice
* pIn
->sliceSize
+ pIn
->macroBlockOffset
;
2114 ************************************************************************************************************************
2115 * Gfx10Lib::ValidateNonSwModeParams
2118 * Validate compute surface info params except swizzle mode
2121 * TRUE if parameters are valid, FALSE otherwise
2122 ************************************************************************************************************************
2124 BOOL_32
Gfx10Lib::ValidateNonSwModeParams(
2125 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
) const
2127 BOOL_32 valid
= TRUE
;
2129 if ((pIn
->bpp
== 0) || (pIn
->bpp
> 128) || (pIn
->width
== 0) || (pIn
->numFrags
> 8) || (pIn
->numSamples
> 16))
2131 ADDR_ASSERT_ALWAYS();
2135 if (pIn
->resourceType
>= ADDR_RSRC_MAX_TYPE
)
2137 ADDR_ASSERT_ALWAYS();
2141 const ADDR2_SURFACE_FLAGS flags
= pIn
->flags
;
2142 const AddrResourceType rsrcType
= pIn
->resourceType
;
2143 const BOOL_32 mipmap
= (pIn
->numMipLevels
> 1);
2144 const BOOL_32 msaa
= (pIn
->numFrags
> 1);
2145 const BOOL_32 display
= flags
.display
;
2146 const BOOL_32 tex3d
= IsTex3d(rsrcType
);
2147 const BOOL_32 tex2d
= IsTex2d(rsrcType
);
2148 const BOOL_32 tex1d
= IsTex1d(rsrcType
);
2149 const BOOL_32 stereo
= flags
.qbStereo
;
2151 // Resource type check
2154 if (msaa
|| display
|| stereo
)
2156 ADDR_ASSERT_ALWAYS();
2162 if ((msaa
&& mipmap
) || (stereo
&& msaa
) || (stereo
&& mipmap
))
2164 ADDR_ASSERT_ALWAYS();
2170 if (msaa
|| display
|| stereo
)
2172 ADDR_ASSERT_ALWAYS();
2178 ADDR_ASSERT_ALWAYS();
2186 ************************************************************************************************************************
2187 * Gfx10Lib::ValidateSwModeParams
2190 * Validate compute surface info related to swizzle mode
2193 * TRUE if parameters are valid, FALSE otherwise
2194 ************************************************************************************************************************
2196 BOOL_32
Gfx10Lib::ValidateSwModeParams(
2197 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
) const
2199 BOOL_32 valid
= TRUE
;
2201 if ((pIn
->swizzleMode
>= ADDR_SW_MAX_TYPE
) || (IsValidSwMode(pIn
->swizzleMode
) == FALSE
))
2203 ADDR_ASSERT_ALWAYS();
2207 const ADDR2_SURFACE_FLAGS flags
= pIn
->flags
;
2208 const AddrResourceType rsrcType
= pIn
->resourceType
;
2209 const AddrSwizzleMode swizzle
= pIn
->swizzleMode
;
2210 const BOOL_32 msaa
= (pIn
->numFrags
> 1);
2211 const BOOL_32 zbuffer
= flags
.depth
|| flags
.stencil
;
2212 const BOOL_32 color
= flags
.color
;
2213 const BOOL_32 display
= flags
.display
;
2214 const BOOL_32 tex3d
= IsTex3d(rsrcType
);
2215 const BOOL_32 tex2d
= IsTex2d(rsrcType
);
2216 const BOOL_32 tex1d
= IsTex1d(rsrcType
);
2217 const BOOL_32 thin3d
= flags
.view3dAs2dArray
;
2218 const BOOL_32 linear
= IsLinear(swizzle
);
2219 const BOOL_32 blk256B
= IsBlock256b(swizzle
);
2220 const BOOL_32 blkVar
= IsBlockVariable(swizzle
);
2221 const BOOL_32 isNonPrtXor
= IsNonPrtXor(swizzle
);
2222 const BOOL_32 prt
= flags
.prt
;
2223 const BOOL_32 fmask
= flags
.fmask
;
2226 if ((pIn
->numFrags
> 1) &&
2227 (GetBlockSize(swizzle
) < (m_pipeInterleaveBytes
* pIn
->numFrags
)))
2229 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2230 ADDR_ASSERT_ALWAYS();
2234 if (display
&& (IsValidDisplaySwizzleMode(pIn
) == FALSE
))
2236 ADDR_ASSERT_ALWAYS();
2240 if ((pIn
->bpp
== 96) && (linear
== FALSE
))
2242 ADDR_ASSERT_ALWAYS();
2246 const UINT_32 swizzleMask
= 1 << swizzle
;
2248 // Resource type check
2251 if ((swizzleMask
& Gfx10Rsrc1dSwModeMask
) == 0)
2253 ADDR_ASSERT_ALWAYS();
2259 if (((swizzleMask
& Gfx10Rsrc2dSwModeMask
) == 0) ||
2260 (prt
&& ((swizzleMask
& Gfx10Rsrc2dPrtSwModeMask
) == 0)) ||
2261 (fmask
&& ((swizzleMask
& Gfx10ZSwModeMask
) == 0)))
2263 ADDR_ASSERT_ALWAYS();
2269 if (((swizzleMask
& Gfx10Rsrc3dSwModeMask
) == 0) ||
2270 (prt
&& ((swizzleMask
& Gfx10Rsrc3dPrtSwModeMask
) == 0)) ||
2271 (thin3d
&& ((swizzleMask
& Gfx10Rsrc3dThinSwModeMask
) == 0)))
2273 ADDR_ASSERT_ALWAYS();
2278 // Swizzle type check
2281 if (zbuffer
|| msaa
|| (pIn
->bpp
== 0) || ((pIn
->bpp
% 8) != 0))
2283 ADDR_ASSERT_ALWAYS();
2287 else if (IsZOrderSwizzle(swizzle
))
2289 if ((pIn
->bpp
> 64) ||
2290 (msaa
&& (color
|| (pIn
->bpp
> 32))) ||
2291 ElemLib::IsBlockCompressed(pIn
->format
) ||
2292 ElemLib::IsMacroPixelPacked(pIn
->format
))
2294 ADDR_ASSERT_ALWAYS();
2298 else if (IsStandardSwizzle(rsrcType
, swizzle
))
2300 if (zbuffer
|| msaa
)
2302 ADDR_ASSERT_ALWAYS();
2306 else if (IsDisplaySwizzle(rsrcType
, swizzle
))
2308 if (zbuffer
|| msaa
)
2310 ADDR_ASSERT_ALWAYS();
2314 else if (IsRtOptSwizzle(swizzle
))
2318 ADDR_ASSERT_ALWAYS();
2324 ADDR_ASSERT_ALWAYS();
2331 if (zbuffer
|| tex3d
|| msaa
)
2333 ADDR_ASSERT_ALWAYS();
2339 if (m_blockVarSizeLog2
== 0)
2341 ADDR_ASSERT_ALWAYS();
2350 ************************************************************************************************************************
2351 * Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2354 * Compute surface info sanity check
2358 ************************************************************************************************************************
2360 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2361 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
///< [in] input structure
2364 return ValidateNonSwModeParams(pIn
) && ValidateSwModeParams(pIn
) ? ADDR_OK
: ADDR_INVALIDPARAMS
;
2368 ************************************************************************************************************************
2369 * Gfx10Lib::HwlGetPreferredSurfaceSetting
2372 * Internal function to get suggested surface information for client to use
2376 ************************************************************************************************************************
// Fills pOut with the resource type, the valid block / swizzle-mode / swizzle-type sets and one
// preferred swizzle mode for the surface described by pIn.
//   - When pIn->flags.fmask is set, only ADDR_SW_64KB_Z_X and ADDR_SW_VAR_Z_X are candidates.
//   - Otherwise an allowed-swizzle-mode bit set is built from the client's forbiddenBlock,
//     preferredSwSet and maxAlign settings, narrowed by resource type, format and usage flags,
//     then reduced to one block type, one swizzle type and finally one swizzle mode.
// returnCode starts as ADDR_OK and is set to ADDR_INVALIDPARAMS on the "Invalid combination" paths.
// NOTE(review): this listing does not show every original line (braces, else keywords and some
// statements are absent); the comments below describe only what is visible.
2378 ADDR_E_RETURNCODE
Gfx10Lib::HwlGetPreferredSurfaceSetting(
2379 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT
* pIn
, ///< [in] input structure
2380 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT
* pOut
///< [out] output structure
2383 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
// FMASK path: the choice is between the 64KB and the VAR Z-order XOR modes only.
2385 if (pIn
->flags
.fmask
)
2387 const BOOL_32 forbid64KbBlockType
= pIn
->forbiddenBlock
.macroThin64KB
? TRUE
: FALSE
;
// VAR blocks count as forbidden when m_blockVarSizeLog2 is 0 as well as when the client forbids them.
2388 const BOOL_32 forbidVarBlockType
= ((m_blockVarSizeLog2
== 0) || (pIn
->forbiddenBlock
.var
!= 0));
2390 if (forbid64KbBlockType
&& forbidVarBlockType
)
2392 // Invalid combination...
2393 ADDR_ASSERT_ALWAYS();
2394 returnCode
= ADDR_INVALIDPARAMS
;
2398 pOut
->resourceType
= ADDR_RSRC_TEX_2D
;
2399 pOut
->validBlockSet
.value
= 0;
2400 pOut
->validBlockSet
.macroThin64KB
= forbid64KbBlockType
? 0 : 1;
2401 pOut
->validBlockSet
.var
= forbidVarBlockType
? 0 : 1;
2402 pOut
->validSwModeSet
.value
= 0;
2403 pOut
->validSwModeSet
.sw64KB_Z_X
= forbid64KbBlockType
? 0 : 1;
2404 pOut
->validSwModeSet
.swVar_Z_X
= forbidVarBlockType
? 0 : 1;
2405 pOut
->canXor
= TRUE
;
2406 pOut
->validSwTypeSet
.value
= AddrSwSetZ
;
2407 pOut
->clientPreferredSwSet
= pOut
->validSwTypeSet
;
2409 BOOL_32 use64KbBlockType
= (forbid64KbBlockType
== FALSE
);
// Both block types are allowed: compute the padded size for each candidate and compare them,
// weighted by ratioLow / ratioHi (which depend on the minimizeAlign and opt4space flags).
2411 if ((forbid64KbBlockType
== FALSE
) && (forbidVarBlockType
== FALSE
))
2413 const UINT_8 maxFmaskSwizzleModeType
= 2;
2414 const UINT_32 ratioLow
= pIn
->flags
.minimizeAlign
? 1 : (pIn
->flags
.opt4space
? 3 : 2);
2415 const UINT_32 ratioHi
= pIn
->flags
.minimizeAlign
? 1 : (pIn
->flags
.opt4space
? 2 : 1);
2416 const UINT_32 fmaskBpp
= GetFmaskBpp(pIn
->numSamples
, pIn
->numFrags
);
2417 const UINT_32 numSlices
= Max(pIn
->numSlices
, 1u);
2418 const UINT_32 width
= Max(pIn
->width
, 1u);
2419 const UINT_32 height
= Max(pIn
->height
, 1u);
2420 const UINT_64 sizeAlignInElement
= Max(NextPow2(pIn
->minSizeAlign
) / (fmaskBpp
>> 3), 1u);
2422 AddrSwizzleMode swMode
[maxFmaskSwizzleModeType
] = {ADDR_SW_64KB_Z_X
, ADDR_SW_VAR_Z_X
};
2423 Dim3d blkDim
[maxFmaskSwizzleModeType
] = {{0}, {0}};
2424 Dim3d padDim
[maxFmaskSwizzleModeType
] = {{0}, {0}};
2425 UINT_64 padSize
[maxFmaskSwizzleModeType
] = {0};
2427 for (UINT_8 i
= 0; i
< maxFmaskSwizzleModeType
; i
++)
2429 ComputeBlockDimensionForSurf(&blkDim
[i
].w
,
2437 padSize
[i
] = ComputePadSize(&blkDim
[i
], width
, height
, numSlices
, &padDim
[i
]);
2438 padSize
[i
] = PowTwoAlign(padSize
[i
], sizeAlignInElement
);
// The comparison direction depends on which of the two block sizes is larger.
2441 if (GetBlockSizeLog2(swMode
[1]) >= GetBlockSizeLog2(swMode
[0]))
2443 if ((padSize
[1] * ratioHi
) <= (padSize
[0] * ratioLow
))
2445 use64KbBlockType
= FALSE
;
2450 if ((padSize
[1] * ratioLow
) < (padSize
[0] * ratioHi
))
2452 use64KbBlockType
= FALSE
;
2456 else if (forbidVarBlockType
)
2458 use64KbBlockType
= TRUE
;
2461 if (use64KbBlockType
)
2463 pOut
->swizzleMode
= ADDR_SW_64KB_Z_X
;
2467 pOut
->swizzleMode
= ADDR_SW_VAR_Z_X
;
// Non-FMASK path: start from the client's dimensions, clamped to at least 1.
2473 UINT_32 bpp
= pIn
->bpp
;
2474 UINT_32 width
= Max(pIn
->width
, 1u);
2475 UINT_32 height
= Max(pIn
->height
, 1u);
2477 // Set format to INVALID will skip this conversion
2478 if (pIn
->format
!= ADDR_FMT_INVALID
)
2480 ElemMode elemMode
= ADDR_UNCOMPRESSED
;
2481 UINT_32 expandX
, expandY
;
2483 // Get compression/expansion factors and element mode which indicates compression/expansion
2484 bpp
= GetElemLib()->GetBitsPerPixel(pIn
->format
,
2489 UINT_32 basePitch
= 0;
2490 GetElemLib()->AdjustSurfaceInfo(elemMode
,
2499 const UINT_32 numSlices
= Max(pIn
->numSlices
, 1u);
2500 const UINT_32 numMipLevels
= Max(pIn
->numMipLevels
, 1u);
2501 const UINT_32 numSamples
= Max(pIn
->numSamples
, 1u);
// A numFrags of 0 means "same as numSamples".
2502 const UINT_32 numFrags
= (pIn
->numFrags
== 0) ? numSamples
: pIn
->numFrags
;
2503 const BOOL_32 msaa
= (numFrags
> 1) || (numSamples
> 1);
2505 // Pre sanity check on non swizzle mode parameters
2506 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn
= {};
2507 localIn
.flags
= pIn
->flags
;
2508 localIn
.resourceType
= pIn
->resourceType
;
2509 localIn
.format
= pIn
->format
;
2511 localIn
.width
= width
;
2512 localIn
.height
= height
;
2513 localIn
.numSlices
= numSlices
;
2514 localIn
.numMipLevels
= numMipLevels
;
2515 localIn
.numSamples
= numSamples
;
2516 localIn
.numFrags
= numFrags
;
2518 if (ValidateNonSwModeParams(&localIn
))
2520 // Forbid swizzle mode(s) by client setting
// NOTE(review): the 3D checks below read pOut->resourceType, which on this path is not assigned
// until further down (pOut->resourceType = pIn->resourceType) — confirm the caller initializes it.
2521 ADDR2_SWMODE_SET allowedSwModeSet
= {};
2522 allowedSwModeSet
.value
|= pIn
->forbiddenBlock
.linear
? 0 : Gfx10LinearSwModeMask
;
2523 allowedSwModeSet
.value
|= pIn
->forbiddenBlock
.micro
? 0 : Gfx10Blk256BSwModeMask
;
2524 allowedSwModeSet
.value
|=
2525 pIn
->forbiddenBlock
.macroThin4KB
? 0 :
2526 ((pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ? 0 : Gfx10Blk4KBSwModeMask
);
2527 allowedSwModeSet
.value
|=
2528 pIn
->forbiddenBlock
.macroThick4KB
? 0 :
2529 ((pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ? Gfx10Rsrc3dThick4KBSwModeMask
: 0);
2530 allowedSwModeSet
.value
|=
2531 pIn
->forbiddenBlock
.macroThin64KB
? 0 :
2532 ((pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ? Gfx10Rsrc3dThin64KBSwModeMask
: Gfx10Blk64KBSwModeMask
);
2533 allowedSwModeSet
.value
|=
2534 pIn
->forbiddenBlock
.macroThick64KB
? 0 :
2535 ((pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ? Gfx10Rsrc3dThick64KBSwModeMask
: 0);
2536 allowedSwModeSet
.value
|=
2537 pIn
->forbiddenBlock
.var
? 0 : (m_blockVarSizeLog2
? Gfx10BlkVarSwModeMask
: 0);
// A non-empty preferredSwSet removes every swizzle type the client did not ask for.
2539 if (pIn
->preferredSwSet
.value
!= 0)
2541 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_Z
? ~0 : ~Gfx10ZSwModeMask
;
2542 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_S
? ~0 : ~Gfx10StandardSwModeMask
;
2543 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_D
? ~0 : ~Gfx10DisplaySwModeMask
;
2544 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_R
? ~0 : ~Gfx10RenderSwModeMask
;
// NOTE(review): the condition guarding the XOR-mode removal below is not visible in this listing.
2549 allowedSwModeSet
.value
&= ~Gfx10XorSwModeMask
;
// A non-zero maxAlign removes every block type whose size exceeds it.
2552 if (pIn
->maxAlign
> 0)
2554 if (pIn
->maxAlign
< (1u << m_blockVarSizeLog2
))
2556 allowedSwModeSet
.value
&= ~Gfx10BlkVarSwModeMask
;
2559 if (pIn
->maxAlign
< Size64K
)
2561 allowedSwModeSet
.value
&= ~Gfx10Blk64KBSwModeMask
;
2564 if (pIn
->maxAlign
< Size4K
)
2566 allowedSwModeSet
.value
&= ~Gfx10Blk4KBSwModeMask
;
2569 if (pIn
->maxAlign
< Size256
)
2571 allowedSwModeSet
.value
&= ~Gfx10Blk256BSwModeMask
;
2575 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2576 switch (pIn
->resourceType
)
2578 case ADDR_RSRC_TEX_1D
:
2579 allowedSwModeSet
.value
&= Gfx10Rsrc1dSwModeMask
;
2582 case ADDR_RSRC_TEX_2D
:
2583 allowedSwModeSet
.value
&= pIn
->flags
.prt
? Gfx10Rsrc2dPrtSwModeMask
: Gfx10Rsrc2dSwModeMask
;
2586 case ADDR_RSRC_TEX_3D
:
2587 allowedSwModeSet
.value
&= pIn
->flags
.prt
? Gfx10Rsrc3dPrtSwModeMask
: Gfx10Rsrc3dSwModeMask
;
2589 if (pIn
->flags
.view3dAs2dArray
)
2591 allowedSwModeSet
.value
&= Gfx10Rsrc3dThinSwModeMask
;
2596 ADDR_ASSERT_ALWAYS();
2597 allowedSwModeSet
.value
= 0;
// Formats / MSAA configurations listed here cannot use the Z-order swizzle modes.
2601 if (ElemLib::IsBlockCompressed(pIn
->format
) ||
2602 ElemLib::IsMacroPixelPacked(pIn
->format
) ||
2604 (msaa
&& ((bpp
> 32) || pIn
->flags
.color
|| pIn
->flags
.unordered
)))
2606 allowedSwModeSet
.value
&= ~Gfx10ZSwModeMask
;
// ADDR_FMT_32_32_32 is restricted to the linear modes.
2609 if (pIn
->format
== ADDR_FMT_32_32_32
)
2611 allowedSwModeSet
.value
&= Gfx10LinearSwModeMask
;
// NOTE(review): the condition guarding the MSAA mask below is not visible in this listing.
2616 allowedSwModeSet
.value
&= Gfx10MsaaSwModeMask
;
2619 if (pIn
->flags
.depth
|| pIn
->flags
.stencil
)
2621 allowedSwModeSet
.value
&= Gfx10ZSwModeMask
;
2624 if (pIn
->flags
.display
)
2626 if (m_settings
.isDcn2
)
2628 allowedSwModeSet
.value
&= (bpp
== 64) ? Dcn2Bpp64SwModeMask
: Dcn2NonBpp64SwModeMask
;
2632 ADDR_NOT_IMPLEMENTED();
2636 if (allowedSwModeSet
.value
!= 0)
2639 // Post sanity check, at least AddrLib should accept the output generated by its own
2640 UINT_32 validateSwModeSet
= allowedSwModeSet
.value
;
// Walk the set bit by bit; bit index i is used directly as the AddrSwizzleMode value.
2642 for (UINT_32 i
= 0; validateSwModeSet
!= 0; i
++)
2644 if (validateSwModeSet
& 1)
2646 localIn
.swizzleMode
= static_cast<AddrSwizzleMode
>(i
);
2647 ADDR_ASSERT(ValidateSwModeParams(&localIn
));
2650 validateSwModeSet
>>= 1;
// Publish the full valid sets before the optional restrictions below narrow allowedSwModeSet further.
2654 pOut
->resourceType
= pIn
->resourceType
;
2655 pOut
->validSwModeSet
= allowedSwModeSet
;
2656 pOut
->canXor
= (allowedSwModeSet
.value
& Gfx10XorSwModeMask
) ? TRUE
: FALSE
;
2657 pOut
->validBlockSet
= GetAllowedBlockSet(allowedSwModeSet
, pOut
->resourceType
);
2658 pOut
->validSwTypeSet
= GetAllowedSwSet(allowedSwModeSet
);
2660 pOut
->clientPreferredSwSet
= pIn
->preferredSwSet
;
2662 if (pOut
->clientPreferredSwSet
.value
== 0)
2664 pOut
->clientPreferredSwSet
.value
= AddrSwSetAll
;
2667 // Apply optional restrictions
2668 if ((pIn
->flags
.depth
|| pIn
->flags
.stencil
) && msaa
&& m_configFlags
.nonPower2MemConfig
)
// NOTE(review): the test below uses `&=`, so Gfx10BlkVarSwModeMask is already cleared from
// allowedSwModeSet while the condition is evaluated. When the result is 0 the set has been
// emptied, which contradicts the later comment saying VAR should still be usable in that case.
// A non-mutating test, (allowedSwModeSet.value & ~Gfx10BlkVarSwModeMask) != 0, looks like what
// was intended — confirm and fix.
2670 if ((allowedSwModeSet
.value
&= ~Gfx10BlkVarSwModeMask
) != 0)
2672 // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
2673 // the GL2 in VAR mode, so it should be avoided.
2674 allowedSwModeSet
.value
&= ~Gfx10BlkVarSwModeMask
;
2678 // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
2679 // But we have to suffer from low performance because there is no other choice...
2680 ADDR_ASSERT_ALWAYS();
2684 if (pIn
->flags
.needEquation
)
2686 FilterInvalidEqSwizzleMode(allowedSwModeSet
, pIn
->resourceType
, Log2(bpp
>> 3));
2689 if (allowedSwModeSet
.value
== Gfx10LinearSwModeMask
)
2691 pOut
->swizzleMode
= ADDR_SW_LINEAR
;
2695 // Always ignore linear swizzle mode if there is other choice.
2696 allowedSwModeSet
.swLinear
= 0;
2698 ADDR2_BLOCK_SET allowedBlockSet
= GetAllowedBlockSet(allowedSwModeSet
, pOut
->resourceType
);
2700 // Determine block size if there is 2 or more block type candidates
2701 if (IsPow2(allowedBlockSet
.value
) == FALSE
)
// swMode[] holds one representative swizzle mode per block type; it is only passed to
// ComputeBlockDimensionForSurf (first line visible) and GetBlockSizeLog2 below.
2703 AddrSwizzleMode swMode
[AddrBlockMaxTiledType
] = { ADDR_SW_LINEAR
};
2705 if (m_blockVarSizeLog2
!= 0)
2707 swMode
[AddrBlockVar
] = ADDR_SW_VAR_R_X
;
2710 if (pOut
->resourceType
== ADDR_RSRC_TEX_3D
)
2712 swMode
[AddrBlockThick4KB
] = ADDR_SW_4KB_S
;
2713 swMode
[AddrBlockThin64KB
] = ADDR_SW_64KB_R_X
;
2714 swMode
[AddrBlockThick64KB
] = ADDR_SW_64KB_S
;
2718 swMode
[AddrBlockMicro
] = ADDR_SW_256B_S
;
2719 swMode
[AddrBlockThin4KB
] = ADDR_SW_4KB_S
;
2720 swMode
[AddrBlockThin64KB
] = ADDR_SW_64KB_S
;
2723 Dim3d blkDim
[AddrBlockMaxTiledType
] = {{0}, {0}, {0}, {0}, {0}, {0}};
2724 Dim3d padDim
[AddrBlockMaxTiledType
] = {{0}, {0}, {0}, {0}, {0}, {0}};
2725 UINT_64 padSize
[AddrBlockMaxTiledType
] = {0};
2727 const UINT_32 ratioLow
= pIn
->flags
.minimizeAlign
? 1 : (pIn
->flags
.opt4space
? 3 : 2);
2728 const UINT_32 ratioHi
= pIn
->flags
.minimizeAlign
? 1 : (pIn
->flags
.opt4space
? 2 : 1);
2729 const UINT_64 sizeAlignInElement
= Max(NextPow2(pIn
->minSizeAlign
) / (bpp
>> 3), 1u);
2730 UINT_32 minSizeBlk
= AddrBlockMicro
;
2731 UINT_64 minSize
= 0;
// Evaluate each allowed block type and track the preferred one in minSize.
// NOTE(review): the statements that update minSizeBlk inside this loop are not visible in this listing.
2733 for (UINT_32 i
= AddrBlockMicro
; i
< AddrBlockMaxTiledType
; i
++)
2735 if (allowedBlockSet
.value
& (1 << i
))
2737 ComputeBlockDimensionForSurf(&blkDim
[i
].w
,
2745 padSize
[i
] = ComputePadSize(&blkDim
[i
], width
, height
, numSlices
, &padDim
[i
]);
2746 padSize
[i
] = PowTwoAlign(padSize
[i
] * numFrags
, sizeAlignInElement
);
2750 minSize
= padSize
[i
];
2755 // Due to the fact that VAR block size = 16KB * m_pipes, it is possible that VAR
2756 // block size < 64KB. And ratio[Hi/Low] logic implicitly requires iterating from
2757 // smaller block type to bigger block type. So we have to correct comparing logic
2758 // according to the size of existing "minimum block" and size of coming/comparing
2759 // block. The new logic can also be useful to any future change about AddrBlockType.
2760 if (GetBlockSizeLog2(swMode
[i
]) >= GetBlockSizeLog2(swMode
[minSizeBlk
]))
2762 if ((padSize
[i
] * ratioHi
) <= (minSize
* ratioLow
))
2764 minSize
= padSize
[i
];
2770 if ((padSize
[i
] * ratioLow
) < (minSize
* ratioHi
))
2772 minSize
= padSize
[i
];
// A surface that fits inside a single micro block always uses the micro block type when allowed.
2780 if ((allowedBlockSet
.micro
== TRUE
) &&
2781 (width
<= blkDim
[AddrBlockMicro
].w
) &&
2782 (height
<= blkDim
[AddrBlockMicro
].h
))
2784 minSizeBlk
= AddrBlockMicro
;
// Keep only the swizzle modes that belong to the chosen block type.
2787 if (minSizeBlk
== AddrBlockMicro
)
2789 ADDR_ASSERT(pOut
->resourceType
!= ADDR_RSRC_TEX_3D
);
2790 allowedSwModeSet
.value
&= Gfx10Blk256BSwModeMask
;
2792 else if (minSizeBlk
== AddrBlockThick4KB
)
2794 ADDR_ASSERT(pOut
->resourceType
== ADDR_RSRC_TEX_3D
);
2795 allowedSwModeSet
.value
&= Gfx10Rsrc3dThick4KBSwModeMask
;
2797 else if (minSizeBlk
== AddrBlockThin4KB
)
2799 ADDR_ASSERT(pOut
->resourceType
!= ADDR_RSRC_TEX_3D
);
2800 allowedSwModeSet
.value
&= Gfx10Blk4KBSwModeMask
;
2802 else if (minSizeBlk
== AddrBlockThick64KB
)
2804 ADDR_ASSERT(pOut
->resourceType
== ADDR_RSRC_TEX_3D
);
2805 allowedSwModeSet
.value
&= Gfx10Rsrc3dThick64KBSwModeMask
;
2807 else if (minSizeBlk
== AddrBlockThin64KB
)
2809 allowedSwModeSet
.value
&= (pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ?
2810 Gfx10Rsrc3dThin64KBSwModeMask
: Gfx10Blk64KBSwModeMask
;
2814 ADDR_ASSERT(minSizeBlk
== AddrBlockVar
);
2815 allowedSwModeSet
.value
&= Gfx10BlkVarSwModeMask
;
2819 // Block type should be determined.
2820 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet
, pOut
->resourceType
).value
));
2822 ADDR2_SWTYPE_SET allowedSwSet
= GetAllowedSwSet(allowedSwModeSet
);
2824 // Determine swizzle type if there is 2 or more swizzle type candidates
// The priority order of swizzle types differs per case: block-compressed formats try D, S, R;
// macro-pixel-packed formats try S, D, R; the remaining branches follow below.
2825 if (IsPow2(allowedSwSet
.value
) == FALSE
)
2827 if (ElemLib::IsBlockCompressed(pIn
->format
))
2829 if (allowedSwSet
.sw_D
)
2831 allowedSwModeSet
.value
&= Gfx10DisplaySwModeMask
;
2833 else if (allowedSwSet
.sw_S
)
2835 allowedSwModeSet
.value
&= Gfx10StandardSwModeMask
;
2839 ADDR_ASSERT(allowedSwSet
.sw_R
);
2840 allowedSwModeSet
.value
&= Gfx10RenderSwModeMask
;
2843 else if (ElemLib::IsMacroPixelPacked(pIn
->format
))
2845 if (allowedSwSet
.sw_S
)
2847 allowedSwModeSet
.value
&= Gfx10StandardSwModeMask
;
2849 else if (allowedSwSet
.sw_D
)
2851 allowedSwModeSet
.value
&= Gfx10DisplaySwModeMask
;
2855 ADDR_ASSERT(allowedSwSet
.sw_R
);
2856 allowedSwModeSet
.value
&= Gfx10RenderSwModeMask
;
2859 else if (pIn
->resourceType
== ADDR_RSRC_TEX_3D
)
// NOTE(review): the last operand of the condition below is not visible in this listing.
2861 if (pIn
->flags
.color
&&
2862 GetAllowedBlockSet(allowedSwModeSet
, pOut
->resourceType
).macroThick64KB
&&
2865 allowedSwModeSet
.value
&= Gfx10DisplaySwModeMask
;
2867 else if (allowedSwSet
.sw_S
)
2869 allowedSwModeSet
.value
&= Gfx10StandardSwModeMask
;
2871 else if (allowedSwSet
.sw_R
)
2873 allowedSwModeSet
.value
&= Gfx10RenderSwModeMask
;
2877 ADDR_ASSERT(allowedSwSet
.sw_Z
);
2878 allowedSwModeSet
.value
&= Gfx10ZSwModeMask
;
// Remaining case: try R, D, S, then Z.
2883 if (allowedSwSet
.sw_R
)
2885 allowedSwModeSet
.value
&= Gfx10RenderSwModeMask
;
2887 else if (allowedSwSet
.sw_D
)
2889 allowedSwModeSet
.value
&= Gfx10DisplaySwModeMask
;
2891 else if (allowedSwSet
.sw_S
)
2893 allowedSwModeSet
.value
&= Gfx10StandardSwModeMask
;
2897 ADDR_ASSERT(allowedSwSet
.sw_Z
);
2898 allowedSwModeSet
.value
&= Gfx10ZSwModeMask
;
2903 // Swizzle type should be determined.
2904 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet
).value
));
2906 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
2907 // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
2908 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
2909 pOut
->swizzleMode
= static_cast<AddrSwizzleMode
>(Log2NonPow2(allowedSwModeSet
.value
));
2914 // Invalid combination...
2915 ADDR_ASSERT_ALWAYS();
2916 returnCode
= ADDR_INVALIDPARAMS
;
2921 // Invalid combination...
2922 ADDR_ASSERT_ALWAYS();
2923 returnCode
= ADDR_INVALIDPARAMS
;
2931 ************************************************************************************************************************
2932 * Gfx10Lib::ComputeStereoInfo
2935 * Compute height alignment and right eye pipeBankXor for stereo surface
2940 ************************************************************************************************************************
// Derives the extra Y alignment (*pAlignY) and the right-eye XOR value (*pRightXor) for a stereo
// surface from the address equation of pIn->swizzleMode. Work is only done for swizzle modes for
// which IsNonPrtXor() is true; ret is set to ADDR_INVALIDPARAMS on the failure path near the end.
// NOTE(review): the declarations of yMax / yPos and the update of yPos are not visible in this
// listing; the comments below describe only what is shown.
2942 ADDR_E_RETURNCODE
Gfx10Lib::ComputeStereoInfo(
2943 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< Compute surface info
2944 UINT_32 blkHeight
, ///< Block height
2945 UINT_32
* pAlignY
, ///< Stereo requested additional alignment in Y
2946 UINT_32
* pRightXor
///< Right eye xor
2949 ADDR_E_RETURNCODE ret
= ADDR_OK
;
2954 if (IsNonPrtXor(pIn
->swizzleMode
))
2956 const UINT_32 blkSizeLog2
= GetBlockSizeLog2(pIn
->swizzleMode
);
2957 const UINT_32 elemLog2
= Log2(pIn
->bpp
>> 3);
// The lookup table's first index is the resource type minus 1.
2958 const UINT_32 rsrcType
= static_cast<UINT_32
>(pIn
->resourceType
) - 1;
2959 const UINT_32 swMode
= static_cast<UINT_32
>(pIn
->swizzleMode
);
2960 const UINT_32 eqIndex
= m_equationLookupTable
[rsrcType
][swMode
][elemLog2
];
2962 if (eqIndex
!= ADDR_INVALID_EQUATION_INDEX
)
// Scan the xor1 terms of the address bits from the pipe-interleave bit up to the block size,
// tracking the highest y-channel (channel == 1) bit index in yMax.
2967 for (UINT_32 i
= m_pipeInterleaveLog2
; i
< blkSizeLog2
; i
++)
2969 if (m_equationTable
[eqIndex
].xor1
[i
].value
== 0)
2974 ADDR_ASSERT(m_equationTable
[eqIndex
].xor1
[i
].valid
== 1);
2976 if ((m_equationTable
[eqIndex
].xor1
[i
].channel
== 1) &&
2977 (m_equationTable
[eqIndex
].xor1
[i
].index
> yMax
))
2979 yMax
= m_equationTable
[eqIndex
].xor1
[i
].index
;
// The required alignment is 2^yMax rows; only scale *pAlignY when that is at least one block high.
2984 const UINT_32 additionalAlign
= 1 << yMax
;
2986 if (additionalAlign
>= blkHeight
)
2988 *pAlignY
*= (additionalAlign
/ blkHeight
);
2990 const UINT_32 alignedHeight
= PowTwoAlign(pIn
->height
, additionalAlign
);
// If bit yMax of the aligned height is set, the right-eye XOR gets the bit at (yPos - m_pipeInterleaveLog2).
2992 if ((alignedHeight
>> yMax
) & 1)
2994 *pRightXor
= 1 << (yPos
- m_pipeInterleaveLog2
);
3000 ret
= ADDR_INVALIDPARAMS
;
3008 ************************************************************************************************************************
3009 * Gfx10Lib::HwlComputeSurfaceInfoTiled
3012 * Internal function to calculate alignment for tiled surface
3016 ************************************************************************************************************************
3018 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeSurfaceInfoTiled(
3019 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
3020 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
3023 ADDR_E_RETURNCODE ret
;
3025 if (IsBlock256b(pIn
->swizzleMode
))
3027 ret
= ComputeSurfaceInfoMicroTiled(pIn
, pOut
);
3031 ret
= ComputeSurfaceInfoMacroTiled(pIn
, pOut
);
3038 ************************************************************************************************************************
3039 * Gfx10Lib::ComputeSurfaceInfoMicroTiled
3042 * Internal function to calculate alignment for micro tiled surface
3046 ************************************************************************************************************************
// Computes pitch, height, slice count, base alignment, slice size and surface size for a surface
// that uses a micro (256B-block) swizzle mode, and fills pOut->pMipInfo[] when it is non-NULL.
// The mip-chain related outputs are set to fixed defaults (0 / FALSE / numMipLevels).
// NOTE(review): this listing does not show every original line (remaining call arguments, guards,
// braces and the return statement are absent); the comments describe only what is visible.
3048 ADDR_E_RETURNCODE
Gfx10Lib::ComputeSurfaceInfoMicroTiled(
3049 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
3050 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
3053 ADDR_E_RETURNCODE ret
= ComputeBlockDimensionForSurf(&pOut
->blockWidth
,
3063 pOut
->mipChainPitch
= 0;
3064 pOut
->mipChainHeight
= 0;
3065 pOut
->mipChainSlice
= 0;
3066 pOut
->epitchIsHeight
= FALSE
;
3067 pOut
->mipChainInTail
= FALSE
;
3068 pOut
->firstMipIdInTail
= pIn
->numMipLevels
;
3070 const UINT_32 blockSize
= GetBlockSize(pIn
->swizzleMode
);
// Pad width/height up to whole blocks; the slice count is passed through unchanged.
3072 pOut
->pitch
= PowTwoAlign(pIn
->width
, pOut
->blockWidth
);
3073 pOut
->height
= PowTwoAlign(pIn
->height
, pOut
->blockHeight
);
3074 pOut
->numSlices
= pIn
->numSlices
;
3075 pOut
->baseAlign
= blockSize
;
3077 if (pIn
->numMipLevels
> 1)
3079 const UINT_32 mip0Width
= pIn
->width
;
3080 const UINT_32 mip0Height
= pIn
->height
;
3081 UINT_64 mipSliceSize
= 0;
// Iterate from the smallest mip to mip 0, so each level's offset is the running size of all
// smaller levels already placed.
3083 for (INT_32 i
= static_cast<INT_32
>(pIn
->numMipLevels
) - 1; i
>= 0; i
--)
3085 UINT_32 mipWidth
, mipHeight
;
3087 GetMipSize(mip0Width
, mip0Height
, 1, i
, &mipWidth
, &mipHeight
);
3089 const UINT_32 mipActualWidth
= PowTwoAlign(mipWidth
, pOut
->blockWidth
);
3090 const UINT_32 mipActualHeight
= PowTwoAlign(mipHeight
, pOut
->blockHeight
);
3092 if (pOut
->pMipInfo
!= NULL
)
3094 pOut
->pMipInfo
[i
].pitch
= mipActualWidth
;
3095 pOut
->pMipInfo
[i
].height
= mipActualHeight
;
3096 pOut
->pMipInfo
[i
].depth
= 1;
3097 pOut
->pMipInfo
[i
].offset
= mipSliceSize
;
3098 pOut
->pMipInfo
[i
].mipTailOffset
= 0;
3099 pOut
->pMipInfo
[i
].macroBlockOffset
= mipSliceSize
;
// NOTE(review): mipActualWidth and mipActualHeight are UINT_32, so this product is presumably
// evaluated in 32 bits before being added to the UINT_64 accumulator (unless pIn->bpp is a
// 64-bit type — confirm). The single-mip path below casts pitch to UINT_64 first; applying
// static_cast<UINT_64>(mipActualWidth) here would keep the two paths consistent and avoid
// wrap-around on very large surfaces.
3102 mipSliceSize
+= mipActualWidth
* mipActualHeight
* (pIn
->bpp
>> 3);
3105 pOut
->sliceSize
= mipSliceSize
;
3106 pOut
->surfSize
= mipSliceSize
* pOut
->numSlices
;
// Single-mip case: sizes follow directly from the padded pitch/height.
3110 pOut
->sliceSize
= static_cast<UINT_64
>(pOut
->pitch
) * pOut
->height
* (pIn
->bpp
>> 3);
3111 pOut
->surfSize
= pOut
->sliceSize
* pOut
->numSlices
;
3113 if (pOut
->pMipInfo
!= NULL
)
3115 pOut
->pMipInfo
[0].pitch
= pOut
->pitch
;
3116 pOut
->pMipInfo
[0].height
= pOut
->height
;
3117 pOut
->pMipInfo
[0].depth
= 1;
3118 pOut
->pMipInfo
[0].offset
= 0;
3119 pOut
->pMipInfo
[0].mipTailOffset
= 0;
3120 pOut
->pMipInfo
[0].macroBlockOffset
= 0;
3130 ************************************************************************************************************************
3131 * Gfx10Lib::ComputeSurfaceInfoMacroTiled
3134 * Internal function to calculate alignment for macro tiled surface
3138 ************************************************************************************************************************
// Computes pitch, height, slice count, base alignment, slice size and surface size for a surface
// that uses a macro-block swizzle mode, including optional stereo height alignment, the mip-tail
// layout for mip-mapped surfaces, and pOut->pMipInfo[] when it is non-NULL.
// NOTE(review): this listing does not show every original line (remaining call arguments, some
// declarations such as alignY / offset, braces, else keywords and the return statement are
// absent); the comments describe only what is visible.
3140 ADDR_E_RETURNCODE
Gfx10Lib::ComputeSurfaceInfoMacroTiled(
3141 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
3142 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
3145 ADDR_E_RETURNCODE returnCode
= ComputeBlockDimensionForSurf(&pOut
->blockWidth
,
3153 if (returnCode
== ADDR_OK
)
3155 UINT_32 heightAlign
= pOut
->blockHeight
;
// Stereo surfaces may need a larger height alignment and a right-eye swizzle value.
3157 if (pIn
->flags
.qbStereo
)
3159 UINT_32 rightXor
= 0;
3162 returnCode
= ComputeStereoInfo(pIn
, heightAlign
, &alignY
, &rightXor
);
3164 if (returnCode
== ADDR_OK
)
3166 pOut
->pStereoInfo
->rightSwizzle
= rightXor
;
3168 heightAlign
*= alignY
;
3172 if (returnCode
== ADDR_OK
)
3174 // Mip chain dimension and epitch has no meaning in GFX10, set to default value
3175 pOut
->mipChainPitch
= 0;
3176 pOut
->mipChainHeight
= 0;
3177 pOut
->mipChainSlice
= 0;
3178 pOut
->epitchIsHeight
= FALSE
;
3179 pOut
->mipChainInTail
= FALSE
;
3180 pOut
->firstMipIdInTail
= pIn
->numMipLevels
;
3182 const UINT_32 blockSizeLog2
= GetBlockSizeLog2(pIn
->swizzleMode
);
3183 const UINT_32 blockSize
= 1 << blockSizeLog2
;
// Pad all three dimensions up to whole blocks (height uses the possibly stereo-scaled alignment).
3185 pOut
->pitch
= PowTwoAlign(pIn
->width
, pOut
->blockWidth
);
3186 pOut
->height
= PowTwoAlign(pIn
->height
, heightAlign
);
3187 pOut
->numSlices
= PowTwoAlign(pIn
->numSlices
, pOut
->blockSlices
);
3188 pOut
->baseAlign
= blockSize
;
3190 if (pIn
->numMipLevels
> 1)
3192 const Dim3d tailMaxDim
= GetMipTailDim(pIn
->resourceType
,
3197 const UINT_32 mip0Width
= pIn
->width
;
3198 const UINT_32 mip0Height
= pIn
->height
;
3199 const BOOL_32 isThin
= IsThin(pIn
->resourceType
, pIn
->swizzleMode
);
3200 const UINT_32 mip0Depth
= isThin
? 1 : pIn
->numSlices
;
3201 const UINT_32 maxMipsInTail
= GetMaxNumMipsInTail(blockSizeLog2
, isThin
);
3202 const UINT_32 index
= Log2(pIn
->bpp
>> 3);
3203 UINT_32 firstMipInTail
= pIn
->numMipLevels
;
3204 UINT_64 mipChainSliceSize
= 0;
// NOTE(review): these arrays are indexed by mip level below; assumes numMipLevels <= MaxMipLevels — confirm.
3205 UINT_64 mipSize
[MaxMipLevels
];
3206 UINT_64 mipSliceSize
[MaxMipLevels
];
3208 Dim3d fixedTailMaxDim
= tailMaxDim
;
// With the dsMipmapHtileFix setting, Z-order modes with 1- or 2-byte elements (index <= 1) use a
// reduced tail dimension, scaled by the 256B block dimensions relative to the index-2 entry.
3210 if (m_settings
.dsMipmapHtileFix
&& IsZOrderSwizzle(pIn
->swizzleMode
) && (index
<= 1))
3212 fixedTailMaxDim
.w
/= Block256_2d
[index
].w
/ Block256_2d
[2].w
;
3213 fixedTailMaxDim
.h
/= Block256_2d
[index
].h
/ Block256_2d
[2].h
;
// First pass: per-level padded sizes, accumulating the slice size of the whole chain.
3216 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
3218 UINT_32 mipWidth
, mipHeight
, mipDepth
;
3220 GetMipSize(mip0Width
, mip0Height
, mip0Depth
, i
, &mipWidth
, &mipHeight
, &mipDepth
);
3222 if (IsInMipTail(fixedTailMaxDim
, maxMipsInTail
, mipWidth
, mipHeight
, pIn
->numMipLevels
- i
))
// The mip tail contributes one block (divided across blockSlices) to the chain's slice size.
3225 mipChainSliceSize
+= blockSize
/ pOut
->blockSlices
;
3230 const UINT_32 pitch
= PowTwoAlign(mipWidth
, pOut
->blockWidth
);
3231 const UINT_32 height
= PowTwoAlign(mipHeight
, pOut
->blockHeight
);
3232 const UINT_32 depth
= PowTwoAlign(mipDepth
, pOut
->blockSlices
);
3233 const UINT_64 sliceSize
= static_cast<UINT_64
>(pitch
) * height
* (pIn
->bpp
>> 3);
3235 mipSize
[i
] = sliceSize
* depth
;
3236 mipSliceSize
[i
] = sliceSize
* pOut
->blockSlices
;
3237 mipChainSliceSize
+= sliceSize
;
3239 if (pOut
->pMipInfo
!= NULL
)
3241 pOut
->pMipInfo
[i
].pitch
= pitch
;
3242 pOut
->pMipInfo
[i
].height
= height
;
3243 pOut
->pMipInfo
[i
].depth
= depth
;
3248 pOut
->sliceSize
= mipChainSliceSize
;
3249 pOut
->surfSize
= mipChainSliceSize
* pOut
->numSlices
;
3250 pOut
->mipChainInTail
= (firstMipInTail
== 0) ? TRUE
: FALSE
;
3251 pOut
->firstMipIdInTail
= firstMipInTail
;
// Second pass: per-level offsets, only needed when the caller asked for mip info.
3253 if (pOut
->pMipInfo
!= NULL
)
3256 UINT_64 macroBlkOffset
= 0;
3257 UINT_32 tailMaxDepth
= 0;
// When a mip tail exists it sits at the start; the non-tail levels are placed after it.
3259 if (firstMipInTail
!= pIn
->numMipLevels
)
3261 UINT_32 mipWidth
, mipHeight
;
3263 GetMipSize(mip0Width
, mip0Height
, mip0Depth
, firstMipInTail
,
3264 &mipWidth
, &mipHeight
, &tailMaxDepth
);
3266 offset
= blockSize
* PowTwoAlign(tailMaxDepth
, pOut
->blockSlices
) / pOut
->blockSlices
;
3267 macroBlkOffset
= blockSize
;
// Levels outside the tail are laid out from the last non-tail level back to mip 0.
3270 for (INT_32 i
= firstMipInTail
- 1; i
>= 0; i
--)
3272 pOut
->pMipInfo
[i
].offset
= offset
;
3273 pOut
->pMipInfo
[i
].macroBlockOffset
= macroBlkOffset
;
3274 pOut
->pMipInfo
[i
].mipTailOffset
= 0;
3276 offset
+= mipSize
[i
];
3277 macroBlkOffset
+= mipSliceSize
[i
];
3280 UINT_32 pitch
= tailMaxDim
.w
;
3281 UINT_32 height
= tailMaxDim
.h
;
3282 UINT_32 depth
= isThin
? 1 : PowTwoAlign(tailMaxDepth
, Block256_3d
[index
].d
);
// From here on tailMaxDepth is expressed in units of 256B-block depth (1 for thin modes).
3284 tailMaxDepth
= isThin
? 1 : (depth
/ Block256_3d
[index
].d
);
// Levels inside the tail: m counts down from (maxMipsInTail - 1) for the first tail level.
3286 for (UINT_32 i
= firstMipInTail
; i
< pIn
->numMipLevels
; i
++)
3288 const UINT_32 m
= maxMipsInTail
- 1 - (i
- firstMipInTail
);
3289 const UINT_32 mipOffset
= (m
> 6) ? (16 << m
) : (m
<< 8);
3291 pOut
->pMipInfo
[i
].offset
= mipOffset
* tailMaxDepth
;
3292 pOut
->pMipInfo
[i
].mipTailOffset
= mipOffset
;
3293 pOut
->pMipInfo
[i
].macroBlockOffset
= 0;
3295 pOut
->pMipInfo
[i
].pitch
= pitch
;
3296 pOut
->pMipInfo
[i
].height
= height
;
3297 pOut
->pMipInfo
[i
].depth
= depth
;
// De-interleave the bits of mipOffset above bit 8 into an x and a y coordinate within the tail
// (even positions starting at bit 8 feed mipY, odd positions starting at bit 9 feed mipX).
3299 UINT_32 mipX
= ((mipOffset
>> 9) & 1) |
3300 ((mipOffset
>> 10) & 2) |
3301 ((mipOffset
>> 11) & 4) |
3302 ((mipOffset
>> 12) & 8) |
3303 ((mipOffset
>> 13) & 16) |
3304 ((mipOffset
>> 14) & 32);
3305 UINT_32 mipY
= ((mipOffset
>> 8) & 1) |
3306 ((mipOffset
>> 9) & 2) |
3307 ((mipOffset
>> 10) & 4) |
3308 ((mipOffset
>> 11) & 8) |
3309 ((mipOffset
>> 12) & 16) |
3310 ((mipOffset
>> 13) & 32);
// Odd block-size exponents get an extra adjustment of mipX / mipY.
// NOTE(review): only part of that adjustment is visible in this listing.
3312 if (blockSizeLog2
& 1)
3314 const UINT_32 temp
= mipX
;
3320 mipY
= (mipY
<< 1) | (mipX
& 1);
// Two variants follow: one scales by the 2D 256B block dimensions, the other by the 3D ones.
// NOTE(review): the condition selecting between them is not visible — presumably isThin; confirm.
3327 pOut
->pMipInfo
[i
].mipTailCoordX
= mipX
* Block256_2d
[index
].w
;
3328 pOut
->pMipInfo
[i
].mipTailCoordY
= mipY
* Block256_2d
[index
].h
;
3329 pOut
->pMipInfo
[i
].mipTailCoordZ
= 0;
3331 pitch
= Max(pitch
>> 1, Block256_2d
[index
].w
);
3332 height
= Max(height
>> 1, Block256_2d
[index
].h
);
3337 pOut
->pMipInfo
[i
].mipTailCoordX
= mipX
* Block256_3d
[index
].w
;
3338 pOut
->pMipInfo
[i
].mipTailCoordY
= mipY
* Block256_3d
[index
].h
;
3339 pOut
->pMipInfo
[i
].mipTailCoordZ
= 0;
3341 pitch
= Max(pitch
>> 1, Block256_3d
[index
].w
);
3342 height
= Max(height
>> 1, Block256_3d
[index
].h
);
3343 depth
= PowTwoAlign(Max(depth
>> 1, 1u), Block256_3d
[index
].d
);
// Single-mip case: the slice size also scales with pIn->numFrags.
3350 pOut
->sliceSize
= static_cast<UINT_64
>(pOut
->pitch
) * pOut
->height
* (pIn
->bpp
>> 3) * pIn
->numFrags
;
3351 pOut
->surfSize
= pOut
->sliceSize
* pOut
->numSlices
;
3353 if (pOut
->pMipInfo
!= NULL
)
3355 pOut
->pMipInfo
[0].pitch
= pOut
->pitch
;
3356 pOut
->pMipInfo
[0].height
= pOut
->height
;
3357 pOut
->pMipInfo
[0].depth
= IsTex3d(pIn
->resourceType
)? pOut
->numSlices
: 1;
3358 pOut
->pMipInfo
[0].offset
= 0;
3359 pOut
->pMipInfo
[0].mipTailOffset
= 0;
3360 pOut
->pMipInfo
[0].macroBlockOffset
= 0;
3361 pOut
->pMipInfo
[0].mipTailCoordX
= 0;
3362 pOut
->pMipInfo
[0].mipTailCoordY
= 0;
3363 pOut
->pMipInfo
[0].mipTailCoordZ
= 0;
3373 ************************************************************************************************************************
3374 * Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3377 * Internal function to calculate address from coord for tiled swizzle surface
3381 ************************************************************************************************************************
3383 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
3384 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
3385 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
* pOut
///< [out] output structure
3388 ADDR_E_RETURNCODE ret
;
3390 if (IsBlock256b(pIn
->swizzleMode
))
3392 ret
= ComputeSurfaceAddrFromCoordMicroTiled(pIn
, pOut
);
3396 ret
= ComputeSurfaceAddrFromCoordMacroTiled(pIn
, pOut
);
3403 ************************************************************************************************************************
3404 * Gfx10Lib::ComputeOffsetFromEquation
3407 * Compute offset from equation
3411 ************************************************************************************************************************
// Evaluates an address equation for the coordinate (x, y, z). For each of the pEq->numBits
// equation bits, v is XOR-ed with one coordinate bit per valid component (addr, xor1, xor2):
// channel 0 reads x, channel 1 reads y, and any other channel is asserted to be 2 and reads z;
// the component's index selects which bit of that coordinate is used.
// NOTE(review): the declaration of v, the accumulation of v into the result and the return
// statement are not visible in this listing.
3413 UINT_32
Gfx10Lib::ComputeOffsetFromEquation(
3414 const ADDR_EQUATION
* pEq
, ///< Equation
3415 UINT_32 x
, ///< x coord in bytes
3416 UINT_32 y
, ///< y coord in pixel
3417 UINT_32 z
///< z coord in slice
3422 for (UINT_32 i
= 0; i
< pEq
->numBits
; i
++)
// Primary address component of bit i.
3426 if (pEq
->addr
[i
].valid
)
3428 if (pEq
->addr
[i
].channel
== 0)
3430 v
^= (x
>> pEq
->addr
[i
].index
) & 1;
3432 else if (pEq
->addr
[i
].channel
== 1)
3434 v
^= (y
>> pEq
->addr
[i
].index
) & 1;
3438 ADDR_ASSERT(pEq
->addr
[i
].channel
== 2);
3439 v
^= (z
>> pEq
->addr
[i
].index
) & 1;
// First XOR component of bit i.
3443 if (pEq
->xor1
[i
].valid
)
3445 if (pEq
->xor1
[i
].channel
== 0)
3447 v
^= (x
>> pEq
->xor1
[i
].index
) & 1;
3449 else if (pEq
->xor1
[i
].channel
== 1)
3451 v
^= (y
>> pEq
->xor1
[i
].index
) & 1;
3455 ADDR_ASSERT(pEq
->xor1
[i
].channel
== 2);
3456 v
^= (z
>> pEq
->xor1
[i
].index
) & 1;
// Second XOR component of bit i.
3460 if (pEq
->xor2
[i
].valid
)
3462 if (pEq
->xor2
[i
].channel
== 0)
3464 v
^= (x
>> pEq
->xor2
[i
].index
) & 1;
3466 else if (pEq
->xor2
[i
].channel
== 1)
3468 v
^= (y
>> pEq
->xor2
[i
].index
) & 1;
3472 ADDR_ASSERT(pEq
->xor2
[i
].channel
== 2);
3473 v
^= (z
>> pEq
->xor2
[i
].index
) & 1;
3484 ************************************************************************************************************************
3485 * Gfx10Lib::ComputeOffsetFromSwizzlePattern
3488 * Compute offset from swizzle pattern
3492 ************************************************************************************************************************
// Evaluates a swizzle pattern for the coordinate (x, y, z) and sample s. pPattern is
// reinterpreted as an array of ADDR_BIT_SETTING with numBits entries; for each entry the x, y, z
// and s fields are read as 16-bit masks and processed only when non-zero.
// NOTE(review): the code that consumes each mask and builds the returned offset is not visible
// in this listing; presumably each mask selects which coordinate bits contribute to output
// bit i — confirm against the full source.
3494 UINT_32
Gfx10Lib::ComputeOffsetFromSwizzlePattern(
3495 const UINT_64
* pPattern
, ///< Swizzle pattern
3496 UINT_32 numBits
, ///< Number of bits in pattern
3497 UINT_32 x
, ///< x coord in pixel
3498 UINT_32 y
, ///< y coord in pixel
3499 UINT_32 z
, ///< z coord in slice
3500 UINT_32 s
///< sample id
// View the 64-bit pattern words as per-bit settings.
3504 const ADDR_BIT_SETTING
* pSwizzlePattern
= reinterpret_cast<const ADDR_BIT_SETTING
*>(pPattern
);
3506 for (UINT_32 i
= 0; i
< numBits
; i
++)
// x contribution to bit i.
3510 if (pSwizzlePattern
[i
].x
!= 0)
3512 UINT_16 mask
= pSwizzlePattern
[i
].x
;
// y contribution to bit i.
3527 if (pSwizzlePattern
[i
].y
!= 0)
3529 UINT_16 mask
= pSwizzlePattern
[i
].y
;
// z contribution to bit i.
3544 if (pSwizzlePattern
[i
].z
!= 0)
3546 UINT_16 mask
= pSwizzlePattern
[i
].z
;
// Sample-index contribution to bit i.
3561 if (pSwizzlePattern
[i
].s
!= 0)
3563 UINT_16 mask
= pSwizzlePattern
[i
].s
;
3585 ************************************************************************************************************************
3586 * Gfx10Lib::GetSwizzlePatternInfo
3589 * Get swizzle pattern
3592 * Swizzle pattern information
3593 ************************************************************************************************************************
3595 const ADDR_SW_PATINFO
* Gfx10Lib::GetSwizzlePatternInfo(
3596 AddrSwizzleMode swizzleMode
, ///< Swizzle mode
3597 AddrResourceType resourceType
, ///< Resource type
3598 UINT_32 elemLog2
, ///< Element size in bytes log2
3599 UINT_32 numFrag
///< Number of fragment
3602 const UINT_32 index
= IsXor(swizzleMode
) ? (m_colorBaseIndex
+ elemLog2
) : elemLog2
;
3603 const ADDR_SW_PATINFO
* patInfo
= NULL
;
3604 const UINT_32 swizzleMask
= 1 << swizzleMode
;
3606 if (IsLinear(swizzleMode
) == FALSE
)
3608 if (IsBlockVariable(swizzleMode
))
3610 if (m_blockVarSizeLog2
!= 0)
3612 ADDR_ASSERT(m_settings
.supportRbPlus
);
3614 if (IsRtOptSwizzle(swizzleMode
))
3618 patInfo
= SW_VAR_R_X_1xaa_RBPLUS_PATINFO
;
3620 else if (numFrag
== 2)
3622 patInfo
= SW_VAR_R_X_2xaa_RBPLUS_PATINFO
;
3624 else if (numFrag
== 4)
3626 patInfo
= SW_VAR_R_X_4xaa_RBPLUS_PATINFO
;
3630 ADDR_ASSERT(numFrag
== 8);
3631 patInfo
= SW_VAR_R_X_8xaa_RBPLUS_PATINFO
;
3634 else if (IsZOrderSwizzle(swizzleMode
))
3638 patInfo
= SW_VAR_Z_X_1xaa_RBPLUS_PATINFO
;
3640 else if (numFrag
== 2)
3642 patInfo
= SW_VAR_Z_X_2xaa_RBPLUS_PATINFO
;
3644 else if (numFrag
== 4)
3646 patInfo
= SW_VAR_Z_X_4xaa_RBPLUS_PATINFO
;
3650 ADDR_ASSERT(numFrag
== 8);
3651 patInfo
= SW_VAR_Z_X_8xaa_RBPLUS_PATINFO
;
3656 else if (resourceType
== ADDR_RSRC_TEX_3D
)
3658 ADDR_ASSERT(numFrag
== 1);
3660 if ((swizzleMask
& Gfx10Rsrc3dSwModeMask
) != 0)
3662 if (IsRtOptSwizzle(swizzleMode
))
3664 patInfo
= m_settings
.supportRbPlus
? SW_64K_R_X_1xaa_RBPLUS_PATINFO
: SW_64K_R_X_1xaa_PATINFO
;
3666 else if (IsZOrderSwizzle(swizzleMode
))
3668 patInfo
= m_settings
.supportRbPlus
? SW_64K_Z_X_1xaa_RBPLUS_PATINFO
: SW_64K_Z_X_1xaa_PATINFO
;
3670 else if (IsDisplaySwizzle(resourceType
, swizzleMode
))
3672 ADDR_ASSERT(swizzleMode
== ADDR_SW_64KB_D_X
);
3673 patInfo
= m_settings
.supportRbPlus
? SW_64K_D3_X_RBPLUS_PATINFO
: SW_64K_D3_X_PATINFO
;
3677 ADDR_ASSERT(IsStandardSwizzle(resourceType
, swizzleMode
));
3679 if (IsBlock4kb(swizzleMode
))
3681 if (swizzleMode
== ADDR_SW_4KB_S
)
3683 patInfo
= m_settings
.supportRbPlus
? SW_4K_S3_RBPLUS_PATINFO
: SW_4K_S3_PATINFO
;
3687 ADDR_ASSERT(swizzleMode
== ADDR_SW_4KB_S_X
);
3688 patInfo
= m_settings
.supportRbPlus
? SW_4K_S3_X_RBPLUS_PATINFO
: SW_4K_S3_X_PATINFO
;
3693 if (swizzleMode
== ADDR_SW_64KB_S
)
3695 patInfo
= m_settings
.supportRbPlus
? SW_64K_S3_RBPLUS_PATINFO
: SW_64K_S3_PATINFO
;
3697 else if (swizzleMode
== ADDR_SW_64KB_S_X
)
3699 patInfo
= m_settings
.supportRbPlus
? SW_64K_S3_X_RBPLUS_PATINFO
: SW_64K_S3_X_PATINFO
;
3703 ADDR_ASSERT(swizzleMode
== ADDR_SW_64KB_S_T
);
3704 patInfo
= m_settings
.supportRbPlus
? SW_64K_S3_T_RBPLUS_PATINFO
: SW_64K_S3_T_PATINFO
;
3712 if ((swizzleMask
& Gfx10Rsrc2dSwModeMask
) != 0)
3714 if (IsBlock256b(swizzleMode
))
3716 if (swizzleMode
== ADDR_SW_256B_S
)
3718 patInfo
= m_settings
.supportRbPlus
? SW_256_S_RBPLUS_PATINFO
: SW_256_S_PATINFO
;
3722 ADDR_ASSERT(swizzleMode
== ADDR_SW_256B_D
);
3723 patInfo
= m_settings
.supportRbPlus
? SW_256_D_RBPLUS_PATINFO
: SW_256_D_PATINFO
;
3726 else if (IsBlock4kb(swizzleMode
))
3728 if (IsStandardSwizzle(resourceType
, swizzleMode
))
3730 if (swizzleMode
== ADDR_SW_4KB_S
)
3732 patInfo
= m_settings
.supportRbPlus
? SW_4K_S_RBPLUS_PATINFO
: SW_4K_S_PATINFO
;
3736 ADDR_ASSERT(swizzleMode
== ADDR_SW_4KB_S_X
);
3737 patInfo
= m_settings
.supportRbPlus
? SW_4K_S_X_RBPLUS_PATINFO
: SW_4K_S_X_PATINFO
;
3742 if (swizzleMode
== ADDR_SW_4KB_D
)
3744 patInfo
= m_settings
.supportRbPlus
? SW_4K_D_RBPLUS_PATINFO
: SW_4K_D_PATINFO
;
3748 ADDR_ASSERT(swizzleMode
== ADDR_SW_4KB_D_X
);
3749 patInfo
= m_settings
.supportRbPlus
? SW_4K_D_X_RBPLUS_PATINFO
: SW_4K_D_X_PATINFO
;
3755 if (IsRtOptSwizzle(swizzleMode
))
3759 patInfo
= m_settings
.supportRbPlus
? SW_64K_R_X_1xaa_RBPLUS_PATINFO
: SW_64K_R_X_1xaa_PATINFO
;
3761 else if (numFrag
== 2)
3763 patInfo
= m_settings
.supportRbPlus
? SW_64K_R_X_2xaa_RBPLUS_PATINFO
: SW_64K_R_X_2xaa_PATINFO
;
3765 else if (numFrag
== 4)
3767 patInfo
= m_settings
.supportRbPlus
? SW_64K_R_X_4xaa_RBPLUS_PATINFO
: SW_64K_R_X_4xaa_PATINFO
;
3771 ADDR_ASSERT(numFrag
== 8);
3772 patInfo
= m_settings
.supportRbPlus
? SW_64K_R_X_8xaa_RBPLUS_PATINFO
: SW_64K_R_X_8xaa_PATINFO
;
3775 else if (IsZOrderSwizzle(swizzleMode
))
3779 patInfo
= m_settings
.supportRbPlus
? SW_64K_Z_X_1xaa_RBPLUS_PATINFO
: SW_64K_Z_X_1xaa_PATINFO
;
3781 else if (numFrag
== 2)
3783 patInfo
= m_settings
.supportRbPlus
? SW_64K_Z_X_2xaa_RBPLUS_PATINFO
: SW_64K_Z_X_2xaa_PATINFO
;
3785 else if (numFrag
== 4)
3787 patInfo
= m_settings
.supportRbPlus
? SW_64K_Z_X_4xaa_RBPLUS_PATINFO
: SW_64K_Z_X_4xaa_PATINFO
;
3791 ADDR_ASSERT(numFrag
== 8);
3792 patInfo
= m_settings
.supportRbPlus
? SW_64K_Z_X_8xaa_RBPLUS_PATINFO
: SW_64K_Z_X_8xaa_PATINFO
;
3795 else if (IsDisplaySwizzle(resourceType
, swizzleMode
))
3797 if (swizzleMode
== ADDR_SW_64KB_D
)
3799 patInfo
= m_settings
.supportRbPlus
? SW_64K_D_RBPLUS_PATINFO
: SW_64K_D_PATINFO
;
3801 else if (swizzleMode
== ADDR_SW_64KB_D_X
)
3803 patInfo
= m_settings
.supportRbPlus
? SW_64K_D_X_RBPLUS_PATINFO
: SW_64K_D_X_PATINFO
;
3807 ADDR_ASSERT(swizzleMode
== ADDR_SW_64KB_D_T
);
3808 patInfo
= m_settings
.supportRbPlus
? SW_64K_D_T_RBPLUS_PATINFO
: SW_64K_D_T_PATINFO
;
3813 if (swizzleMode
== ADDR_SW_64KB_S
)
3815 patInfo
= m_settings
.supportRbPlus
? SW_64K_S_RBPLUS_PATINFO
: SW_64K_S_PATINFO
;
3817 else if (swizzleMode
== ADDR_SW_64KB_S_X
)
3819 patInfo
= m_settings
.supportRbPlus
? SW_64K_S_X_RBPLUS_PATINFO
: SW_64K_S_X_PATINFO
;
3823 ADDR_ASSERT(swizzleMode
== ADDR_SW_64KB_S_T
);
3824 patInfo
= m_settings
.supportRbPlus
? SW_64K_S_T_RBPLUS_PATINFO
: SW_64K_S_T_PATINFO
;
3832 return (patInfo
!= NULL
) ? &patInfo
[index
] : NULL
;
3836 ************************************************************************************************************************
3837 * Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
3840 * Internal function to calculate address from coord for micro tiled swizzle surface
3844 ************************************************************************************************************************
// Calculates the address of a coordinate in a micro tiled swizzle surface.
// It first derives the surface layout via ComputeSurfaceInfoMicroTiled, then
// looks up an address equation and composes the address into pOut->addr.
// NOTE(review): this listing omits several original lines (branch structure,
// some call arguments, the return statement); comments below describe only
// what is visible.
3846 ADDR_E_RETURNCODE
Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
3847 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
3848 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
* pOut
///< [out] output structure
// Zero-initialized surface-info request/response plus per-mip scratch storage.
3851 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn
= {0};
3852 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut
= {0};
3853 ADDR2_MIP_INFO mipInfo
[MaxMipLevels
];
// Mirror the caller's surface description into the local request.
3855 localIn
.swizzleMode
= pIn
->swizzleMode
;
3856 localIn
.flags
= pIn
->flags
;
3857 localIn
.resourceType
= pIn
->resourceType
;
3858 localIn
.bpp
= pIn
->bpp
;
// Dimensions and counts are clamped to at least 1.
3859 localIn
.width
= Max(pIn
->unalignedWidth
, 1u);
3860 localIn
.height
= Max(pIn
->unalignedHeight
, 1u);
3861 localIn
.numSlices
= Max(pIn
->numSlices
, 1u);
3862 localIn
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
3863 localIn
.numSamples
= Max(pIn
->numSamples
, 1u);
3864 localIn
.numFrags
= Max(pIn
->numFrags
, 1u);
// Point the response at the local array so per-mip data is available below.
3865 localOut
.pMipInfo
= mipInfo
;
// Derive the layout values read below: block dimensions, slice size, and the
// per-mip pitch and macro block offset.
3867 ADDR_E_RETURNCODE ret
= ComputeSurfaceInfoMicroTiled(&localIn
, &localOut
);
// Log2 of the element size in bytes (bpp >> 3 converts bits to bytes).
3871 const UINT_32 elemLog2
= Log2(pIn
->bpp
>> 3);
// The equation lookup table is indexed by
// [resource type - 1][swizzle mode][element size log2].
3872 const UINT_32 rsrcType
= static_cast<UINT_32
>(pIn
->resourceType
) - 1;
3873 const UINT_32 swMode
= static_cast<UINT_32
>(pIn
->swizzleMode
);
3874 const UINT_32 eqIndex
= m_equationLookupTable
[rsrcType
][swMode
][elemLog2
];
// Continue only when the lookup produced a valid equation index.
3876 if (eqIndex
!= ADDR_INVALID_EQUATION_INDEX
)
// Locate the block holding (x, y): pb is the mip's pitch in blocks and
// (xb, yb) are the block coordinates, giving a row-major block index.
3878 const UINT_32 pb
= mipInfo
[pIn
->mipId
].pitch
/ localOut
.blockWidth
;
3879 const UINT_32 yb
= pIn
->y
/ localOut
.blockHeight
;
3880 const UINT_32 xb
= pIn
->x
/ localOut
.blockWidth
;
3881 const UINT_32 blockIndex
= yb
* pb
+ xb
;
// Blocks are 256 bytes each.
3882 const UINT_32 blockSize
= 256;
// Offset inside the block comes from the selected equation.
// NOTE(review): the remaining arguments of this call are not visible here.
3883 const UINT_32 blk256Offset
= ComputeOffsetFromEquation(&m_equationTable
[eqIndex
],
// Address = slice base + the mip's macro block offset + block base + ...
// NOTE(review): the final term is not visible here; presumably blk256Offset - confirm.
3887 pOut
->addr
= localOut
.sliceSize
* pIn
->slice
+
3888 mipInfo
[pIn
->mipId
].macroBlockOffset
+
3889 (blockIndex
* blockSize
) +
// NOTE(review): presumably the fallback when no valid equation exists; the
// enclosing branch is not visible in this listing.
3894 ret
= ADDR_INVALIDPARAMS
;
3902 ************************************************************************************************************************
3903 * Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
3906 * Internal function to calculate address from coord for macro tiled swizzle surface
3910 ************************************************************************************************************************
// Calculates the address of a coordinate in a macro tiled swizzle surface.
// It derives the surface layout via ComputeSurfaceInfoMacroTiled, then takes
// one of two paths: a swizzle-pattern path when more than one fragment is
// present, and an address-equation path otherwise.
// NOTE(review): this listing omits several original lines (branch structure,
// some call arguments, the return statement); comments below describe only
// what is visible.
3912 ADDR_E_RETURNCODE
Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
3913 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
3914 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
* pOut
///< [out] output structure
// Zero-initialized surface-info request/response plus per-mip scratch storage.
3917 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn
= {0};
3918 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut
= {0};
3919 ADDR2_MIP_INFO mipInfo
[MaxMipLevels
];
// Mirror the caller's surface description into the local request.
3921 localIn
.swizzleMode
= pIn
->swizzleMode
;
3922 localIn
.flags
= pIn
->flags
;
3923 localIn
.resourceType
= pIn
->resourceType
;
3924 localIn
.bpp
= pIn
->bpp
;
// Dimensions and counts are clamped to at least 1.
3925 localIn
.width
= Max(pIn
->unalignedWidth
, 1u);
3926 localIn
.height
= Max(pIn
->unalignedHeight
, 1u);
3927 localIn
.numSlices
= Max(pIn
->numSlices
, 1u);
3928 localIn
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
3929 localIn
.numSamples
= Max(pIn
->numSamples
, 1u);
3930 localIn
.numFrags
= Max(pIn
->numFrags
, 1u);
// Point the response at the local array so per-mip data is available below.
3931 localOut
.pMipInfo
= mipInfo
;
// Derive the layout values read below: pitch, block dimensions, block slices,
// slice size, and per-mip pitch / macro block offset / mip tail data.
3933 ADDR_E_RETURNCODE ret
= ComputeSurfaceInfoMacroTiled(&localIn
, &localOut
);
// Log2 of the element size in bytes (bpp >> 3 converts bits to bytes).
3937 const UINT_32 elemLog2
= Log2(pIn
->bpp
>> 3);
3938 const UINT_32 blkSizeLog2
= GetBlockSizeLog2(pIn
->swizzleMode
);
// Mask covering the low blkSizeLog2 bits, i.e. an offset within one block.
3939 const UINT_32 blkMask
= (1 << blkSizeLog2
) - 1;
// Mask covering the low m_pipesLog2 bits.
3940 const UINT_32 pipeMask
= (1 << m_pipesLog2
) - 1;
// GetBankXorBits(blkSizeLog2) bits, positioned above the pipe and column bits.
3941 const UINT_32 bankMask
= ((1 << GetBankXorBits(blkSizeLog2
)) - 1) << (m_pipesLog2
+ ColumnBits
);
// For xor swizzle modes: keep only the pipe/bank bits of the caller's
// pipeBankXor, shift them up by m_pipeInterleaveLog2 and confine the result
// to the block. Other modes use 0, which leaves the block offset unchanged.
3942 const UINT_32 pipeBankXor
= IsXor(pIn
->swizzleMode
) ?
3943 (((pIn
->pipeBankXor
& (pipeMask
| bankMask
)) << m_pipeInterleaveLog2
) & blkMask
) : 0;
// Multi-fragment surfaces take the swizzle-pattern path.
3945 if (localIn
.numFrags
> 1)
// NOTE(review): the remaining arguments of this call are not visible here.
3947 const ADDR_SW_PATINFO
* pPatInfo
= GetSwizzlePatternInfo(pIn
->swizzleMode
,
3952 if (pPatInfo
!= NULL
)
// Block location; this path divides the overall localOut.pitch by the block
// width (the equation path further down uses the per-mip pitch instead).
3954 const UINT_32 pb
= localOut
.pitch
/ localOut
.blockWidth
;
3955 const UINT_32 yb
= pIn
->y
/ localOut
.blockHeight
;
3956 const UINT_32 xb
= pIn
->x
/ localOut
.blockWidth
;
// NOTE(review): yb, pb and xb are UINT_32, so this expression is evaluated in
// 32 bits before being stored in the UINT_64 - confirm it cannot overflow.
3957 const UINT_64 blkIdx
= yb
* pb
+ xb
;
// Expand the pattern info into a local 20-entry ADDR_BIT_SETTING array.
3959 ADDR_BIT_SETTING fullSwizzlePattern
[20];
3960 GetSwizzlePatternFromPatternInfo(pPatInfo
, fullSwizzlePattern
);
// Offset inside the block, computed from the expanded pattern.
// NOTE(review): the remaining arguments of this call are not visible here.
3962 const UINT_32 blkOffset
=
3963 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64
*>(fullSwizzlePattern
),
// Address = slice base + block base + (in-block offset XOR pipe/bank xor).
3970 pOut
->addr
= (localOut
.sliceSize
* pIn
->slice
) +
3971 (blkIdx
<< blkSizeLog2
) +
3972 (blkOffset
^ pipeBankXor
);
// NOTE(review): presumably taken when pPatInfo is NULL; the enclosing branch
// is not visible in this listing.
3976 ret
= ADDR_INVALIDPARAMS
;
// Equation path. The lookup table's first index is 1 for 3D resources and 0
// for everything else.
3981 const UINT_32 rsrcIdx
= (pIn
->resourceType
== ADDR_RSRC_TEX_3D
) ? 1 : 0;
3982 const UINT_32 swMode
= static_cast<UINT_32
>(pIn
->swizzleMode
);
3983 const UINT_32 eqIndex
= m_equationLookupTable
[rsrcIdx
][swMode
][elemLog2
];
// Continue only when the lookup produced a valid equation index.
3985 if (eqIndex
!= ADDR_INVALID_EQUATION_INDEX
)
// The mip is treated as being in the mip tail when its mipTailOffset is non-zero.
3987 const BOOL_32 inTail
= (mipInfo
[pIn
->mipId
].mipTailOffset
!= 0) ? TRUE
: FALSE
;
3988 const BOOL_32 isThin
= IsThin(pIn
->resourceType
, pIn
->swizzleMode
);
// Non-thin case: slices are addressed in groups of blockSlices, so the stride
// is scaled up by blockSlices and the slice id scaled down by it.
3989 const UINT_64 sliceSize
= isThin
? localOut
.sliceSize
: (localOut
.sliceSize
* localOut
.blockSlices
);
3990 const UINT_32 sliceId
= isThin
? pIn
->slice
: (pIn
->slice
/ localOut
.blockSlices
);
// In the mip tail the coordinates are shifted by the mip's tail coordinates.
3991 const UINT_32 x
= inTail
? (pIn
->x
+ mipInfo
[pIn
->mipId
].mipTailCoordX
) : pIn
->x
;
3992 const UINT_32 y
= inTail
? (pIn
->y
+ mipInfo
[pIn
->mipId
].mipTailCoordY
) : pIn
->y
;
3993 const UINT_32 z
= inTail
? (pIn
->slice
+ mipInfo
[pIn
->mipId
].mipTailCoordZ
) : pIn
->slice
;
// Block location uses the caller's pIn->x / pIn->y (not the tail-adjusted
// x / y above) and the per-mip pitch.
3994 const UINT_32 pb
= mipInfo
[pIn
->mipId
].pitch
/ localOut
.blockWidth
;
3995 const UINT_32 yb
= pIn
->y
/ localOut
.blockHeight
;
3996 const UINT_32 xb
= pIn
->x
/ localOut
.blockWidth
;
// NOTE(review): as above, the product is evaluated in 32 bits before widening.
3997 const UINT_64 blkIdx
= yb
* pb
+ xb
;
// Offset inside the block comes from the selected equation.
// NOTE(review): the remaining arguments of this call are not visible here.
3998 const UINT_32 blkOffset
= ComputeOffsetFromEquation(&m_equationTable
[eqIndex
],
// Address = slice base + the mip's macro block offset + block base +
// (in-block offset XOR pipe/bank xor).
4002 pOut
->addr
= sliceSize
* sliceId
+
4003 mipInfo
[pIn
->mipId
].macroBlockOffset
+
4004 (blkIdx
<< blkSizeLog2
) +
4005 (blkOffset
^ pipeBankXor
);
// NOTE(review): presumably the fallback when no valid equation exists; the
// enclosing branch is not visible in this listing.
4009 ret
= ADDR_INVALIDPARAMS
;
4018 ************************************************************************************************************************
4019 * Gfx10Lib::HwlComputeMaxBaseAlignments
4022 * Gets maximum alignments
4024 * maximum alignments
4025 ************************************************************************************************************************
4027 UINT_32
Gfx10Lib::HwlComputeMaxBaseAlignments() const
4029 return m_blockVarSizeLog2
? Max(Size64K
, 1u << m_blockVarSizeLog2
) : Size64K
;
4033 ************************************************************************************************************************
4034 * Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4037 * Gets maximum alignments for metadata
4039 * maximum alignments for metadata
4040 ************************************************************************************************************************
// Returns the largest metadata base alignment: the maximum of the Htile, Cmask,
// 2D DCC and 3D DCC meta block sizes found over the swizzle modes, element
// sizes and fragment counts iterated below.
// NOTE(review): this listing omits most array initializers and most
// GetMetaBlkSize arguments; comments describe only what is visible.
4042 UINT_32
Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
// Swizzle modes tried for Htile and Cmask. The visible entry selects the
// variable-block Z_X mode only when m_blockVarSizeLog2 is non-zero, otherwise
// the 64KB Z_X mode.
4046 const AddrSwizzleMode ValidSwizzleModeForXmask
[] =
4049 m_blockVarSizeLog2
? ADDR_SW_VAR_Z_X
: ADDR_SW_64KB_Z_X
,
// Running maxima for the two Xmask metadata kinds.
4052 UINT_32 maxBaseAlignHtile
= 0;
4053 UINT_32 maxBaseAlignCmask
= 0;
// Iterate every candidate Xmask swizzle mode (array length via sizeof ratio).
4055 for (UINT_32 swIdx
= 0; swIdx
< sizeof(ValidSwizzleModeForXmask
) / sizeof(ValidSwizzleModeForXmask
[0]); swIdx
++)
// Htile is scanned over bppLog2 in [0, 3) ...
4057 for (UINT_32 bppLog2
= 0; bppLog2
< 3; bppLog2
++)
// ... and numFragLog2 in [0, 4).
4059 for (UINT_32 numFragLog2
= 0; numFragLog2
< 4; numFragLog2
++)
4061 // Max base alignment for Htile
4062 const UINT_32 metaBlkSizeHtile
= GetMetaBlkSize(Gfx10DataDepthStencil
,
4064 ValidSwizzleModeForXmask
[swIdx
],
4070 maxBaseAlignHtile
= Max(maxBaseAlignHtile
, metaBlkSizeHtile
);
4074 // Max base alignment for Cmask
4075 const UINT_32 metaBlkSizeCmask
= GetMetaBlkSize(Gfx10DataFmask
,
4077 ValidSwizzleModeForXmask
[swIdx
],
4083 maxBaseAlignCmask
= Max(maxBaseAlignCmask
, metaBlkSizeCmask
);
4086 // Max base alignment for 2D Dcc
// The visible entry again picks the variable-block R_X mode only when
// m_blockVarSizeLog2 is non-zero, otherwise the 64KB R_X mode.
4087 const AddrSwizzleMode ValidSwizzleModeForDcc2D
[] =
4092 m_blockVarSizeLog2
? ADDR_SW_VAR_R_X
: ADDR_SW_64KB_R_X
,
4095 UINT_32 maxBaseAlignDcc2D
= 0;
// 2D DCC is scanned over every swizzle mode, every bppLog2 below MaxNumOfBpp
// and numFragLog2 in [0, 4).
4097 for (UINT_32 swIdx
= 0; swIdx
< sizeof(ValidSwizzleModeForDcc2D
) / sizeof(ValidSwizzleModeForDcc2D
[0]); swIdx
++)
4099 for (UINT_32 bppLog2
= 0; bppLog2
< MaxNumOfBpp
; bppLog2
++)
4101 for (UINT_32 numFragLog2
= 0; numFragLog2
< 4; numFragLog2
++)
4103 const UINT_32 metaBlkSize2D
= GetMetaBlkSize(Gfx10DataColor
,
4105 ValidSwizzleModeForDcc2D
[swIdx
],
4111 maxBaseAlignDcc2D
= Max(maxBaseAlignDcc2D
, metaBlkSize2D
);
4116 // Max base alignment for 3D Dcc
4117 const AddrSwizzleMode ValidSwizzleModeForDcc3D
[] =
4123 m_blockVarSizeLog2
? ADDR_SW_VAR_R_X
: ADDR_SW_64KB_R_X
,
4126 UINT_32 maxBaseAlignDcc3D
= 0;
// 3D DCC is scanned over swizzle mode and bppLog2 only; no fragment loop is
// visible for this case.
4128 for (UINT_32 swIdx
= 0; swIdx
< sizeof(ValidSwizzleModeForDcc3D
) / sizeof(ValidSwizzleModeForDcc3D
[0]); swIdx
++)
4130 for (UINT_32 bppLog2
= 0; bppLog2
< MaxNumOfBpp
; bppLog2
++)
4132 const UINT_32 metaBlkSize3D
= GetMetaBlkSize(Gfx10DataColor
,
4134 ValidSwizzleModeForDcc3D
[swIdx
],
4140 maxBaseAlignDcc3D
= Max(maxBaseAlignDcc3D
, metaBlkSize3D
);
// Overall result: the largest of the four per-kind maxima.
4144 return Max(Max(maxBaseAlignHtile
, maxBaseAlignCmask
), Max(maxBaseAlignDcc2D
, maxBaseAlignDcc3D
));
4148 ************************************************************************************************************************
4149 * Gfx10Lib::GetMetaElementSizeLog2
4152 * Gets meta data element size log2
4154 * Meta data element size log2
4155 ************************************************************************************************************************
// Returns the log2 of the metadata element size for the given data type.
// The result is signed (INT_32) and starts at 0.
// NOTE(review): the per-type assignments inside each branch are not visible in
// this listing - confirm the actual values against the full file.
4157 INT_32
Gfx10Lib::GetMetaElementSizeLog2(
4158 Gfx10DataType dataType
) ///< Data surface type
4160 INT_32 elemSizeLog2
= 0;
// Dispatch on the data type: color, depth/stencil, and a final case asserted
// to be Fmask.
4162 if (dataType
== Gfx10DataColor
)
4166 else if (dataType
== Gfx10DataDepthStencil
)
4172 ADDR_ASSERT(dataType
== Gfx10DataFmask
);
4176 return elemSizeLog2
;
4180 ************************************************************************************************************************
4181 * Gfx10Lib::GetMetaCacheSizeLog2
4184 * Gets meta data cache line size log2
4186 * Meta data cache line size log2
4187 ************************************************************************************************************************
// Returns the log2 of the metadata cache line size for the given data type.
// The result is signed (INT_32) and starts at 0.
// NOTE(review): the per-type assignments inside each branch are not visible in
// this listing - confirm the actual values against the full file.
4189 INT_32
Gfx10Lib::GetMetaCacheSizeLog2(
4190 Gfx10DataType dataType
) ///< Data surface type
4192 INT_32 cacheSizeLog2
= 0;
// Dispatch on the data type: color, depth/stencil, and a final case asserted
// to be Fmask.
4194 if (dataType
== Gfx10DataColor
)
4198 else if (dataType
== Gfx10DataDepthStencil
)
4204 ADDR_ASSERT(dataType
== Gfx10DataFmask
);
4207 return cacheSizeLog2
;
4211 ************************************************************************************************************************
4212 * Gfx10Lib::HwlComputeSurfaceInfoLinear
4215 * Internal function to calculate alignment for linear surface
4219 ************************************************************************************************************************
4221 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeSurfaceInfoLinear(
4222 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
4223 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
4226 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
4228 if (IsTex1d(pIn
->resourceType
) && (pIn
->height
> 1))
4230 returnCode
= ADDR_INVALIDPARAMS
;
4234 const UINT_32 elementBytes
= pIn
->bpp
>> 3;
4235 const UINT_32 pitchAlign
= (pIn
->swizzleMode
== ADDR_SW_LINEAR_GENERAL
) ? 1 : (256 / elementBytes
);
4236 const UINT_32 mipDepth
= (pIn
->resourceType
== ADDR_RSRC_TEX_3D
) ? pIn
->numSlices
: 1;
4237 UINT_32 pitch
= PowTwoAlign(pIn
->width
, pitchAlign
);
4238 UINT_32 actualHeight
= pIn
->height
;
4239 UINT_64 sliceSize
= 0;
4241 if (pIn
->numMipLevels
> 1)
4243 for (INT_32 i
= static_cast<INT_32
>(pIn
->numMipLevels
) - 1; i
>= 0; i
--)
4245 UINT_32 mipWidth
, mipHeight
;
4247 GetMipSize(pIn
->width
, pIn
->height
, 1, i
, &mipWidth
, &mipHeight
);
4249 const UINT_32 mipActualWidth
= PowTwoAlign(mipWidth
, pitchAlign
);
4251 if (pOut
->pMipInfo
!= NULL
)
4253 pOut
->pMipInfo
[i
].pitch
= mipActualWidth
;
4254 pOut
->pMipInfo
[i
].height
= mipHeight
;
4255 pOut
->pMipInfo
[i
].depth
= mipDepth
;
4256 pOut
->pMipInfo
[i
].offset
= sliceSize
;
4257 pOut
->pMipInfo
[i
].mipTailOffset
= 0;
4258 pOut
->pMipInfo
[i
].macroBlockOffset
= sliceSize
;
4261 sliceSize
+= static_cast<UINT_64
>(mipActualWidth
) * mipHeight
* elementBytes
;
4266 returnCode
= ApplyCustomizedPitchHeight(pIn
, elementBytes
, pitchAlign
, &pitch
, &actualHeight
);
4268 if (returnCode
== ADDR_OK
)
4270 sliceSize
= static_cast<UINT_64
>(pitch
) * actualHeight
* elementBytes
;
4272 if (pOut
->pMipInfo
!= NULL
)
4274 pOut
->pMipInfo
[0].pitch
= pitch
;
4275 pOut
->pMipInfo
[0].height
= actualHeight
;
4276 pOut
->pMipInfo
[0].depth
= mipDepth
;
4277 pOut
->pMipInfo
[0].offset
= 0;
4278 pOut
->pMipInfo
[0].mipTailOffset
= 0;
4279 pOut
->pMipInfo
[0].macroBlockOffset
= 0;
4284 if (returnCode
== ADDR_OK
)
4286 pOut
->pitch
= pitch
;
4287 pOut
->height
= actualHeight
;
4288 pOut
->numSlices
= pIn
->numSlices
;
4289 pOut
->sliceSize
= sliceSize
;
4290 pOut
->surfSize
= sliceSize
* pOut
->numSlices
;
4291 pOut
->baseAlign
= (pIn
->swizzleMode
== ADDR_SW_LINEAR_GENERAL
) ? elementBytes
: 256;
4292 pOut
->blockWidth
= pitchAlign
;
4293 pOut
->blockHeight
= 1;
4294 pOut
->blockSlices
= 1;
4296 // Following members are useless on GFX10
4297 pOut
->mipChainPitch
= 0;
4298 pOut
->mipChainHeight
= 0;
4299 pOut
->mipChainSlice
= 0;
4300 pOut
->epitchIsHeight
= FALSE
;
4302 // Post calculation validate
4303 ADDR_ASSERT(pOut
->sliceSize
> 0);