2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
28 ************************************************************************************************************************
29 * @file gfx10addrlib.cpp
30 * @brief Contains the implementation for the Gfx10Lib class.
31 ************************************************************************************************************************
34 #include "gfx10addrlib.h"
35 #include "gfx10_gb_reg.h"
37 #include "amdgpu_asic_addr.h"
39 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
40 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
45 ************************************************************************************************************************
49 * Creates a Gfx10Lib object.
52 * Returns a Gfx10Lib object pointer.
53 ************************************************************************************************************************
// Factory entry point for the GFX10 address library: forwards pClient to
// V2::Gfx10Lib::CreateObj and returns whatever that call returns.
// NOTE(review): the function's braces are not present in this listing (the fused
// original line numbers jump 55 -> 57); restore them before compiling.
55 Addr::Lib
* Gfx10HwlInit(const Client
* pClient
)
57 return V2::Gfx10Lib::CreateObj(pClient
);
63 ////////////////////////////////////////////////////////////////////////////////////////////////////
64 // Static Const Member
65 ////////////////////////////////////////////////////////////////////////////////////////////////////
// Static per-swizzle-mode flag table. The array is dimensioned by ADDR_SW_MAX_TYPE and
// each row carries a trailing comment naming the swizzle mode it describes; "Reserved"
// rows are all zero. Column meaning follows the header comment on the first row
// (Linear, 256B, 4KB, 64KB, Var, Z, Std, Disp, Rot, XOR, T, RtOpt, Reserved).
// The constructor copies this table into m_swizzleModeTable.
// NOTE(review): the closing "};" of the initializer is not present in this listing.
67 const SwizzleModeFlags
Gfx10Lib::SwizzleModeTable
[ADDR_SW_MAX_TYPE
] =
68 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
69 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
70 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
71 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
72 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
74 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
75 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
76 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
77 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
79 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
80 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
81 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
82 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
84 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
85 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
86 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
87 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
89 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
90 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
91 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
92 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
94 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
95 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_X
96 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_X
97 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
99 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
100 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
101 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
102 {0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_64KB_R_X
104 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_Z_X
105 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
106 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
107 {0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_VAR_R_X
108 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
// Static block-dimension lookup tables, five entries each.
// Block256_3d is indexed by elemLog2 in HwlComputeDccInfo (thick surfaces) to obtain
// the compress block width/height/depth. Each entry's w*h*d multiplied by the element
// size 2^index equals 256.
// NOTE(review): HwlComputeDccInfo also reads a Block256_2d table whose definition is
// not present in this listing (fused original line numbers jump 108 -> 111).
111 const Dim3d
Gfx10Lib::Block256_3d
[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
// Presumably log2 block dimensions of a 64KB block per element size (the three values
// in each entry sum to 16 - index) - TODO confirm against the consumers of this table.
113 const Dim3d
Gfx10Lib::Block64K_Log2_3d
[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
// Presumably log2 block dimensions of a 4KB block per element size (the three values
// in each entry sum to 12 - index) - TODO confirm against the consumers of this table.
114 const Dim3d
Gfx10Lib::Block4K_Log2_3d
[] = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
117 ************************************************************************************************************************
123 ************************************************************************************************************************
// Constructor. The visible body tags the object as AI_ADDRLIB, zero-fills m_settings
// and copies the static SwizzleModeTable into the per-instance m_swizzleModeTable.
// NOTE(review): the fused original line numbers jump 125 -> 132, so any member
// initializer list and the opening brace are not visible here; the closing brace is
// missing as well.
125 Gfx10Lib::Gfx10Lib(const Client
* pClient
)
132 m_class
= AI_ADDRLIB
;
// Start with every hardware setting cleared.
133 memset(&m_settings
, 0, sizeof(m_settings
));
// sizeof(SwizzleModeTable) is the size of the whole static array.
134 memcpy(m_swizzleModeTable
, SwizzleModeTable
, sizeof(SwizzleModeTable
));
138 ************************************************************************************************************************
139 * Gfx10Lib::~Gfx10Lib
143 ************************************************************************************************************************
// Destructor.
// NOTE(review): the body (braces) is not present in this listing.
145 Gfx10Lib::~Gfx10Lib()
150 ************************************************************************************************************************
151 * Gfx10Lib::HwlComputeHtileInfo
154 * Interface function stub of AddrComputeHtileInfo
158 ************************************************************************************************************************
// Computes HTILE layout information for a surface and writes it to *pOut:
// aligned pitch/height, base alignment, meta block dimensions, per-slice size,
// meta blocks per slice, total htileBytes and, when pOut->pMipInfo is non-NULL,
// a per-mip {inMiptail, offset, sliceSize} record.
// Returns ADDR_INVALIDPARAMS when the swizzle mode / pipe alignment check below fails;
// ret is initialised to ADDR_OK.
// NOTE(review): this listing has lost lines (braces, else branches, the remaining
// GetMetaBlkSize arguments, the declaration of metaBlk, and the return statement).
// Restore them from the original file before compiling.
160 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeHtileInfo(
161 const ADDR2_COMPUTE_HTILE_INFO_INPUT
* pIn
, ///< [in] input structure
162 ADDR2_COMPUTE_HTILE_INFO_OUTPUT
* pOut
///< [out] output structure
165 ADDR_E_RETURNCODE ret
= ADDR_OK
;
// Accept only pipe-aligned requests that use ADDR_SW_64KB_Z_X, or ADDR_SW_VAR_Z_X
// when m_blockVarSizeLog2 is non-zero.
167 if (((pIn
->swizzleMode
!= ADDR_SW_64KB_Z_X
) &&
168 ((pIn
->swizzleMode
!= ADDR_SW_VAR_Z_X
) || (m_blockVarSizeLog2
== 0))) ||
169 (pIn
->hTileFlags
.pipeAligned
!= TRUE
))
171 ret
= ADDR_INVALIDPARAMS
;
// metaBlk (used below) is presumably filled in by this GetMetaBlkSize call; its
// remaining arguments are not visible in this listing.
176 const UINT_32 metaBlkSize
= GetMetaBlkSize(Gfx10DataDepthStencil
,
// Surface dimensions are rounded up to whole meta blocks.
184 pOut
->pitch
= PowTwoAlign(pIn
->unalignedWidth
, metaBlk
.w
);
185 pOut
->height
= PowTwoAlign(pIn
->unalignedHeight
, metaBlk
.h
);
// Base alignment is the larger of the meta block size and 2^(m_pipesLog2 + 11).
186 pOut
->baseAlign
= Max(metaBlkSize
, 1u << (m_pipesLog2
+ 11u));
187 pOut
->metaBlkWidth
= metaBlk
.w
;
188 pOut
->metaBlkHeight
= metaBlk
.h
;
// Mipmapped path: lay out the mips that are not in the tail.
190 if (pIn
->numMipLevels
> 1)
192 ADDR_ASSERT(pIn
->firstMipIdInTail
<= pIn
->numMipLevels
);
// When a mip tail exists (firstMipIdInTail != numMipLevels) the running offset starts
// at one meta block; otherwise it starts at 0.
194 UINT_32 offset
= (pIn
->firstMipIdInTail
== pIn
->numMipLevels
) ? 0 : metaBlkSize
;
// Walk from the last non-tail mip down to mip 0, so smaller mips get lower offsets.
// The index is signed so the loop terminates after i == 0.
196 for (INT_32 i
= static_cast<INT_32
>(pIn
->firstMipIdInTail
) - 1; i
>=0; i
--)
198 UINT_32 mipWidth
, mipHeight
;
200 GetMipSize(pIn
->unalignedWidth
, pIn
->unalignedHeight
, 1, i
, &mipWidth
, &mipHeight
);
202 mipWidth
= PowTwoAlign(mipWidth
, metaBlk
.w
);
203 mipHeight
= PowTwoAlign(mipHeight
, metaBlk
.h
);
// Size of this mip's slice in bytes = (meta blocks across) * (meta blocks down) * block size.
205 const UINT_32 pitchInM
= mipWidth
/ metaBlk
.w
;
206 const UINT_32 heightInM
= mipHeight
/ metaBlk
.h
;
207 const UINT_32 mipSliceSize
= pitchInM
* heightInM
* metaBlkSize
;
209 if (pOut
->pMipInfo
!= NULL
)
211 pOut
->pMipInfo
[i
].inMiptail
= FALSE
;
212 pOut
->pMipInfo
[i
].offset
= offset
;
213 pOut
->pMipInfo
[i
].sliceSize
= mipSliceSize
;
216 offset
+= mipSliceSize
;
// After the loop, offset is the total per-slice size of the mip chain.
219 pOut
->sliceSize
= offset
;
220 pOut
->metaBlkNumPerSlice
= offset
/ metaBlkSize
;
221 pOut
->htileBytes
= pOut
->sliceSize
* pIn
->numSlices
;
// Mips inside the tail report offset 0 and sliceSize 0, except the first mip in the
// tail, which is given sliceSize = metaBlkSize below.
223 if (pOut
->pMipInfo
!= NULL
)
225 for (UINT_32 i
= pIn
->firstMipIdInTail
; i
< pIn
->numMipLevels
; i
++)
227 pOut
->pMipInfo
[i
].inMiptail
= TRUE
;
228 pOut
->pMipInfo
[i
].offset
= 0;
229 pOut
->pMipInfo
[i
].sliceSize
= 0;
232 if (pIn
->firstMipIdInTail
!= pIn
->numMipLevels
)
234 pOut
->pMipInfo
[pIn
->firstMipIdInTail
].sliceSize
= metaBlkSize
;
// Single-level path (presumably the else branch of the numMipLevels check; the
// "else" itself is not visible in this listing).
240 const UINT_32 pitchInM
= pOut
->pitch
/ metaBlk
.w
;
241 const UINT_32 heightInM
= pOut
->height
/ metaBlk
.h
;
243 pOut
->metaBlkNumPerSlice
= pitchInM
* heightInM
;
244 pOut
->sliceSize
= pOut
->metaBlkNumPerSlice
* metaBlkSize
;
245 pOut
->htileBytes
= pOut
->sliceSize
* pIn
->numSlices
;
247 if (pOut
->pMipInfo
!= NULL
)
249 pOut
->pMipInfo
[0].inMiptail
= FALSE
;
250 pOut
->pMipInfo
[0].offset
= 0;
251 pOut
->pMipInfo
[0].sliceSize
= pOut
->sliceSize
;
260 ************************************************************************************************************************
261 * Gfx10Lib::HwlComputeCmaskInfo
264 * Interface function stub of AddrComputeCmaskInfo
268 ************************************************************************************************************************
// Computes CMASK layout information for a surface and writes it to *pOut:
// aligned pitch/height, base alignment (= meta block size), meta block dimensions,
// meta blocks per slice, sliceSize, cmaskBytes and, when pOut->pMipInfo is non-NULL,
// a per-mip {inMiptail, offset, sliceSize} record.
// Returns ADDR_INVALIDPARAMS when the parameter check below fails; ret is initialised
// to ADDR_OK.
// NOTE(review): this listing has lost lines (braces, else branches, the remaining
// GetMetaBlkSize arguments, the declaration of metaBlk, and the return statement).
// Restore them from the original file before compiling.
270 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeCmaskInfo(
271 const ADDR2_COMPUTE_CMASK_INFO_INPUT
* pIn
, ///< [in] input structure
272 ADDR2_COMPUTE_CMASK_INFO_OUTPUT
* pOut
///< [out] output structure
275 ADDR_E_RETURNCODE ret
= ADDR_OK
;
// Accept only pipe-aligned 2D resources that use ADDR_SW_64KB_Z_X, or ADDR_SW_VAR_Z_X
// when m_blockVarSizeLog2 is non-zero.
277 if ((pIn
->resourceType
!= ADDR_RSRC_TEX_2D
) ||
278 (pIn
->cMaskFlags
.pipeAligned
!= TRUE
) ||
279 ((pIn
->swizzleMode
!= ADDR_SW_64KB_Z_X
) &&
280 ((pIn
->swizzleMode
!= ADDR_SW_VAR_Z_X
) || (m_blockVarSizeLog2
== 0))))
282 ret
= ADDR_INVALIDPARAMS
;
// The meta block size is requested with data type Gfx10DataFmask; the remaining
// arguments (and presumably the out-parameter that fills metaBlk) are not visible here.
287 const UINT_32 metaBlkSize
= GetMetaBlkSize(Gfx10DataFmask
,
// Surface dimensions are rounded up to whole meta blocks.
295 pOut
->pitch
= PowTwoAlign(pIn
->unalignedWidth
, metaBlk
.w
);
296 pOut
->height
= PowTwoAlign(pIn
->unalignedHeight
, metaBlk
.h
);
297 pOut
->baseAlign
= metaBlkSize
;
298 pOut
->metaBlkWidth
= metaBlk
.w
;
299 pOut
->metaBlkHeight
= metaBlk
.h
;
// Mipmapped path: count meta blocks for the mips that are not in the tail.
301 if (pIn
->numMipLevels
> 1)
303 ADDR_ASSERT(pIn
->firstMipIdInTail
<= pIn
->numMipLevels
);
// When a mip tail exists the running count starts at one meta block; otherwise at 0.
305 UINT_32 metaBlkPerSlice
= (pIn
->firstMipIdInTail
== pIn
->numMipLevels
) ? 0 : 1;
// Walk from the last non-tail mip down to mip 0, so smaller mips get lower offsets.
// The index is signed so the loop terminates after i == 0.
307 for (INT_32 i
= static_cast<INT_32
>(pIn
->firstMipIdInTail
) - 1; i
>= 0; i
--)
309 UINT_32 mipWidth
, mipHeight
;
311 GetMipSize(pIn
->unalignedWidth
, pIn
->unalignedHeight
, 1, i
, &mipWidth
, &mipHeight
);
313 mipWidth
= PowTwoAlign(mipWidth
, metaBlk
.w
);
314 mipHeight
= PowTwoAlign(mipHeight
, metaBlk
.h
);
316 const UINT_32 pitchInM
= mipWidth
/ metaBlk
.w
;
317 const UINT_32 heightInM
= mipHeight
/ metaBlk
.h
;
319 if (pOut
->pMipInfo
!= NULL
)
321 pOut
->pMipInfo
[i
].inMiptail
= FALSE
;
// Offsets and sizes are reported in bytes: block count * metaBlkSize.
322 pOut
->pMipInfo
[i
].offset
= metaBlkPerSlice
* metaBlkSize
;
323 pOut
->pMipInfo
[i
].sliceSize
= pitchInM
* heightInM
* metaBlkSize
;
326 metaBlkPerSlice
+= pitchInM
* heightInM
;
329 pOut
->metaBlkNumPerSlice
= metaBlkPerSlice
;
// Mips inside the tail report offset 0 and sliceSize 0, except the first mip in the
// tail, which is given sliceSize = metaBlkSize below.
331 if (pOut
->pMipInfo
!= NULL
)
333 for (UINT_32 i
= pIn
->firstMipIdInTail
; i
< pIn
->numMipLevels
; i
++)
335 pOut
->pMipInfo
[i
].inMiptail
= TRUE
;
336 pOut
->pMipInfo
[i
].offset
= 0;
337 pOut
->pMipInfo
[i
].sliceSize
= 0;
340 if (pIn
->firstMipIdInTail
!= pIn
->numMipLevels
)
342 pOut
->pMipInfo
[pIn
->firstMipIdInTail
].sliceSize
= metaBlkSize
;
// Single-level path (presumably the else branch of the numMipLevels check; the
// "else" itself is not visible in this listing).
348 const UINT_32 pitchInM
= pOut
->pitch
/ metaBlk
.w
;
349 const UINT_32 heightInM
= pOut
->height
/ metaBlk
.h
;
351 pOut
->metaBlkNumPerSlice
= pitchInM
* heightInM
;
353 if (pOut
->pMipInfo
!= NULL
)
355 pOut
->pMipInfo
[0].inMiptail
= FALSE
;
356 pOut
->pMipInfo
[0].offset
= 0;
357 pOut
->pMipInfo
[0].sliceSize
= pOut
->metaBlkNumPerSlice
* metaBlkSize
;
// Totals are derived from the meta block count.
361 pOut
->sliceSize
= pOut
->metaBlkNumPerSlice
* metaBlkSize
;
362 pOut
->cmaskBytes
= pOut
->sliceSize
* pIn
->numSlices
;
369 ************************************************************************************************************************
370 * Gfx10Lib::HwlComputeDccInfo
373 * Interface function to compute DCC key info
377 ************************************************************************************************************************
// Computes DCC key layout information for a surface and writes it to *pOut:
// compress block dimensions, dccRamBaseAlign, meta block dimensions, aligned
// pitch/height/depth, dccRamSliceSize, metaBlkNumPerSlice, dccRamSize and, when
// pOut->pMipInfo is non-NULL, a per-mip {inMiptail, offset, sliceSize} record.
// Returns ADDR_INVALIDPARAMS for unsupported swizzle modes or for 3D display-swizzle
// surfaces when m_settings.dccUnsup3DSwDis is set; ret is initialised to ADDR_OK.
// NOTE(review): this listing has lost lines (braces, else branches, several
// GetMetaBlkSize arguments, the declaration of metaBlk, and the return statement).
// Restore them from the original file before compiling.
379 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeDccInfo(
380 const ADDR2_COMPUTE_DCCINFO_INPUT
* pIn
, ///< [in] input structure
381 ADDR2_COMPUTE_DCCINFO_OUTPUT
* pOut
///< [out] output structure
384 ADDR_E_RETURNCODE ret
= ADDR_OK
;
386 if (pIn
->swizzleMode
!= ADDR_SW_64KB_Z_X
&& pIn
->swizzleMode
!= ADDR_SW_64KB_R_X
)
388 // Hardware does not support DCC for this swizzle mode.
389 ret
= ADDR_INVALIDPARAMS
;
391 else if (m_settings
.dccUnsup3DSwDis
&& IsTex3d(pIn
->resourceType
) && IsDisplaySwizzle(pIn
->swizzleMode
))
393 // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
394 ret
= ADDR_INVALIDPARAMS
;
// NOTE(review): the first check above admits ADDR_SW_64KB_Z_X, yet this assert requires
// an RtOpt swizzle and the Z_X row of SwizzleModeTable has RtOpt = 0. If the assert is
// unconditional in the original (a guarding condition may have been lost from this
// listing), a Z_X request would trip it in debug builds - confirm intended behaviour.
398 // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
399 ADDR_ASSERT(IsRtOptSwizzle(pIn
->swizzleMode
));
// bpp is in bits; >> 3 converts to bytes before taking log2.
402 const UINT_32 elemLog2
= Log2(pIn
->bpp
>> 3);
403 const UINT_32 numFragLog2
= Log2(pIn
->numFrags
);
// Only some GetMetaBlkSize arguments are visible in this listing; metaBlk is
// presumably filled in through its out-parameter.
404 const UINT_32 metaBlkSize
= GetMetaBlkSize(Gfx10DataColor
,
409 pIn
->dccKeyFlags
.pipeAligned
,
411 const BOOL_32 isThick
= IsThick(pIn
->resourceType
, pIn
->swizzleMode
);
// Compress block dimensions come from the 256B block tables, indexed by element size;
// thin surfaces use the 2D table with depth 1.
413 pOut
->compressBlkWidth
= isThick
? Block256_3d
[elemLog2
].w
: Block256_2d
[elemLog2
].w
;
414 pOut
->compressBlkHeight
= isThick
? Block256_3d
[elemLog2
].h
: Block256_2d
[elemLog2
].h
;
415 pOut
->compressBlkDepth
= isThick
? Block256_3d
[elemLog2
].d
: 1;
417 pOut
->dccRamBaseAlign
= metaBlkSize
;
418 pOut
->metaBlkWidth
= metaBlk
.w
;
419 pOut
->metaBlkHeight
= metaBlk
.h
;
420 pOut
->metaBlkDepth
= metaBlk
.d
;
// Surface dimensions (including slice count) are rounded up to whole meta blocks.
422 pOut
->pitch
= PowTwoAlign(pIn
->unalignedWidth
, metaBlk
.w
);
423 pOut
->height
= PowTwoAlign(pIn
->unalignedHeight
, metaBlk
.h
);
424 pOut
->depth
= PowTwoAlign(pIn
->numSlices
, metaBlk
.d
);
// Mipmapped path: lay out the mips that are not in the tail.
426 if (pIn
->numMipLevels
> 1)
428 ADDR_ASSERT(pIn
->firstMipIdInTail
<= pIn
->numMipLevels
);
// When a mip tail exists the running offset starts at one meta block; otherwise at 0.
430 UINT_32 offset
= (pIn
->firstMipIdInTail
== pIn
->numMipLevels
) ? 0 : metaBlkSize
;
// Walk from the last non-tail mip down to mip 0, so smaller mips get lower offsets.
// The index is signed so the loop terminates after i == 0.
432 for (INT_32 i
= static_cast<INT_32
>(pIn
->firstMipIdInTail
) - 1; i
>= 0; i
--)
434 UINT_32 mipWidth
, mipHeight
;
436 GetMipSize(pIn
->unalignedWidth
, pIn
->unalignedHeight
, 1, i
, &mipWidth
, &mipHeight
);
438 mipWidth
= PowTwoAlign(mipWidth
, metaBlk
.w
);
439 mipHeight
= PowTwoAlign(mipHeight
, metaBlk
.h
);
441 const UINT_32 pitchInM
= mipWidth
/ metaBlk
.w
;
442 const UINT_32 heightInM
= mipHeight
/ metaBlk
.h
;
443 const UINT_32 mipSliceSize
= pitchInM
* heightInM
* metaBlkSize
;
445 if (pOut
->pMipInfo
!= NULL
)
447 pOut
->pMipInfo
[i
].inMiptail
= FALSE
;
448 pOut
->pMipInfo
[i
].offset
= offset
;
449 pOut
->pMipInfo
[i
].sliceSize
= mipSliceSize
;
452 offset
+= mipSliceSize
;
// After the loop, offset is the total per-slice size of the mip chain.
455 pOut
->dccRamSliceSize
= offset
;
456 pOut
->metaBlkNumPerSlice
= offset
/ metaBlkSize
;
// Total size = slice size * number of meta-block-deep layers.
457 pOut
->dccRamSize
= pOut
->dccRamSliceSize
* (pOut
->depth
/ metaBlk
.d
);
// Mips inside the tail report offset 0 and sliceSize 0, except the first mip in the
// tail, which is given sliceSize = metaBlkSize below.
459 if (pOut
->pMipInfo
!= NULL
)
461 for (UINT_32 i
= pIn
->firstMipIdInTail
; i
< pIn
->numMipLevels
; i
++)
463 pOut
->pMipInfo
[i
].inMiptail
= TRUE
;
464 pOut
->pMipInfo
[i
].offset
= 0;
465 pOut
->pMipInfo
[i
].sliceSize
= 0;
468 if (pIn
->firstMipIdInTail
!= pIn
->numMipLevels
)
470 pOut
->pMipInfo
[pIn
->firstMipIdInTail
].sliceSize
= metaBlkSize
;
// Single-level path (presumably the else branch of the numMipLevels check; the
// "else" itself is not visible in this listing).
476 const UINT_32 pitchInM
= pOut
->pitch
/ metaBlk
.w
;
477 const UINT_32 heightInM
= pOut
->height
/ metaBlk
.h
;
479 pOut
->metaBlkNumPerSlice
= pitchInM
* heightInM
;
480 pOut
->dccRamSliceSize
= pOut
->metaBlkNumPerSlice
* metaBlkSize
;
481 pOut
->dccRamSize
= pOut
->dccRamSliceSize
* (pOut
->depth
/ metaBlk
.d
);
483 if (pOut
->pMipInfo
!= NULL
)
485 pOut
->pMipInfo
[0].inMiptail
= FALSE
;
486 pOut
->pMipInfo
[0].offset
= 0;
487 pOut
->pMipInfo
[0].sliceSize
= pOut
->dccRamSliceSize
;
496 ************************************************************************************************************************
497 * Gfx10Lib::HwlComputeCmaskAddrFromCoord
500 * Interface function stub of AddrComputeCmaskAddrFromCoord
504 ************************************************************************************************************************
// Computes the CMASK address (and nibble bit position) for a pixel coordinate.
// It first calls ComputeCmaskInfo on a locally built input to obtain the meta block
// dimensions, pitch and slice size, then combines:
//   slice base + meta block index * block size + (in-block offset XOR pipe XOR).
// Returns the code produced by ComputeCmaskInfo; pOut is only written when that
// code is ADDR_OK.
// NOTE(review): this listing has lost lines (braces, the coordinate arguments of
// ComputeOffsetFromSwizzlePattern, and the return statement). Restore them from the
// original file before compiling.
506 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeCmaskAddrFromCoord(
507 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
508 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT
* pOut
) ///< [out] output structure
510 // Only support pipe aligned CMask
511 ADDR_ASSERT(pIn
->cMaskFlags
.pipeAligned
== TRUE
);
// Build the info query; dimensions and slice count are clamped to at least 1.
// numMipLevels is left at its zero-initialised value.
513 ADDR2_COMPUTE_CMASK_INFO_INPUT input
= {};
514 input
.size
= sizeof(input
);
515 input
.cMaskFlags
= pIn
->cMaskFlags
;
516 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
517 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
518 input
.numSlices
= Max(pIn
->numSlices
, 1u);
519 input
.swizzleMode
= pIn
->swizzleMode
;
520 input
.resourceType
= pIn
->resourceType
;
522 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output
= {};
523 output
.size
= sizeof(output
);
525 ADDR_E_RETURNCODE returnCode
= ComputeCmaskInfo(&input
, &output
);
527 if (returnCode
== ADDR_OK
)
// fmaskBpp is in bits; >> 3 converts to bytes before taking log2.
529 const UINT_32 fmaskBpp
= GetFmaskBpp(pIn
->numSamples
, pIn
->numFrags
);
530 const UINT_32 fmaskElemLog2
= Log2(fmaskBpp
>> 3);
531 const UINT_32 pipeMask
= (1 << m_pipesLog2
) - 1;
532 const UINT_32 index
= m_xmaskBaseIndex
+ fmaskElemLog2
;
// Pattern index table selection: VAR_Z_X uses the VAR RB+ table; otherwise the 64K
// table is chosen by m_settings.supportRbPlus.
533 const UINT_8
* patIdxTable
= (pIn
->swizzleMode
== ADDR_SW_VAR_Z_X
) ? CMASK_VAR_RBPLUS_PATIDX
:
534 (m_settings
.supportRbPlus
? CMASK_64K_RBPLUS_PATIDX
: CMASK_64K_PATIDX
);
536 const UINT_32 blkSizeLog2
= Log2(output
.metaBlkWidth
) + Log2(output
.metaBlkHeight
) - 7;
537 const UINT_32 blkMask
= (1 << blkSizeLog2
) - 1;
538 const UINT_32 blkOffset
= ComputeOffsetFromSwizzlePattern(CMASK_SW_PATTERN
[patIdxTable
[index
]],
539 blkSizeLog2
+ 1, // +1 for nibble offset
// Linear index of the meta block containing (x, y).
544 const UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
545 const UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
546 const UINT_32 pb
= output
.pitch
/ output
.metaBlkWidth
;
547 const UINT_32 blkIndex
= (yb
* pb
) + xb
;
548 const UINT_32 pipeXor
= ((pIn
->pipeXor
& pipeMask
) << m_pipeInterleaveLog2
) & blkMask
;
// Widen sliceSize to 64 bits before multiplying by the slice index so the slice base
// cannot wrap in 32-bit arithmetic; this matches the HTILE and DCC address
// computations in this file.
550 pOut
->addr
= (static_cast<UINT_64
>(output
.sliceSize
) * pIn
->slice
) +
551 (blkIndex
* (1 << blkSizeLog2
)) +
552 ((blkOffset
>> 1) ^ pipeXor
);
// blkOffset is a nibble offset: its low bit selects bit position 0 or 4.
553 pOut
->bitPosition
= (blkOffset
& 1) << 2;
560 ************************************************************************************************************************
561 * Gfx10Lib::HwlComputeHtileAddrFromCoord
564 * Interface function stub of AddrComputeHtileAddrFromCoord
568 ************************************************************************************************************************
// Computes the HTILE address for a pixel coordinate.
// Mipmapped surfaces (numMipLevels > 1) yield ADDR_NOTIMPLEMENTED. Otherwise the
// function calls ComputeHtileInfo on a locally built single-level input and combines:
//   slice base + meta block index * block size + (in-block offset XOR pipe XOR).
// pOut->addr is only written when ComputeHtileInfo returns ADDR_OK.
// NOTE(review): this listing has lost lines (braces, the else branch, the coordinate
// arguments of ComputeOffsetFromSwizzlePattern, and the return statement). Restore
// them from the original file before compiling.
570 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeHtileAddrFromCoord(
571 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
572 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT
* pOut
) ///< [out] output structure
574 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
576 if (pIn
->numMipLevels
> 1)
578 returnCode
= ADDR_NOTIMPLEMENTED
;
// Build the info query; dimensions and slice count are clamped to at least 1 and the
// query is always for a single mip level.
582 ADDR2_COMPUTE_HTILE_INFO_INPUT input
= {0};
583 input
.size
= sizeof(input
);
584 input
.hTileFlags
= pIn
->hTileFlags
;
585 input
.depthFlags
= pIn
->depthflags
;
586 input
.swizzleMode
= pIn
->swizzleMode
;
587 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
588 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
589 input
.numSlices
= Max(pIn
->numSlices
, 1u);
590 input
.numMipLevels
= 1;
592 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output
= {0};
593 output
.size
= sizeof(output
);
595 returnCode
= ComputeHtileInfo(&input
, &output
);
597 if (returnCode
== ADDR_OK
)
599 const UINT_32 numSampleLog2
= Log2(pIn
->numSamples
);
600 const UINT_32 pipeMask
= (1 << m_pipesLog2
) - 1;
601 const UINT_32 index
= m_xmaskBaseIndex
+ numSampleLog2
;
// Pattern index table is chosen by m_settings.supportRbPlus.
602 const UINT_8
* patIdxTable
= m_settings
.supportRbPlus
? HTILE_RBPLUS_PATIDX
: HTILE_PATIDX
;
604 const UINT_32 blkSizeLog2
= Log2(output
.metaBlkWidth
) + Log2(output
.metaBlkHeight
) - 4;
605 const UINT_32 blkMask
= (1 << blkSizeLog2
) - 1;
606 const UINT_32 blkOffset
= ComputeOffsetFromSwizzlePattern(HTILE_SW_PATTERN
[patIdxTable
[index
]],
607 blkSizeLog2
+ 1, // +1 for nibble offset
// Linear index of the meta block containing (x, y).
612 const UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
613 const UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
614 const UINT_32 pb
= output
.pitch
/ output
.metaBlkWidth
;
615 const UINT_32 blkIndex
= (yb
* pb
) + xb
;
616 const UINT_32 pipeXor
= ((pIn
->pipeXor
& pipeMask
) << m_pipeInterleaveLog2
) & blkMask
;
// sliceSize is widened to 64 bits before the multiply so the slice base cannot wrap.
618 pOut
->addr
= (static_cast<UINT_64
>(output
.sliceSize
) * pIn
->slice
) +
619 (blkIndex
* (1 << blkSizeLog2
)) +
620 ((blkOffset
>> 1) ^ pipeXor
);
628 ************************************************************************************************************************
629 * Gfx10Lib::HwlComputeHtileCoordFromAddr
632 * Interface function stub of AddrComputeHtileCoordFromAddr
636 ************************************************************************************************************************
// HTILE address-to-coordinate conversion. The only visible statement invokes
// ADDR_NOT_IMPLEMENTED(), i.e. this direction is not implemented for GFX10 here.
// NOTE(review): the braces and the return statement are not present in this listing.
638 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeHtileCoordFromAddr(
639 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT
* pIn
, ///< [in] input structure
640 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT
* pOut
) ///< [out] output structure
642 ADDR_NOT_IMPLEMENTED();
648 ************************************************************************************************************************
649 * Gfx10Lib::HwlComputeDccAddrFromCoord
652 * Interface function stub of AddrComputeDccAddrFromCoord
656 ************************************************************************************************************************
// Computes the DCC key address for a pixel coordinate.
// Unsupported inputs (see the check below) yield ADDR_NOTSUPPORTED. Otherwise the
// function selects a swizzle pattern index from the pipe/packer configuration and
// element size, then combines:
//   slice base + meta block index * block size + (in-block offset XOR pipe XOR).
// Meta block dimensions, pitch and dccRamSliceSize are taken from *pIn rather than
// being recomputed.
// NOTE(review): this listing has lost lines (braces, else branches, the tail of the
// validity condition, the coordinate arguments of ComputeOffsetFromSwizzlePattern,
// and the return statement). Restore them from the original file before compiling.
658 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeDccAddrFromCoord(
659 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
660 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT
* pOut
) ///< [out] output structure
662 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
// Only non-linear, single-fragment, single-mip 2D ADDR_SW_64KB_R_X surfaces are
// handled; further terms of this condition are not visible in this listing.
664 if ((pIn
->resourceType
!= ADDR_RSRC_TEX_2D
) ||
665 (pIn
->swizzleMode
!= ADDR_SW_64KB_R_X
) ||
666 (pIn
->dccKeyFlags
.linear
== TRUE
) ||
667 (pIn
->numFrags
> 1) ||
668 (pIn
->numMipLevels
> 1) ||
671 returnCode
= ADDR_NOTSUPPORTED
;
// bpp is in bits; >> 3 converts to bytes before taking log2.
675 const UINT_32 elemLog2
= Log2(pIn
->bpp
>> 3);
676 const UINT_32 numPipeLog2
= m_pipesLog2
;
677 const UINT_32 pipeMask
= (1 << numPipeLog2
) - 1;
678 UINT_32 index
= m_dccBaseIndex
+ elemLog2
;
679 const UINT_8
* patIdxTable
;
// RB+ parts: the index into the RB+ pattern table is advanced in groups of
// MaxNumOfBpp entries according to pipe alignment, packer count and pipe count.
681 if (m_settings
.supportRbPlus
)
683 patIdxTable
= DCC_64K_R_X_RBPLUS_PATIDX
;
685 if (pIn
->dccKeyFlags
.pipeAligned
)
687 index
+= MaxNumOfBpp
;
689 if (m_numPkrLog2
< 2)
691 index
+= m_pipesLog2
* MaxNumOfBpp
;
695 // 4 groups for "m_numPkrLog2 < 2" case
696 index
+= 4 * MaxNumOfBpp
;
698 const UINT_32 dccPipePerPkr
= 3;
700 index
+= (m_numPkrLog2
- 2) * dccPipePerPkr
* MaxNumOfBpp
+
701 (m_pipesLog2
- m_numPkrLog2
) * MaxNumOfBpp
;
// Non-RB+ parts (presumably the else branch; the "else" is not visible here).
707 patIdxTable
= DCC_64K_R_X_PATIDX
;
709 if (pIn
->dccKeyFlags
.pipeAligned
)
711 index
+= (numPipeLog2
+ UnalignedDccType
) * MaxNumOfBpp
;
715 index
+= Min(numPipeLog2
, UnalignedDccType
- 1) * MaxNumOfBpp
;
719 const UINT_32 blkSizeLog2
= Log2(pIn
->metaBlkWidth
) + Log2(pIn
->metaBlkHeight
) + elemLog2
- 8;
720 const UINT_32 blkMask
= (1 << blkSizeLog2
) - 1;
721 const UINT_32 blkOffset
= ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN
[patIdxTable
[index
]],
722 blkSizeLog2
+ 1, // +1 for nibble offset
// Linear index of the meta block containing (x, y).
727 const UINT_32 xb
= pIn
->x
/ pIn
->metaBlkWidth
;
728 const UINT_32 yb
= pIn
->y
/ pIn
->metaBlkHeight
;
729 const UINT_32 pb
= pIn
->pitch
/ pIn
->metaBlkWidth
;
730 const UINT_32 blkIndex
= (yb
* pb
) + xb
;
731 const UINT_32 pipeXor
= ((pIn
->pipeXor
& pipeMask
) << m_pipeInterleaveLog2
) & blkMask
;
// dccRamSliceSize is widened to 64 bits before the multiply so the slice base cannot wrap.
733 pOut
->addr
= (static_cast<UINT_64
>(pIn
->dccRamSliceSize
) * pIn
->slice
) +
734 (blkIndex
* (1 << blkSizeLog2
)) +
735 ((blkOffset
>> 1) ^ pipeXor
);
742 ************************************************************************************************************************
743 * Gfx10Lib::HwlInitGlobalParams
746 * Initializes global parameters
749 * TRUE if all settings are valid
751 ************************************************************************************************************************
// Decodes the GB_ADDR_CONFIG register value supplied in pCreateIn and initialises the
// library's global parameters: pipe count, pipe interleave size, maximum compressed
// fragments, pattern-table base indices (m_xmaskBaseIndex, m_colorBaseIndex), packer /
// shader-array counts on RB+ parts, and m_blockVarSizeLog2.
// NOTE(review): this listing has lost lines (braces, the bodies of the NUM_PIPES
// cases, every "break" / "default" label, else branches, and the return statement).
// Restore them from the original file before compiling.
753 BOOL_32
Gfx10Lib::HwlInitGlobalParams(
754 const ADDR_CREATE_INPUT
* pCreateIn
) ///< [in] create input
756 BOOL_32 valid
= TRUE
;
757 GB_ADDR_CONFIG_gfx10 gbAddrConfig
;
// View the raw 32-bit register value through its bitfield layout.
759 gbAddrConfig
.u32All
= pCreateIn
->regValue
.gbAddrConfig
;
761 // These values are copied from CModel code
// The per-case assignments for NUM_PIPES are not visible in this listing.
762 switch (gbAddrConfig
.bits
.NUM_PIPES
)
764 case ADDR_CONFIG_1_PIPE
:
768 case ADDR_CONFIG_2_PIPE
:
772 case ADDR_CONFIG_4_PIPE
:
776 case ADDR_CONFIG_8_PIPE
:
780 case ADDR_CONFIG_16_PIPE
:
784 case ADDR_CONFIG_32_PIPE
:
788 case ADDR_CONFIG_64_PIPE
:
793 ADDR_ASSERT_ALWAYS();
// Pipe interleave: record both the byte size enum and its log2.
798 switch (gbAddrConfig
.bits
.PIPE_INTERLEAVE_SIZE
)
800 case ADDR_CONFIG_PIPE_INTERLEAVE_256B
:
801 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_256B
;
802 m_pipeInterleaveLog2
= 8;
804 case ADDR_CONFIG_PIPE_INTERLEAVE_512B
:
805 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_512B
;
806 m_pipeInterleaveLog2
= 9;
808 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB
:
809 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_1KB
;
810 m_pipeInterleaveLog2
= 10;
812 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB
:
813 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_2KB
;
814 m_pipeInterleaveLog2
= 11;
817 ADDR_ASSERT_ALWAYS();
822 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
823 // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
824 // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
825 ADDR_ASSERT(m_pipeInterleaveBytes
== ADDR_PIPEINTERLEAVE_256B
);
// Maximum compressed fragments, stored as log2.
827 switch (gbAddrConfig
.bits
.MAX_COMPRESSED_FRAGS
)
829 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS
:
831 m_maxCompFragLog2
= 0;
833 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS
:
835 m_maxCompFragLog2
= 1;
837 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS
:
839 m_maxCompFragLog2
= 2;
841 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS
:
843 m_maxCompFragLog2
= 3;
846 ADDR_ASSERT_ALWAYS();
// Pattern-table base indices advance by one group (MaxNumOfAA / MaxNumOfBpp entries)
// per pipe-count step.
852 // Skip unaligned case
853 m_xmaskBaseIndex
+= MaxNumOfAA
;
855 m_xmaskBaseIndex
+= m_pipesLog2
* MaxNumOfAA
;
856 m_colorBaseIndex
+= m_pipesLog2
* MaxNumOfBpp
;
// RB+ parts additionally read the packer count and derive the shader-array count.
858 if (m_settings
.supportRbPlus
)
860 m_numPkrLog2
= gbAddrConfig
.bits
.NUM_PKRS
;
861 m_numSaLog2
= (m_numPkrLog2
> 0) ? (m_numPkrLog2
- 1) : 0;
863 ADDR_ASSERT((m_numPkrLog2
<= m_pipesLog2
) && ((m_pipesLog2
- m_numPkrLog2
) <= 2));
// Compile-time check that the HTILE and CMASK RB+ index tables have equal length.
865 ADDR_C_ASSERT(sizeof(HTILE_RBPLUS_PATIDX
) / sizeof(HTILE_RBPLUS_PATIDX
[0]) ==
866 sizeof(CMASK_64K_RBPLUS_PATIDX
) / sizeof(CMASK_64K_RBPLUS_PATIDX
[0]));
868 if (m_numPkrLog2
>= 2)
870 m_colorBaseIndex
+= (2 * m_numPkrLog2
- 2) * MaxNumOfBpp
;
871 m_xmaskBaseIndex
+= (m_numPkrLog2
- 1) * 3 * MaxNumOfAA
;
// Non-RB+ parts (presumably the else branch): compile-time checks on table lengths.
// The tail of the numPipeType expression is not visible in this listing.
876 const UINT_32 numPipeType
= static_cast<UINT_32
>(ADDR_CONFIG_64_PIPE
) -
877 static_cast<UINT_32
>(ADDR_CONFIG_1_PIPE
) +
880 ADDR_C_ASSERT(sizeof(HTILE_PATIDX
) / sizeof(HTILE_PATIDX
[0]) == (numPipeType
+ 1) * MaxNumOfAA
);
882 ADDR_C_ASSERT(sizeof(HTILE_PATIDX
) / sizeof(HTILE_PATIDX
[0]) ==
883 sizeof(CMASK_64K_PATIDX
) / sizeof(CMASK_64K_PATIDX
[0]));
887 if (m_settings
.supportRbPlus
)
889 // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
890 // corresponding SW_64KB_* mode
891 m_blockVarSizeLog2
= m_pipesLog2
+ 14;
903 ************************************************************************************************************************
904 * Gfx10Lib::HwlConvertChipFamily
907 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
910 ************************************************************************************************************************
// Maps the driver-supplied chip family / revision to the library's ChipFamily and sets
// the per-ASIC flags in m_settings (dccUnsup3DSwDis, isDcn2, supportRbPlus,
// dsMipmapHtileFix) plus m_configFlags.use32bppFor422Fmt.
// The visible code starts from ADDR_CHIP_FAMILY_NAVI.
// NOTE(review): this listing has lost lines (braces, the switch/case on chipFamily,
// else branches, and the return statement), so which statements are conditional on
// which family cannot be determined from here. Restore them from the original file.
912 ChipFamily
Gfx10Lib::HwlConvertChipFamily(
913 UINT_32 chipFamily
, ///< [in] chip family defined in atiid.h
914 UINT_32 chipRevision
) ///< [in] chip revision defined in "asic_family"_id.h
916 ChipFamily family
= ADDR_CHIP_FAMILY_NAVI
;
// Default: DCC on 3D display-swizzle surfaces is treated as unsupported
// (HwlComputeDccInfo rejects such requests when this flag is set).
918 m_settings
.dccUnsup3DSwDis
= 1;
923 m_settings
.isDcn2
= 1;
// Sienna Cichlid and Navy Flounder enable RB+ and clear the 3D display DCC restriction.
925 if (ASICREV_IS_SIENNA_CICHLID(chipRevision
))
927 m_settings
.supportRbPlus
= 1;
928 m_settings
.dccUnsup3DSwDis
= 0;
931 if (ASICREV_IS_NAVY_FLOUNDER(chipRevision
))
933 m_settings
.supportRbPlus
= 1;
934 m_settings
.dccUnsup3DSwDis
= 0;
938 ADDR_ASSERT(!"Unknown chip family");
// dsMipmapHtileFix is enabled by default and cleared for Navi10 revisions.
942 m_settings
.dsMipmapHtileFix
= 1;
944 if (ASICREV_IS_NAVI10_P(chipRevision
))
946 m_settings
.dsMipmapHtileFix
= 0;
949 m_configFlags
.use32bppFor422Fmt
= TRUE
;
955 ************************************************************************************************************************
956 * Gfx10Lib::GetBlk256SizeLog2
963 ************************************************************************************************************************
// Computes the log2 dimensions of a 256-byte block for the given resource type,
// swizzle mode and element size, writing them to *pBlock.
// Thin surfaces split the available bits between w and h (w gets the odd bit);
// thick surfaces split them three ways between d, w and h (d first, then w).
// NOTE(review): this listing has lost lines (braces, the else, and - in the thin
// branch - any assignment to pBlock->d). Restore them from the original file.
965 void Gfx10Lib::GetBlk256SizeLog2(
966 AddrResourceType resourceType
, ///< [in] Resource type
967 AddrSwizzleMode swizzleMode
, ///< [in] Swizzle mode
968 UINT_32 elemLog2
, ///< [in] element size log2
969 UINT_32 numSamplesLog2
, ///< [in] number of samples
970 Dim3d
* pBlock
///< [out] block size
973 if (IsThin(resourceType
, swizzleMode
))
// 256 bytes = 2^8; subtracting the element size log2 leaves the log2 element count.
975 UINT_32 blockBits
= 8 - elemLog2
;
// Z-order swizzles also spend bits on the samples.
977 if (IsZOrderSwizzle(swizzleMode
))
979 blockBits
-= numSamplesLog2
;
982 pBlock
->w
= (blockBits
>> 1) + (blockBits
& 1);
983 pBlock
->h
= (blockBits
>> 1);
988 ADDR_ASSERT(IsThick(resourceType
, swizzleMode
));
990 UINT_32 blockBits
= 8 - elemLog2
;
// Remainder bits go to d first, then to w.
992 pBlock
->d
= (blockBits
/ 3) + (((blockBits
% 3) > 0) ? 1 : 0);
993 pBlock
->w
= (blockBits
/ 3) + (((blockBits
% 3) > 1) ? 1 : 0);
994 pBlock
->h
= (blockBits
/ 3);
999 ************************************************************************************************************************
1000 * Gfx10Lib::GetCompressedBlockSizeLog2
1003 * Get compress block size
1007 ************************************************************************************************************************
// Computes the log2 dimensions of a compressed block for the given data type and
// writes them to *pBlock. For Gfx10DataColor the result is the 256-byte block size
// from GetBlk256SizeLog2; the other accepted data types are Gfx10DataDepthStencil and
// Gfx10DataFmask.
// NOTE(review): this listing has lost lines (braces, the else, and the assignments
// made for the depth-stencil / fmask case). Restore them from the original file.
1009 void Gfx10Lib::GetCompressedBlockSizeLog2(
1010 Gfx10DataType dataType
, ///< [in] Data type
1011 AddrResourceType resourceType
, ///< [in] Resource type
1012 AddrSwizzleMode swizzleMode
, ///< [in] Swizzle mode
1013 UINT_32 elemLog2
, ///< [in] element size log2
1014 UINT_32 numSamplesLog2
, ///< [in] number of samples
1015 Dim3d
* pBlock
///< [out] block size
1018 if (dataType
== Gfx10DataColor
)
1020 GetBlk256SizeLog2(resourceType
, swizzleMode
, elemLog2
, numSamplesLog2
, pBlock
);
1024 ADDR_ASSERT((dataType
== Gfx10DataDepthStencil
) || (dataType
== Gfx10DataFmask
));
1032 ************************************************************************************************************************
1033 * Gfx10Lib::GetMetaOverlapLog2
1036 * Get meta block overlap
1040 ************************************************************************************************************************
// Computes the meta block overlap (log2). The visible logic is:
//   overlap = effective pipe count (log2) - max(compressed block size, 256B block size)
// with adjustments for RB+ parts and the 16Bpp 8xAA case, finally clamped to >= 0.
// NOTE(review): this listing has lost lines (braces, the declarations of compBlock
// and microBlock, the bodies of the two adjustment branches, and the return
// statement). Restore them from the original file before compiling.
1042 INT_32
Gfx10Lib::GetMetaOverlapLog2(
1043 Gfx10DataType dataType
, ///< [in] Data type
1044 AddrResourceType resourceType
, ///< [in] Resource type
1045 AddrSwizzleMode swizzleMode
, ///< [in] Swizzle mode
1046 UINT_32 elemLog2
, ///< [in] element size log2
1047 UINT_32 numSamplesLog2
///< [in] number of samples
1053 GetCompressedBlockSizeLog2(dataType
, resourceType
, swizzleMode
, elemLog2
, numSamplesLog2
, &compBlock
);
// The last argument is the address of microBlock (the listing had this mis-decoded
// as a micro sign followed by "Block").
1054 GetBlk256SizeLog2(resourceType
, swizzleMode
, elemLog2
, numSamplesLog2
, &microBlock
);
// Block dimensions are log2 values, so their sum is the log2 of the block volume.
1056 const INT_32 compSizeLog2
= compBlock
.w
+ compBlock
.h
+ compBlock
.d
;
1057 const INT_32 blk256SizeLog2
= microBlock
.w
+ microBlock
.h
+ microBlock
.d
;
1058 const INT_32 maxSizeLog2
= Max(compSizeLog2
, blk256SizeLog2
);
1059 const INT_32 numPipesLog2
= GetEffectiveNumPipes();
1060 INT_32 overlap
= numPipesLog2
- maxSizeLog2
;
// The adjustment applied in this branch is not visible in this listing.
1062 if ((numPipesLog2
> 1) && m_settings
.supportRbPlus
)
1067 // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1068 if ((elemLog2
== 4) && (numSamplesLog2
== 3))
// Overlap is never negative.
1072 overlap
= Max(overlap
, 0);
1077 ************************************************************************************************************************
1078 * Gfx10Lib::Get3DMetaOverlapLog2
1081 * Get 3d meta block overlap
1085 ************************************************************************************************************************
// Computes the meta block overlap (log2) for 3D resources. The visible logic starts
// from: effective pipe count (log2) - 256B block width (log2), then applies an RB+
// adjustment and a final check on negative overlap / standard swizzle.
// NOTE(review): this listing has lost lines (braces, the declaration of microBlock,
// the bodies of both conditional branches, and the return statement). Restore them
// from the original file before compiling.
1087 INT_32
Gfx10Lib::Get3DMetaOverlapLog2(
1088 AddrResourceType resourceType
, ///< [in] Resource type
1089 AddrSwizzleMode swizzleMode
, ///< [in] Swizzle mode
1090 UINT_32 elemLog2
///< [in] element size log2
// Sample count log2 is passed as 0. The last argument is the address of microBlock
// (the listing had this mis-decoded as a micro sign followed by "Block").
1094 GetBlk256SizeLog2(resourceType
, swizzleMode
, elemLog2
, 0, &microBlock
);
1096 INT_32 overlap
= GetEffectiveNumPipes() - static_cast<INT_32
>(microBlock
.w
);
// The adjustment applied on RB+ parts is not visible in this listing.
1098 if (m_settings
.supportRbPlus
)
// The action taken for negative overlap or standard swizzle is not visible here.
1103 if ((overlap
< 0) || (IsStandardSwizzle(resourceType
, swizzleMode
) == TRUE
))
1111 ************************************************************************************************************************
1112 * Gfx10Lib::GetPipeRotateAmount
1115 * Get pipe rotate amount
1118 * Pipe rotate amount
1119 ************************************************************************************************************************
// Computes the pipe rotate amount for the given resource type and swizzle mode.
// A value is assigned only when RB+ is supported, m_pipesLog2 >= m_numSaLog2 + 1 and m_pipesLog2 > 1.
1122 INT_32
Gfx10Lib::GetPipeRotateAmount(
1123 AddrResourceType resourceType
, ///< [in] Resource type
1124 AddrSwizzleMode swizzleMode
///< [in] Swizzle mode
1129 if (m_settings
.supportRbPlus
&& (m_pipesLog2
>= (m_numSaLog2
+ 1)) && (m_pipesLog2
> 1))
// 1 when m_pipesLog2 == m_numSaLog2 + 1 and IsRbAligned() holds for this mode;
// otherwise m_pipesLog2 - (m_numSaLog2 + 1).
1131 amount
= ((m_pipesLog2
== (m_numSaLog2
+ 1)) && IsRbAligned(resourceType
, swizzleMode
)) ?
1132 1 : m_pipesLog2
- (m_numSaLog2
+ 1);
1139 ************************************************************************************************************************
1140 * Gfx10Lib::GetMetaBlkSize
1143 * Get metadata block size
1147 ************************************************************************************************************************
// Computes the metadata block size for a surface. The return value is 1 << metablkSizeLog2, and the
// block dimensions are written through pBlock: w/h after the IsThin() check, w/h/d on the path that
// asserts IsThick().
1149 UINT_32
Gfx10Lib::GetMetaBlkSize(
1150 Gfx10DataType dataType
, ///< [in] Data type
1151 AddrResourceType resourceType
, ///< [in] Resource type
1152 AddrSwizzleMode swizzleMode
, ///< [in] Swizzle mode
1153 UINT_32 elemLog2
, ///< [in] element size log2
1154 UINT_32 numSamplesLog2
, ///< [in] number of samples, log2
1155 BOOL_32 pipeAlign
, ///< [in] pipe align
1156 Dim3d
* pBlock
///< [out] block size
1159 INT_32 metablkSizeLog2
;
1160 const INT_32 metaElemSizeLog2
= GetMetaElementSizeLog2(dataType
);
1161 const INT_32 metaCacheSizeLog2
= GetMetaCacheSizeLog2(dataType
);
// Compressed block size (log2): fixed 8 for color data, otherwise 6 + numSamplesLog2 + elemLog2.
1162 const INT_32 compBlkSizeLog2
= (dataType
== Gfx10DataColor
) ? 8 : 6 + numSamplesLog2
+ elemLog2
;
// Depth/stencil uses the full sample count; other data types are capped at m_maxCompFragLog2.
1163 const INT_32 metaBlkSamplesLog2
= (dataType
== Gfx10DataDepthStencil
) ?
1164 numSamplesLog2
: Min(numSamplesLog2
, m_maxCompFragLog2
);
1165 const INT_32 dataBlkSizeLog2
= GetBlockSizeLog2(swizzleMode
);
1166 INT_32 numPipesLog2
= m_pipesLog2
;
1168 if (IsThin(resourceType
, swizzleMode
))
// Not pipe-aligned, standard swizzle and display swizzle are handled together.
1170 if ((pipeAlign
== FALSE
) ||
1171 (IsStandardSwizzle(resourceType
, swizzleMode
) == TRUE
) ||
1172 (IsDisplaySwizzle(resourceType
, swizzleMode
) == TRUE
))
// At least 4KB (log2 12) or pipe interleave * pipes, but never larger than the data block.
1176 metablkSizeLog2
= Max(static_cast<INT_32
>(m_pipeInterleaveLog2
) + numPipesLog2
, 12);
1177 metablkSizeLog2
= Min(metablkSizeLog2
, dataBlkSizeLog2
);
1181 metablkSizeLog2
= Min(dataBlkSizeLog2
, 12);
1186 if (m_settings
.supportRbPlus
&& (m_pipesLog2
== m_numSaLog2
+ 1) && (m_pipesLog2
> 1))
1191 INT_32 pipeRotateLog2
= GetPipeRotateAmount(resourceType
, swizzleMode
);
1193 if (numPipesLog2
>= 4)
1195 INT_32 overlapLog2
= GetMetaOverlapLog2(dataType
, resourceType
, swizzleMode
, elemLog2
, numSamplesLog2
);
1197 // In 16Bpe 8xaa, we have an extra overlap bit
1198 if ((pipeRotateLog2
> 0) &&
1200 (numSamplesLog2
== 3) &&
1201 (IsZOrderSwizzle(swizzleMode
) || (GetEffectiveNumPipes() > 3)))
// Meta cache size plus overlap plus pipes, floored at pipe interleave * pipes.
1206 metablkSizeLog2
= metaCacheSizeLog2
+ overlapLog2
+ numPipesLog2
;
1207 metablkSizeLog2
= Max(metablkSizeLog2
, static_cast<INT_32
>(m_pipeInterleaveLog2
) + numPipesLog2
);
// RB+ special case: RT-opt swizzle, 64 pipes, 8 samples and 8 compressed fragments need at least 32KB.
1209 if (m_settings
.supportRbPlus
&&
1210 IsRtOptSwizzle(swizzleMode
) &&
1211 (numPipesLog2
== 6) &&
1212 (numSamplesLog2
== 3) &&
1213 (m_maxCompFragLog2
== 3) &&
1214 (metablkSizeLog2
< 15))
1216 metablkSizeLog2
= 15;
1221 metablkSizeLog2
= Max(static_cast<INT_32
>(m_pipeInterleaveLog2
) + numPipesLog2
, 12);
1224 if (dataType
== Gfx10DataDepthStencil
)
1226 // For htile surfaces, pad meta block size to 2K * num_pipes
1227 metablkSizeLog2
= Max(metablkSizeLog2
, 11 + numPipesLog2
);
1230 const INT_32 compFragLog2
= Min(m_maxCompFragLog2
, numSamplesLog2
);
1232 if (IsRtOptSwizzle(swizzleMode
) && (compFragLog2
> 1) && (pipeRotateLog2
>= 1))
1234 const INT_32 tmp
= 8 + m_pipesLog2
+ Max(pipeRotateLog2
, compFragLog2
- 1);
1236 metablkSizeLog2
= Max(metablkSizeLog2
, tmp
);
// Number of address bits to distribute over the block dimensions.
1240 const INT_32 metablkBitsLog2
=
1241 metablkSizeLog2
+ compBlkSizeLog2
- elemLog2
- metaBlkSamplesLog2
- metaElemSizeLog2
;
// Split the bits over w and h; w receives the extra bit when the count is odd.
1242 pBlock
->w
= 1 << ((metablkBitsLog2
>> 1) + (metablkBitsLog2
& 1));
1243 pBlock
->h
= 1 << (metablkBitsLog2
>> 1);
1248 ADDR_ASSERT(IsThick(resourceType
, swizzleMode
));
1252 if (m_settings
.supportRbPlus
&&
1253 (m_pipesLog2
== m_numSaLog2
+ 1) &&
1254 (m_pipesLog2
> 1) &&
1255 IsRbAligned(resourceType
, swizzleMode
))
1260 const INT_32 overlapLog2
= Get3DMetaOverlapLog2(resourceType
, swizzleMode
, elemLog2
);
// Same construction as above using the 3D overlap, with a 4KB (log2 12) minimum.
1262 metablkSizeLog2
= metaCacheSizeLog2
+ overlapLog2
+ numPipesLog2
;
1263 metablkSizeLog2
= Max(metablkSizeLog2
, static_cast<INT_32
>(m_pipeInterleaveLog2
) + numPipesLog2
);
1264 metablkSizeLog2
= Max(metablkSizeLog2
, 12);
1268 metablkSizeLog2
= 12;
1271 const INT_32 metablkBitsLog2
=
1272 metablkSizeLog2
+ compBlkSizeLog2
- elemLog2
- metaBlkSamplesLog2
- metaElemSizeLog2
;
// Split the bits over w, h and d; remainder bits go to w first, then to h.
1273 pBlock
->w
= 1 << ((metablkBitsLog2
/ 3) + (((metablkBitsLog2
% 3) > 0) ? 1 : 0));
1274 pBlock
->h
= 1 << ((metablkBitsLog2
/ 3) + (((metablkBitsLog2
% 3) > 1) ? 1 : 0));
1275 pBlock
->d
= 1 << (metablkBitsLog2
/ 3);
// The size is returned in bytes-style units as a power of two of metablkSizeLog2.
1278 return (1 << static_cast<UINT_32
>(metablkSizeLog2
));
1282 ************************************************************************************************************************
1283 * Gfx10Lib::ConvertSwizzlePatternToEquation
1286 * Convert swizzle pattern to equation.
1290 ************************************************************************************************************************
// Fills *pEquation from the swizzle pattern described by pPatInfo.
// The lowest elemLog2 address bits are always mapped to channel 0 with index == bit position. The
// remaining bits up to the block size are derived from the expanded pattern: non-XOR modes take one
// x/y/z component per bit, while the XOR paths (thin, and the one using the Block4K/Block64K 3D
// tables) also populate xor1/xor2.
// Channel numbering used throughout: 0 = x, 1 = y, 2 = z. x indices are offset by elemLog2.
1292 VOID
Gfx10Lib::ConvertSwizzlePatternToEquation(
1293 UINT_32 elemLog2
, ///< [in] element bytes log2
1294 AddrResourceType rsrcType
, ///< [in] resource type
1295 AddrSwizzleMode swMode
, ///< [in] swizzle mode
1296 const ADDR_SW_PATINFO
* pPatInfo
, ///< [in] swizzle pattern info
1297 ADDR_EQUATION
* pEquation
) ///< [out] equation converted from swizzle pattern
// Expand the pattern info into one bit setting per address bit.
1300 ADDR_BIT_SETTING fullSwizzlePattern
[20];
1301 GetSwizzlePatternFromPatternInfo(pPatInfo
, fullSwizzlePattern
);
1303 const ADDR_BIT_SETTING
* pSwizzle
= fullSwizzlePattern
;
1304 const UINT_32 blockSizeLog2
= GetBlockSizeLog2(swMode
);
1306 pEquation
->numBits
= blockSizeLog2
;
1307 pEquation
->stackedDepthSlices
= FALSE
;
// Bits below the element size map straight to channel 0.
1309 for (UINT_32 i
= 0; i
< elemLog2
; i
++)
1311 pEquation
->addr
[i
].channel
= 0;
1312 pEquation
->addr
[i
].valid
= 1;
1313 pEquation
->addr
[i
].index
= i
;
// Non-XOR modes: every pattern bit is asserted to hold exactly one component.
1316 if (IsXor(swMode
) == FALSE
)
1318 for (UINT_32 i
= elemLog2
; i
< blockSizeLog2
; i
++)
1320 ADDR_ASSERT(IsPow2(pSwizzle
[i
].value
));
1322 if (pSwizzle
[i
].x
!= 0)
1324 ADDR_ASSERT(IsPow2(static_cast<UINT_32
>(pSwizzle
[i
].x
)));
1326 pEquation
->addr
[i
].channel
= 0;
1327 pEquation
->addr
[i
].valid
= 1;
1328 pEquation
->addr
[i
].index
= Log2(pSwizzle
[i
].x
) + elemLog2
;
1330 else if (pSwizzle
[i
].y
!= 0)
1332 ADDR_ASSERT(IsPow2(static_cast<UINT_32
>(pSwizzle
[i
].y
)));
1334 pEquation
->addr
[i
].channel
= 1;
1335 pEquation
->addr
[i
].valid
= 1;
1336 pEquation
->addr
[i
].index
= Log2(pSwizzle
[i
].y
);
1340 ADDR_ASSERT(pSwizzle
[i
].z
!= 0);
1341 ADDR_ASSERT(IsPow2(static_cast<UINT_32
>(pSwizzle
[i
].z
)));
1343 pEquation
->addr
[i
].channel
= 2;
1344 pEquation
->addr
[i
].valid
= 1;
1345 pEquation
->addr
[i
].index
= Log2(pSwizzle
[i
].z
);
1348 pEquation
->xor1
[i
].value
= 0;
1349 pEquation
->xor2
[i
].value
= 0;
// Thin XOR modes: block width/height come from ComputeThinBlockDimension for this element size.
1352 else if (IsThin(rsrcType
, swMode
))
1355 ComputeThinBlockDimension(&dim
.w
, &dim
.h
, &dim
.d
, 8u << elemLog2
, 0, rsrcType
, swMode
);
1357 const UINT_32 blkXLog2
= Log2(dim
.w
);
1358 const UINT_32 blkYLog2
= Log2(dim
.h
);
1359 const UINT_32 blkXMask
= dim
.w
- 1;
1360 const UINT_32 blkYMask
= dim
.h
- 1;
// Local working copy of the pattern bits that still need resolving.
1362 ADDR_BIT_SETTING swizzle
[ADDR_MAX_EQUATION_BIT
];
1365 UINT_32 bMask
= (1 << elemLog2
) - 1;
// First pass: single-component bits go straight into addr[]; xMask/yMask record which coordinate
// bits have been consumed.
1367 for (UINT_32 i
= elemLog2
; i
< blockSizeLog2
; i
++)
1369 if (IsPow2(pSwizzle
[i
].value
))
1371 if (pSwizzle
[i
].x
!= 0)
1373 ADDR_ASSERT((xMask
& pSwizzle
[i
].x
) == 0);
1374 xMask
|= pSwizzle
[i
].x
;
1376 const UINT_32 xLog2
= Log2(pSwizzle
[i
].x
);
1378 ADDR_ASSERT(xLog2
< blkXLog2
);
1380 pEquation
->addr
[i
].channel
= 0;
1381 pEquation
->addr
[i
].valid
= 1;
1382 pEquation
->addr
[i
].index
= xLog2
+ elemLog2
;
1386 ADDR_ASSERT(pSwizzle
[i
].y
!= 0);
1387 ADDR_ASSERT((yMask
& pSwizzle
[i
].y
) == 0);
1388 yMask
|= pSwizzle
[i
].y
;
1390 pEquation
->addr
[i
].channel
= 1;
1391 pEquation
->addr
[i
].valid
= 1;
1392 pEquation
->addr
[i
].index
= Log2(pSwizzle
[i
].y
);
1394 ADDR_ASSERT(pEquation
->addr
[i
].index
< blkYLog2
);
1397 swizzle
[i
].value
= 0;
// A z component of a multi-component bit is recorded in xor2 as channel 2.
1402 if (pSwizzle
[i
].z
!= 0)
1404 ADDR_ASSERT(IsPow2(static_cast<UINT_32
>(pSwizzle
[i
].z
)));
1406 pEquation
->xor2
[i
].channel
= 2;
1407 pEquation
->xor2
[i
].valid
= 1;
1408 pEquation
->xor2
[i
].index
= Log2(pSwizzle
[i
].z
);
// Keep only the x/y components in the working copy.
1411 swizzle
[i
].x
= pSwizzle
[i
].x
;
1412 swizzle
[i
].y
= pSwizzle
[i
].y
;
1413 swizzle
[i
].z
= swizzle
[i
].s
= 0;
1415 ADDR_ASSERT(IsPow2(swizzle
[i
].value
) == FALSE
);
// x bits beyond the block width are emitted through xor1 and then stripped from the working copy.
1417 const UINT_32 xHi
= swizzle
[i
].x
& (~blkXMask
);
1421 ADDR_ASSERT(IsPow2(xHi
));
1422 ADDR_ASSERT(pEquation
->xor1
[i
].value
== 0);
1424 pEquation
->xor1
[i
].channel
= 0;
1425 pEquation
->xor1
[i
].valid
= 1;
1426 pEquation
->xor1
[i
].index
= Log2(xHi
) + elemLog2
;
1428 swizzle
[i
].x
&= blkXMask
;
// y bits beyond the block height are emitted through xor1 or xor2 and then stripped.
1431 const UINT_32 yHi
= swizzle
[i
].y
& (~blkYMask
);
1435 ADDR_ASSERT(IsPow2(yHi
));
1439 ADDR_ASSERT(pEquation
->xor1
[i
].value
== 0);
1440 pEquation
->xor1
[i
].channel
= 1;
1441 pEquation
->xor1
[i
].valid
= 1;
1442 pEquation
->xor1
[i
].index
= Log2(yHi
);
1446 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1447 pEquation
->xor2
[i
].channel
= 1;
1448 pEquation
->xor2
[i
].valid
= 1;
1449 pEquation
->xor2
[i
].index
= Log2(yHi
);
1452 swizzle
[i
].y
&= blkYMask
;
1455 if (swizzle
[i
].value
== 0)
// Second phase: all bits below the pipe interleave must already be resolved; keep sweeping the bits
// from m_pipeInterleaveLog2 upward until bMask covers the whole block.
1462 const UINT_32 pipeIntMask
= (1 << m_pipeInterleaveLog2
) - 1;
1463 const UINT_32 blockMask
= (1 << blockSizeLog2
) - 1;
1465 ADDR_ASSERT((bMask
& pipeIntMask
) == pipeIntMask
);
1467 while (bMask
!= blockMask
)
1469 for (UINT_32 i
= m_pipeInterleaveLog2
; i
< blockSizeLog2
; i
++)
// Only bits not yet marked in bMask are examined.
1471 if ((bMask
& (1 << i
)) == 0)
1473 if (IsPow2(swizzle
[i
].value
))
1475 if (swizzle
[i
].x
!= 0)
1477 ADDR_ASSERT((xMask
& swizzle
[i
].x
) == 0);
1478 xMask
|= swizzle
[i
].x
;
1480 const UINT_32 xLog2
= Log2(swizzle
[i
].x
);
1482 ADDR_ASSERT(xLog2
< blkXLog2
);
1484 pEquation
->addr
[i
].channel
= 0;
1485 pEquation
->addr
[i
].valid
= 1;
1486 pEquation
->addr
[i
].index
= xLog2
+ elemLog2
;
1490 ADDR_ASSERT(swizzle
[i
].y
!= 0);
1491 ADDR_ASSERT((yMask
& swizzle
[i
].y
) == 0);
1492 yMask
|= swizzle
[i
].y
;
1494 pEquation
->addr
[i
].channel
= 1;
1495 pEquation
->addr
[i
].valid
= 1;
1496 pEquation
->addr
[i
].index
= Log2(swizzle
[i
].y
);
1498 ADDR_ASSERT(pEquation
->addr
[i
].index
< blkYLog2
);
1501 swizzle
[i
].value
= 0;
// Components whose coordinate bit is already consumed (present in xMask/yMask) become xor terms.
1506 const UINT_32 x
= swizzle
[i
].x
& xMask
;
1507 const UINT_32 y
= swizzle
[i
].y
& yMask
;
1511 ADDR_ASSERT(IsPow2(x
));
1513 if (pEquation
->xor1
[i
].value
== 0)
1515 pEquation
->xor1
[i
].channel
= 0;
1516 pEquation
->xor1
[i
].valid
= 1;
1517 pEquation
->xor1
[i
].index
= Log2(x
) + elemLog2
;
1521 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1522 pEquation
->xor2
[i
].channel
= 0;
1523 pEquation
->xor2
[i
].valid
= 1;
1524 pEquation
->xor2
[i
].index
= Log2(x
) + elemLog2
;
1530 ADDR_ASSERT(IsPow2(y
));
1532 if (pEquation
->xor1
[i
].value
== 0)
1534 pEquation
->xor1
[i
].channel
= 1;
1535 pEquation
->xor1
[i
].valid
= 1;
1536 pEquation
->xor1
[i
].index
= Log2(y
);
1540 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1541 pEquation
->xor2
[i
].channel
= 1;
1542 pEquation
->xor2
[i
].valid
= 1;
1543 pEquation
->xor2
[i
].index
= Log2(y
);
// Every in-block x and y bit must have been consumed exactly once.
1554 ADDR_ASSERT((xMask
== blkXMask
) && (yMask
== blkYMask
));
// Remaining XOR case: block dimensions are looked up in the 4KB or 64KB 3D tables by element size.
1558 const UINT_32 blkXLog2
= (blockSizeLog2
== 12) ? Block4K_Log2_3d
[elemLog2
].w
: Block64K_Log2_3d
[elemLog2
].w
;
1559 const UINT_32 blkYLog2
= (blockSizeLog2
== 12) ? Block4K_Log2_3d
[elemLog2
].h
: Block64K_Log2_3d
[elemLog2
].h
;
1560 const UINT_32 blkZLog2
= (blockSizeLog2
== 12) ? Block4K_Log2_3d
[elemLog2
].d
: Block64K_Log2_3d
[elemLog2
].d
;
1561 const UINT_32 blkXMask
= (1 << blkXLog2
) - 1;
1562 const UINT_32 blkYMask
= (1 << blkYLog2
) - 1;
1563 const UINT_32 blkZMask
= (1 << blkZLog2
) - 1;
1565 ADDR_BIT_SETTING swizzle
[ADDR_MAX_EQUATION_BIT
];
1569 UINT_32 bMask
= (1 << elemLog2
) - 1;
// First pass, as in the thin case but with z treated as a full third coordinate.
1571 for (UINT_32 i
= elemLog2
; i
< blockSizeLog2
; i
++)
1573 if (IsPow2(pSwizzle
[i
].value
))
1575 if (pSwizzle
[i
].x
!= 0)
1577 ADDR_ASSERT((xMask
& pSwizzle
[i
].x
) == 0);
1578 xMask
|= pSwizzle
[i
].x
;
1580 const UINT_32 xLog2
= Log2(pSwizzle
[i
].x
);
1582 ADDR_ASSERT(xLog2
< blkXLog2
);
1584 pEquation
->addr
[i
].channel
= 0;
1585 pEquation
->addr
[i
].valid
= 1;
1586 pEquation
->addr
[i
].index
= xLog2
+ elemLog2
;
1588 else if (pSwizzle
[i
].y
!= 0)
1590 ADDR_ASSERT((yMask
& pSwizzle
[i
].y
) == 0);
1591 yMask
|= pSwizzle
[i
].y
;
1593 pEquation
->addr
[i
].channel
= 1;
1594 pEquation
->addr
[i
].valid
= 1;
1595 pEquation
->addr
[i
].index
= Log2(pSwizzle
[i
].y
);
1597 ADDR_ASSERT(pEquation
->addr
[i
].index
< blkYLog2
);
1601 ADDR_ASSERT(pSwizzle
[i
].z
!= 0);
1602 ADDR_ASSERT((zMask
& pSwizzle
[i
].z
) == 0);
1603 zMask
|= pSwizzle
[i
].z
;
1605 pEquation
->addr
[i
].channel
= 2;
1606 pEquation
->addr
[i
].valid
= 1;
1607 pEquation
->addr
[i
].index
= Log2(pSwizzle
[i
].z
);
1609 ADDR_ASSERT(pEquation
->addr
[i
].index
< blkZLog2
);
1612 swizzle
[i
].value
= 0;
// Multi-component bits are copied to the working array with all three components.
1617 swizzle
[i
].x
= pSwizzle
[i
].x
;
1618 swizzle
[i
].y
= pSwizzle
[i
].y
;
1619 swizzle
[i
].z
= pSwizzle
[i
].z
;
1622 ADDR_ASSERT(IsPow2(swizzle
[i
].value
) == FALSE
);
// Components that lie outside the block dimensions.
1624 const UINT_32 xHi
= swizzle
[i
].x
& (~blkXMask
);
1625 const UINT_32 yHi
= swizzle
[i
].y
& (~blkYMask
);
1626 const UINT_32 zHi
= swizzle
[i
].z
& (~blkZMask
);
// At most two of the three may be set, since only xor1 and xor2 are available.
1628 ADDR_ASSERT((xHi
== 0) || (yHi
== 0) || (zHi
== 0));
1632 ADDR_ASSERT(IsPow2(xHi
));
1633 ADDR_ASSERT(pEquation
->xor1
[i
].value
== 0);
1635 pEquation
->xor1
[i
].channel
= 0;
1636 pEquation
->xor1
[i
].valid
= 1;
1637 pEquation
->xor1
[i
].index
= Log2(xHi
) + elemLog2
;
1639 swizzle
[i
].x
&= blkXMask
;
1644 ADDR_ASSERT(IsPow2(yHi
));
// Use xor1 if it is still free, otherwise xor2.
1646 if (pEquation
->xor1
[i
].value
== 0)
1648 pEquation
->xor1
[i
].channel
= 1;
1649 pEquation
->xor1
[i
].valid
= 1;
1650 pEquation
->xor1
[i
].index
= Log2(yHi
);
1654 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1655 pEquation
->xor2
[i
].channel
= 1;
1656 pEquation
->xor2
[i
].valid
= 1;
1657 pEquation
->xor2
[i
].index
= Log2(yHi
);
1660 swizzle
[i
].y
&= blkYMask
;
1665 ADDR_ASSERT(IsPow2(zHi
));
1667 if (pEquation
->xor1
[i
].value
== 0)
1669 pEquation
->xor1
[i
].channel
= 2;
1670 pEquation
->xor1
[i
].valid
= 1;
1671 pEquation
->xor1
[i
].index
= Log2(zHi
);
1675 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1676 pEquation
->xor2
[i
].channel
= 2;
1677 pEquation
->xor2
[i
].valid
= 1;
1678 pEquation
->xor2
[i
].index
= Log2(zHi
);
1681 swizzle
[i
].z
&= blkZMask
;
1684 if (swizzle
[i
].value
== 0)
// Second phase: same sweep as the thin case, now over x, y and z.
1691 const UINT_32 pipeIntMask
= (1 << m_pipeInterleaveLog2
) - 1;
1692 const UINT_32 blockMask
= (1 << blockSizeLog2
) - 1;
1694 ADDR_ASSERT((bMask
& pipeIntMask
) == pipeIntMask
);
1696 while (bMask
!= blockMask
)
1698 for (UINT_32 i
= m_pipeInterleaveLog2
; i
< blockSizeLog2
; i
++)
1700 if ((bMask
& (1 << i
)) == 0)
1702 if (IsPow2(swizzle
[i
].value
))
1704 if (swizzle
[i
].x
!= 0)
1706 ADDR_ASSERT((xMask
& swizzle
[i
].x
) == 0);
1707 xMask
|= swizzle
[i
].x
;
1709 const UINT_32 xLog2
= Log2(swizzle
[i
].x
);
1711 ADDR_ASSERT(xLog2
< blkXLog2
);
1713 pEquation
->addr
[i
].channel
= 0;
1714 pEquation
->addr
[i
].valid
= 1;
1715 pEquation
->addr
[i
].index
= xLog2
+ elemLog2
;
1717 else if (swizzle
[i
].y
!= 0)
1719 ADDR_ASSERT((yMask
& swizzle
[i
].y
) == 0);
1720 yMask
|= swizzle
[i
].y
;
1722 pEquation
->addr
[i
].channel
= 1;
1723 pEquation
->addr
[i
].valid
= 1;
1724 pEquation
->addr
[i
].index
= Log2(swizzle
[i
].y
);
1726 ADDR_ASSERT(pEquation
->addr
[i
].index
< blkYLog2
);
1730 ADDR_ASSERT(swizzle
[i
].z
!= 0);
1731 ADDR_ASSERT((zMask
& swizzle
[i
].z
) == 0);
1732 zMask
|= swizzle
[i
].z
;
1734 pEquation
->addr
[i
].channel
= 2;
1735 pEquation
->addr
[i
].valid
= 1;
1736 pEquation
->addr
[i
].index
= Log2(swizzle
[i
].z
);
1738 ADDR_ASSERT(pEquation
->addr
[i
].index
< blkZLog2
);
1741 swizzle
[i
].value
= 0;
// Components whose coordinate bit is already consumed become xor terms.
1746 const UINT_32 x
= swizzle
[i
].x
& xMask
;
1747 const UINT_32 y
= swizzle
[i
].y
& yMask
;
1748 const UINT_32 z
= swizzle
[i
].z
& zMask
;
1752 ADDR_ASSERT(IsPow2(x
));
1754 if (pEquation
->xor1
[i
].value
== 0)
1756 pEquation
->xor1
[i
].channel
= 0;
1757 pEquation
->xor1
[i
].valid
= 1;
1758 pEquation
->xor1
[i
].index
= Log2(x
) + elemLog2
;
1762 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1763 pEquation
->xor2
[i
].channel
= 0;
1764 pEquation
->xor2
[i
].valid
= 1;
1765 pEquation
->xor2
[i
].index
= Log2(x
) + elemLog2
;
1771 ADDR_ASSERT(IsPow2(y
));
1773 if (pEquation
->xor1
[i
].value
== 0)
1775 pEquation
->xor1
[i
].channel
= 1;
1776 pEquation
->xor1
[i
].valid
= 1;
1777 pEquation
->xor1
[i
].index
= Log2(y
);
1781 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1782 pEquation
->xor2
[i
].channel
= 1;
1783 pEquation
->xor2
[i
].valid
= 1;
1784 pEquation
->xor2
[i
].index
= Log2(y
);
1790 ADDR_ASSERT(IsPow2(z
));
1792 if (pEquation
->xor1
[i
].value
== 0)
1794 pEquation
->xor1
[i
].channel
= 2;
1795 pEquation
->xor1
[i
].valid
= 1;
1796 pEquation
->xor1
[i
].index
= Log2(z
);
1800 ADDR_ASSERT(pEquation
->xor2
[i
].value
== 0);
1801 pEquation
->xor2
[i
].channel
= 2;
1802 pEquation
->xor2
[i
].valid
= 1;
1803 pEquation
->xor2
[i
].index
= Log2(z
);
// Every in-block x, y and z bit must have been consumed exactly once.
1815 ADDR_ASSERT((xMask
== blkXMask
) && (yMask
== blkYMask
) && (zMask
== blkZMask
));
1820 ************************************************************************************************************************
1821 * Gfx10Lib::InitEquationTable
1824 * Initialize Equation table.
1828 ************************************************************************************************************************
// Builds the equation tables. m_equationTable is zeroed, then every combination of resource type,
// swizzle mode and element size is visited; the equation index found for a combination (or
// ADDR_INVALID_EQUATION_INDEX) is stored in m_equationLookupTable.
1830 VOID
Gfx10Lib::InitEquationTable()
1832 memset(m_equationTable
, 0, sizeof(m_equationTable
));
1834 for (UINT_32 rsrcTypeIdx
= 0; rsrcTypeIdx
< MaxRsrcType
; rsrcTypeIdx
++)
// Table index 0 corresponds to ADDR_RSRC_TEX_2D.
1836 const AddrResourceType rsrcType
= static_cast<AddrResourceType
>(rsrcTypeIdx
+ ADDR_RSRC_TEX_2D
);
1838 for (UINT_32 swModeIdx
= 0; swModeIdx
< MaxSwModeType
; swModeIdx
++)
1840 const AddrSwizzleMode swMode
= static_cast<AddrSwizzleMode
>(swModeIdx
);
1842 for (UINT_32 elemLog2
= 0; elemLog2
< MaxElementBytesLog2
; elemLog2
++)
1844 UINT_32 equationIndex
= ADDR_INVALID_EQUATION_INDEX
;
1845 const ADDR_SW_PATINFO
* pPatInfo
= GetSwizzlePatternInfo(swMode
, rsrcType
, elemLog2
, 1);
// Only combinations that have a swizzle pattern are considered.
1847 if (pPatInfo
!= NULL
)
1849 ADDR_ASSERT(IsValidSwMode(swMode
));
// Patterns with at most 3 items per bit are converted to an equation.
1851 if (pPatInfo
->maxItemCount
<= 3)
1853 ADDR_EQUATION equation
= {};
1855 ConvertSwizzlePatternToEquation(elemLog2
, rsrcType
, swMode
, pPatInfo
, &equation
);
// The new equation is stored at slot m_numEquations.
1857 equationIndex
= m_numEquations
;
1858 ADDR_ASSERT(equationIndex
< EquationTableSize
);
1860 m_equationTable
[equationIndex
] = equation
;
1866 // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
1867 ADDR_ASSERT((elemLog2
== 3) || (elemLog2
== 4));
1868 ADDR_ASSERT(rsrcTypeIdx
== 1);
1869 ADDR_ASSERT(swMode
== ADDR_SW_64KB_D_X
);
1870 ADDR_ASSERT(m_settings
.supportRbPlus
== 1);
1874 m_equationLookupTable
[rsrcTypeIdx
][swModeIdx
][elemLog2
] = equationIndex
;
1881 ************************************************************************************************************************
1882 * Gfx10Lib::HwlGetEquationIndex
1885 * Interface function stub of GetEquationIndex
1889 ************************************************************************************************************************
// Looks up the equation index for a surface. Only 2D and 3D resources are looked up in
// m_equationLookupTable; otherwise the index stays ADDR_INVALID_EQUATION_INDEX. When pOut->pMipInfo
// is non-NULL the index is also written to each of the pIn->numMipLevels mip entries.
1891 UINT_32
Gfx10Lib::HwlGetEquationIndex(
1892 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
1893 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
1896 UINT_32 equationIdx
= ADDR_INVALID_EQUATION_INDEX
;
1898 if ((pIn
->resourceType
== ADDR_RSRC_TEX_2D
) ||
1899 (pIn
->resourceType
== ADDR_RSRC_TEX_3D
))
// Table indices: resource type minus 1, swizzle mode value, log2 of bytes per element.
1901 const UINT_32 rsrcTypeIdx
= static_cast<UINT_32
>(pIn
->resourceType
) - 1;
1902 const UINT_32 swModeIdx
= static_cast<UINT_32
>(pIn
->swizzleMode
);
1903 const UINT_32 elemLog2
= Log2(pIn
->bpp
>> 3);
1905 equationIdx
= m_equationLookupTable
[rsrcTypeIdx
][swModeIdx
][elemLog2
];
// Propagate the same index to every mip level when the caller supplied a mip info array.
1908 if (pOut
->pMipInfo
!= NULL
)
1910 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
1912 pOut
->pMipInfo
[i
].equationIndex
= equationIdx
;
1920 ************************************************************************************************************************
1921 * Gfx10Lib::IsValidDisplaySwizzleMode
1924 * Check if a swizzle mode is supported by display engine
1927 * TRUE is swizzle mode is supported by display engine
1928 ************************************************************************************************************************
// Checks whether the requested swizzle mode can be used for a displayable surface.
// The resource is asserted to be 2D. With DCN2 settings the listed D-type modes are supported only
// at 64 bpp, and linear plus the listed S-type and R_X modes are supported at 64 bpp or less.
1930 BOOL_32
Gfx10Lib::IsValidDisplaySwizzleMode(
1931 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
///< [in] input structure
1934 ADDR_ASSERT(pIn
->resourceType
== ADDR_RSRC_TEX_2D
);
// Unsupported unless one of the cases below says otherwise.
1936 BOOL_32 support
= FALSE
;
1938 if (m_settings
.isDcn2
)
1940 switch (pIn
->swizzleMode
)
// D-type modes: 64 bpp only.
1943 case ADDR_SW_4KB_D_X
:
1944 case ADDR_SW_64KB_D
:
1945 case ADDR_SW_64KB_D_T
:
1946 case ADDR_SW_64KB_D_X
:
1947 support
= (pIn
->bpp
== 64);
// Linear, S-type and R_X modes: up to 64 bpp.
1950 case ADDR_SW_LINEAR
:
1952 case ADDR_SW_4KB_S_X
:
1953 case ADDR_SW_64KB_S
:
1954 case ADDR_SW_64KB_S_T
:
1955 case ADDR_SW_64KB_S_X
:
1956 case ADDR_SW_64KB_R_X
:
1957 support
= (pIn
->bpp
<= 64);
// NOTE(review): presumably reached when the display engine is not DCN2 — confirm against the full source.
1966 ADDR_NOT_IMPLEMENTED();
1973 ************************************************************************************************************************
1974 * Gfx10Lib::GetMaxNumMipsInTail
1977 * Return max number of mips in tails
1980 * Max number of mips in tails
1981 ************************************************************************************************************************
// Returns the maximum number of mip levels that fit in the mip tail of a block.
// For non-thin blocks the block size log2 is first reduced by (blockSizeLog2 - 8) / 3.
1983 UINT_32
Gfx10Lib::GetMaxNumMipsInTail(
1984 UINT_32 blockSizeLog2
, ///< block size log2
1985 BOOL_32 isThin
///< is thin or thick
1988 UINT_32 effectiveLog2
= blockSizeLog2
;
1990 if (isThin
== FALSE
)
1992 effectiveLog2
-= (blockSizeLog2
- 8) / 3;
// Up to log2 11: 1 + 2^(effectiveLog2 - 9); above that: effectiveLog2 - 4.
// NOTE(review): the shift count (effectiveLog2 - 9) is unsigned, so this assumes effectiveLog2 >= 9 — confirm callers.
1995 return (effectiveLog2
<= 11) ? (1 + (1 << (effectiveLog2
- 9))) : (effectiveLog2
- 4);
1999 ************************************************************************************************************************
2000 * Gfx10Lib::HwlComputePipeBankXor
2003 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2007 ************************************************************************************************************************
// Generates pOut->pipeBankXor for a surface. For non-PRT XOR swizzle modes the value is
// bankXor | pipeXor, where bankXor for 64KB blocks (blockBits == 16) is taken from a fixed
// 8-entry pattern indexed by pIn->surfIndex % 8; the other assignment sets pipeBankXor to 0.
2009 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputePipeBankXor(
2010 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT
* pIn
, ///< [in] input structure
2011 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT
* pOut
///< [out] output structure
2014 if (IsNonPrtXor(pIn
->swizzleMode
))
2016 const UINT_32 blockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
2017 const UINT_32 pipeBits
= GetPipeXorBits(blockBits
);
2018 const UINT_32 bankBits
= GetBankXorBits(blockBits
);
2020 UINT_32 pipeXor
= 0;
2021 UINT_32 bankXor
= 0;
2025 if (blockBits
== 16)
// One pattern table per bank-bit count (1, 2 or 3 bits); entries repeat every XorPatternLen surfaces.
2027 const UINT_32 XorPatternLen
= 8;
2028 static const UINT_32 XorBank1b
[XorPatternLen
] = {0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80};
2029 static const UINT_32 XorBank2b
[XorPatternLen
] = {0x00, 0x80, 0x40, 0xC0, 0x80, 0x00, 0xC0, 0x40};
2030 static const UINT_32 XorBank3b
[XorPatternLen
] = {0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0};
2032 const UINT_32 index
= pIn
->surfIndex
% XorPatternLen
;
2036 bankXor
= XorBank1b
[index
];
2038 else if (bankBits
== 2)
2040 bankXor
= XorBank2b
[index
];
2044 bankXor
= XorBank3b
[index
];
// NOTE(review): the shift count is unsigned, so this assumes pipeBits <= 2 on this path — confirm the guard.
2048 bankXor
>>= (2 - pipeBits
);
2054 pOut
->pipeBankXor
= bankXor
| pipeXor
;
2058 pOut
->pipeBankXor
= 0;
2065 ************************************************************************************************************************
2066 * Gfx10Lib::HwlComputeSlicePipeBankXor
2069 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2073 ************************************************************************************************************************
// Derives a per-slice pipe/bank XOR value. For non-PRT XOR swizzle modes the result is
// pIn->basePipeBankXor XORed with ReverseBitVector(pIn->slice, pipeBits); the other assignment
// sets the result to 0.
2075 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeSlicePipeBankXor(
2076 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT
* pIn
, ///< [in] input structure
2077 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT
* pOut
///< [out] output structure
2080 if (IsNonPrtXor(pIn
->swizzleMode
))
2082 const UINT_32 blockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
2083 const UINT_32 pipeBits
= GetPipeXorBits(blockBits
);
// The slice index is passed through ReverseBitVector with a width of pipeBits.
2084 const UINT_32 pipeXor
= ReverseBitVector(pIn
->slice
, pipeBits
);
2086 pOut
->pipeBankXor
= pIn
->basePipeBankXor
^ pipeXor
;
2090 pOut
->pipeBankXor
= 0;
2097 ************************************************************************************************************************
2098 * Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2101 * Compute sub resource offset to support swizzle pattern
2105 ************************************************************************************************************************
// Computes the sub-resource offset used with swizzle patterns:
// offset = slice * sliceSize + macroBlockOffset. The surface is asserted to be thin.
2107 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2108 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT
* pIn
, ///< [in] input structure
2109 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT
* pOut
///< [out] output structure
2112 ADDR_ASSERT(IsThin(pIn
->resourceType
, pIn
->swizzleMode
));
2114 pOut
->offset
= pIn
->slice
* pIn
->sliceSize
+ pIn
->macroBlockOffset
;
2120 ************************************************************************************************************************
2121 * Gfx10Lib::ValidateNonSwModeParams
2124 * Validate compute surface info params except swizzle mode
2127 * TRUE if parameters are valid, FALSE otherwise
2128 ************************************************************************************************************************
// Validates the surface parameters that do not depend on the swizzle mode: basic ranges
// (bpp, width, fragment and sample counts), the resource type, and per-resource-type
// restrictions on MSAA, display, stereo and mipmapping. Each failing check asserts.
2130 BOOL_32
Gfx10Lib::ValidateNonSwModeParams(
2131 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
) const
2133 BOOL_32 valid
= TRUE
;
// bpp must be 1..128, width non-zero, at most 8 fragments and 16 samples.
2135 if ((pIn
->bpp
== 0) || (pIn
->bpp
> 128) || (pIn
->width
== 0) || (pIn
->numFrags
> 8) || (pIn
->numSamples
> 16))
2137 ADDR_ASSERT_ALWAYS();
2141 if (pIn
->resourceType
>= ADDR_RSRC_MAX_TYPE
)
2143 ADDR_ASSERT_ALWAYS();
// Derived properties used by the resource type checks below.
2147 const ADDR2_SURFACE_FLAGS flags
= pIn
->flags
;
2148 const AddrResourceType rsrcType
= pIn
->resourceType
;
2149 const BOOL_32 mipmap
= (pIn
->numMipLevels
> 1);
2150 const BOOL_32 msaa
= (pIn
->numFrags
> 1);
2151 const BOOL_32 display
= flags
.display
;
2152 const BOOL_32 tex3d
= IsTex3d(rsrcType
);
2153 const BOOL_32 tex2d
= IsTex2d(rsrcType
);
2154 const BOOL_32 tex1d
= IsTex1d(rsrcType
);
2155 const BOOL_32 stereo
= flags
.qbStereo
;
2157 // Resource type check
2160 if (msaa
|| display
|| stereo
)
2162 ADDR_ASSERT_ALWAYS();
// MSAA, stereo and mipmapping are mutually exclusive here.
2168 if ((msaa
&& mipmap
) || (stereo
&& msaa
) || (stereo
&& mipmap
))
2170 ADDR_ASSERT_ALWAYS();
2176 if (msaa
|| display
|| stereo
)
2178 ADDR_ASSERT_ALWAYS();
2184 ADDR_ASSERT_ALWAYS();
2192 ************************************************************************************************************************
2193 * Gfx10Lib::ValidateSwModeParams
2196 * Validate compute surface info related to swizzle mode
2199 * TRUE if parameters are valid, FALSE otherwise
2200 ************************************************************************************************************************
// Validates the surface parameters that depend on the swizzle mode: the mode itself, MSAA block
// size, display support, 96 bpp handling, the per-resource-type swizzle mode masks, and the
// per-swizzle-type restrictions. Each failing check asserts.
2202 BOOL_32
Gfx10Lib::ValidateSwModeParams(
2203 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
) const
2205 BOOL_32 valid
= TRUE
;
2207 if ((pIn
->swizzleMode
>= ADDR_SW_MAX_TYPE
) || (IsValidSwMode(pIn
->swizzleMode
) == FALSE
))
2209 ADDR_ASSERT_ALWAYS();
// Derived properties used by the checks below.
2213 const ADDR2_SURFACE_FLAGS flags
= pIn
->flags
;
2214 const AddrResourceType rsrcType
= pIn
->resourceType
;
2215 const AddrSwizzleMode swizzle
= pIn
->swizzleMode
;
2216 const BOOL_32 msaa
= (pIn
->numFrags
> 1);
2217 const BOOL_32 zbuffer
= flags
.depth
|| flags
.stencil
;
2218 const BOOL_32 color
= flags
.color
;
2219 const BOOL_32 display
= flags
.display
;
2220 const BOOL_32 tex3d
= IsTex3d(rsrcType
);
2221 const BOOL_32 tex2d
= IsTex2d(rsrcType
);
2222 const BOOL_32 tex1d
= IsTex1d(rsrcType
);
2223 const BOOL_32 thin3d
= flags
.view3dAs2dArray
;
2224 const BOOL_32 linear
= IsLinear(swizzle
);
2225 const BOOL_32 blk256B
= IsBlock256b(swizzle
);
2226 const BOOL_32 blkVar
= IsBlockVariable(swizzle
);
2227 const BOOL_32 isNonPrtXor
= IsNonPrtXor(swizzle
);
2228 const BOOL_32 prt
= flags
.prt
;
2229 const BOOL_32 fmask
= flags
.fmask
;
2232 if ((pIn
->numFrags
> 1) &&
2233 (GetBlockSize(swizzle
) < (m_pipeInterleaveBytes
* pIn
->numFrags
)))
2235 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2236 ADDR_ASSERT_ALWAYS();
2240 if (display
&& (IsValidDisplaySwizzleMode(pIn
) == FALSE
))
2242 ADDR_ASSERT_ALWAYS();
// 96 bpp is only accepted with a linear swizzle mode.
2246 if ((pIn
->bpp
== 96) && (linear
== FALSE
))
2248 ADDR_ASSERT_ALWAYS();
// One-hot mask of the requested mode, tested against the Gfx10*SwModeMask sets.
2252 const UINT_32 swizzleMask
= 1 << swizzle
;
2254 // Resource type check
2257 if ((swizzleMask
& Gfx10Rsrc1dSwModeMask
) == 0)
2259 ADDR_ASSERT_ALWAYS();
// 2D: mode must be in the 2D set; PRT needs a 2D PRT mode; fmask needs a Z mode.
2265 if (((swizzleMask
& Gfx10Rsrc2dSwModeMask
) == 0) ||
2266 (prt
&& ((swizzleMask
& Gfx10Rsrc2dPrtSwModeMask
) == 0)) ||
2267 (fmask
&& ((swizzleMask
& Gfx10ZSwModeMask
) == 0)))
2269 ADDR_ASSERT_ALWAYS();
// 3D: mode must be in the 3D set; PRT needs a 3D PRT mode; view-as-2D-array needs a thin 3D mode.
2275 if (((swizzleMask
& Gfx10Rsrc3dSwModeMask
) == 0) ||
2276 (prt
&& ((swizzleMask
& Gfx10Rsrc3dPrtSwModeMask
) == 0)) ||
2277 (thin3d
&& ((swizzleMask
& Gfx10Rsrc3dThinSwModeMask
) == 0)))
2279 ADDR_ASSERT_ALWAYS();
2284 // Swizzle type check
2287 if (zbuffer
|| msaa
|| (pIn
->bpp
== 0) || ((pIn
->bpp
% 8) != 0))
2289 ADDR_ASSERT_ALWAYS();
// Z-order modes: at most 64 bpp; MSAA only for non-color surfaces up to 32 bpp; no block-compressed
// or macro-pixel-packed formats.
2293 else if (IsZOrderSwizzle(swizzle
))
2295 if ((pIn
->bpp
> 64) ||
2296 (msaa
&& (color
|| (pIn
->bpp
> 32))) ||
2297 ElemLib::IsBlockCompressed(pIn
->format
) ||
2298 ElemLib::IsMacroPixelPacked(pIn
->format
))
2300 ADDR_ASSERT_ALWAYS();
// Standard and display modes reject depth/stencil and MSAA.
2304 else if (IsStandardSwizzle(rsrcType
, swizzle
))
2306 if (zbuffer
|| msaa
)
2308 ADDR_ASSERT_ALWAYS();
2312 else if (IsDisplaySwizzle(rsrcType
, swizzle
))
2314 if (zbuffer
|| msaa
)
2316 ADDR_ASSERT_ALWAYS();
2320 else if (IsRtOptSwizzle(swizzle
))
2324 ADDR_ASSERT_ALWAYS();
2330 ADDR_ASSERT_ALWAYS();
2337 if (zbuffer
|| tex3d
|| msaa
)
2339 ADDR_ASSERT_ALWAYS();
// A variable block size of 0 is rejected.
2345 if (m_blockVarSizeLog2
== 0)
2347 ADDR_ASSERT_ALWAYS();
2356 ************************************************************************************************************************
2357 * Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2360 * Compute surface info sanity check
2364 ************************************************************************************************************************
// Sanity-checks the surface info input: returns ADDR_OK when both ValidateNonSwModeParams() and
// ValidateSwModeParams() accept it, ADDR_INVALIDPARAMS otherwise.
2366 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2367 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
///< [in] input structure
// && short-circuits, so the swizzle mode checks only run when the non-swizzle checks pass.
2370 return ValidateNonSwModeParams(pIn
) && ValidateSwModeParams(pIn
) ? ADDR_OK
: ADDR_INVALIDPARAMS
;
2374 ************************************************************************************************************************
2375 * Gfx10Lib::HwlGetPreferredSurfaceSetting
2378 * Internal function to get suggested surface information for client to use
2382 ************************************************************************************************************************
2384 ADDR_E_RETURNCODE
Gfx10Lib::HwlGetPreferredSurfaceSetting(
2385 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT
* pIn
, ///< [in] input structure
2386 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT
* pOut
///< [out] output structure
2389 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
2391 if (pIn
->flags
.fmask
)
2393 const BOOL_32 forbid64KbBlockType
= pIn
->forbiddenBlock
.macroThin64KB
? TRUE
: FALSE
;
2394 const BOOL_32 forbidVarBlockType
= ((m_blockVarSizeLog2
== 0) || (pIn
->forbiddenBlock
.var
!= 0));
2396 if (forbid64KbBlockType
&& forbidVarBlockType
)
2398 // Invalid combination...
2399 ADDR_ASSERT_ALWAYS();
2400 returnCode
= ADDR_INVALIDPARAMS
;
2404 pOut
->resourceType
= ADDR_RSRC_TEX_2D
;
2405 pOut
->validBlockSet
.value
= 0;
2406 pOut
->validBlockSet
.macroThin64KB
= forbid64KbBlockType
? 0 : 1;
2407 pOut
->validBlockSet
.var
= forbidVarBlockType
? 0 : 1;
2408 pOut
->validSwModeSet
.value
= 0;
2409 pOut
->validSwModeSet
.sw64KB_Z_X
= forbid64KbBlockType
? 0 : 1;
2410 pOut
->validSwModeSet
.swVar_Z_X
= forbidVarBlockType
? 0 : 1;
2411 pOut
->canXor
= TRUE
;
2412 pOut
->validSwTypeSet
.value
= AddrSwSetZ
;
2413 pOut
->clientPreferredSwSet
= pOut
->validSwTypeSet
;
2415 BOOL_32 use64KbBlockType
= (forbid64KbBlockType
== FALSE
);
2417 if ((forbid64KbBlockType
== FALSE
) && (forbidVarBlockType
== FALSE
))
2419 const UINT_8 maxFmaskSwizzleModeType
= 2;
2420 const UINT_32 ratioLow
= pIn
->flags
.minimizeAlign
? 1 : (pIn
->flags
.opt4space
? 3 : 2);
2421 const UINT_32 ratioHi
= pIn
->flags
.minimizeAlign
? 1 : (pIn
->flags
.opt4space
? 2 : 1);
2422 const UINT_32 fmaskBpp
= GetFmaskBpp(pIn
->numSamples
, pIn
->numFrags
);
2423 const UINT_32 numSlices
= Max(pIn
->numSlices
, 1u);
2424 const UINT_32 width
= Max(pIn
->width
, 1u);
2425 const UINT_32 height
= Max(pIn
->height
, 1u);
2426 const UINT_64 sizeAlignInElement
= Max(NextPow2(pIn
->minSizeAlign
) / (fmaskBpp
>> 3), 1u);
2428 AddrSwizzleMode swMode
[maxFmaskSwizzleModeType
] = {ADDR_SW_64KB_Z_X
, ADDR_SW_VAR_Z_X
};
2429 Dim3d blkDim
[maxFmaskSwizzleModeType
] = {{0}, {0}};
2430 Dim3d padDim
[maxFmaskSwizzleModeType
] = {{0}, {0}};
2431 UINT_64 padSize
[maxFmaskSwizzleModeType
] = {0};
2433 for (UINT_8 i
= 0; i
< maxFmaskSwizzleModeType
; i
++)
2435 ComputeBlockDimensionForSurf(&blkDim
[i
].w
,
2443 padSize
[i
] = ComputePadSize(&blkDim
[i
], width
, height
, numSlices
, &padDim
[i
]);
2444 padSize
[i
] = PowTwoAlign(padSize
[i
], sizeAlignInElement
);
2447 if (GetBlockSizeLog2(swMode
[1]) >= GetBlockSizeLog2(swMode
[0]))
2449 if ((padSize
[1] * ratioHi
) <= (padSize
[0] * ratioLow
))
2451 use64KbBlockType
= FALSE
;
2456 if ((padSize
[1] * ratioLow
) < (padSize
[0] * ratioHi
))
2458 use64KbBlockType
= FALSE
;
2462 else if (forbidVarBlockType
)
2464 use64KbBlockType
= TRUE
;
2467 if (use64KbBlockType
)
2469 pOut
->swizzleMode
= ADDR_SW_64KB_Z_X
;
2473 pOut
->swizzleMode
= ADDR_SW_VAR_Z_X
;
2479 UINT_32 bpp
= pIn
->bpp
;
2480 UINT_32 width
= Max(pIn
->width
, 1u);
2481 UINT_32 height
= Max(pIn
->height
, 1u);
2483 // Set format to INVALID will skip this conversion
2484 if (pIn
->format
!= ADDR_FMT_INVALID
)
2486 ElemMode elemMode
= ADDR_UNCOMPRESSED
;
2487 UINT_32 expandX
, expandY
;
2489 // Get compression/expansion factors and element mode which indicates compression/expansion
2490 bpp
= GetElemLib()->GetBitsPerPixel(pIn
->format
,
2495 UINT_32 basePitch
= 0;
2496 GetElemLib()->AdjustSurfaceInfo(elemMode
,
2505 const UINT_32 numSlices
= Max(pIn
->numSlices
, 1u);
2506 const UINT_32 numMipLevels
= Max(pIn
->numMipLevels
, 1u);
2507 const UINT_32 numSamples
= Max(pIn
->numSamples
, 1u);
2508 const UINT_32 numFrags
= (pIn
->numFrags
== 0) ? numSamples
: pIn
->numFrags
;
2509 const BOOL_32 msaa
= (numFrags
> 1) || (numSamples
> 1);
2511 // Pre sanity check on non swizzle mode parameters
2512 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn
= {};
2513 localIn
.flags
= pIn
->flags
;
2514 localIn
.resourceType
= pIn
->resourceType
;
2515 localIn
.format
= pIn
->format
;
2517 localIn
.width
= width
;
2518 localIn
.height
= height
;
2519 localIn
.numSlices
= numSlices
;
2520 localIn
.numMipLevels
= numMipLevels
;
2521 localIn
.numSamples
= numSamples
;
2522 localIn
.numFrags
= numFrags
;
2524 if (ValidateNonSwModeParams(&localIn
))
2526 // Forbid swizzle mode(s) by client setting
2527 ADDR2_SWMODE_SET allowedSwModeSet
= {};
2528 allowedSwModeSet
.value
|= pIn
->forbiddenBlock
.linear
? 0 : Gfx10LinearSwModeMask
;
2529 allowedSwModeSet
.value
|= pIn
->forbiddenBlock
.micro
? 0 : Gfx10Blk256BSwModeMask
;
2530 allowedSwModeSet
.value
|=
2531 pIn
->forbiddenBlock
.macroThin4KB
? 0 :
2532 ((pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ? 0 : Gfx10Blk4KBSwModeMask
);
2533 allowedSwModeSet
.value
|=
2534 pIn
->forbiddenBlock
.macroThick4KB
? 0 :
2535 ((pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ? Gfx10Rsrc3dThick4KBSwModeMask
: 0);
2536 allowedSwModeSet
.value
|=
2537 pIn
->forbiddenBlock
.macroThin64KB
? 0 :
2538 ((pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ? Gfx10Rsrc3dThin64KBSwModeMask
: Gfx10Blk64KBSwModeMask
);
2539 allowedSwModeSet
.value
|=
2540 pIn
->forbiddenBlock
.macroThick64KB
? 0 :
2541 ((pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ? Gfx10Rsrc3dThick64KBSwModeMask
: 0);
2542 allowedSwModeSet
.value
|=
2543 pIn
->forbiddenBlock
.var
? 0 : (m_blockVarSizeLog2
? Gfx10BlkVarSwModeMask
: 0);
2545 if (pIn
->preferredSwSet
.value
!= 0)
2547 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_Z
? ~0 : ~Gfx10ZSwModeMask
;
2548 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_S
? ~0 : ~Gfx10StandardSwModeMask
;
2549 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_D
? ~0 : ~Gfx10DisplaySwModeMask
;
2550 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_R
? ~0 : ~Gfx10RenderSwModeMask
;
2555 allowedSwModeSet
.value
&= ~Gfx10XorSwModeMask
;
2558 if (pIn
->maxAlign
> 0)
2560 if (pIn
->maxAlign
< (1u << m_blockVarSizeLog2
))
2562 allowedSwModeSet
.value
&= ~Gfx10BlkVarSwModeMask
;
2565 if (pIn
->maxAlign
< Size64K
)
2567 allowedSwModeSet
.value
&= ~Gfx10Blk64KBSwModeMask
;
2570 if (pIn
->maxAlign
< Size4K
)
2572 allowedSwModeSet
.value
&= ~Gfx10Blk4KBSwModeMask
;
2575 if (pIn
->maxAlign
< Size256
)
2577 allowedSwModeSet
.value
&= ~Gfx10Blk256BSwModeMask
;
2581 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2582 switch (pIn
->resourceType
)
2584 case ADDR_RSRC_TEX_1D
:
2585 allowedSwModeSet
.value
&= Gfx10Rsrc1dSwModeMask
;
2588 case ADDR_RSRC_TEX_2D
:
2589 allowedSwModeSet
.value
&= pIn
->flags
.prt
? Gfx10Rsrc2dPrtSwModeMask
: Gfx10Rsrc2dSwModeMask
;
2592 case ADDR_RSRC_TEX_3D
:
2593 allowedSwModeSet
.value
&= pIn
->flags
.prt
? Gfx10Rsrc3dPrtSwModeMask
: Gfx10Rsrc3dSwModeMask
;
2595 if (pIn
->flags
.view3dAs2dArray
)
2597 allowedSwModeSet
.value
&= Gfx10Rsrc3dThinSwModeMask
;
2602 ADDR_ASSERT_ALWAYS();
2603 allowedSwModeSet
.value
= 0;
2607 if (ElemLib::IsBlockCompressed(pIn
->format
) ||
2608 ElemLib::IsMacroPixelPacked(pIn
->format
) ||
2610 (msaa
&& ((bpp
> 32) || pIn
->flags
.color
|| pIn
->flags
.unordered
)))
2612 allowedSwModeSet
.value
&= ~Gfx10ZSwModeMask
;
2615 if (pIn
->format
== ADDR_FMT_32_32_32
)
2617 allowedSwModeSet
.value
&= Gfx10LinearSwModeMask
;
2622 allowedSwModeSet
.value
&= Gfx10MsaaSwModeMask
;
2625 if (pIn
->flags
.depth
|| pIn
->flags
.stencil
)
2627 allowedSwModeSet
.value
&= Gfx10ZSwModeMask
;
2630 if (pIn
->flags
.display
)
2632 if (m_settings
.isDcn2
)
2634 allowedSwModeSet
.value
&= (bpp
== 64) ? Dcn2Bpp64SwModeMask
: Dcn2NonBpp64SwModeMask
;
2638 ADDR_NOT_IMPLEMENTED();
2642 if (allowedSwModeSet
.value
!= 0)
2645 // Post sanity check, at least AddrLib should accept the output generated by its own
2646 UINT_32 validateSwModeSet
= allowedSwModeSet
.value
;
2648 for (UINT_32 i
= 0; validateSwModeSet
!= 0; i
++)
2650 if (validateSwModeSet
& 1)
2652 localIn
.swizzleMode
= static_cast<AddrSwizzleMode
>(i
);
2653 ADDR_ASSERT(ValidateSwModeParams(&localIn
));
2656 validateSwModeSet
>>= 1;
2660 pOut
->resourceType
= pIn
->resourceType
;
2661 pOut
->validSwModeSet
= allowedSwModeSet
;
2662 pOut
->canXor
= (allowedSwModeSet
.value
& Gfx10XorSwModeMask
) ? TRUE
: FALSE
;
2663 pOut
->validBlockSet
= GetAllowedBlockSet(allowedSwModeSet
, pOut
->resourceType
);
2664 pOut
->validSwTypeSet
= GetAllowedSwSet(allowedSwModeSet
);
2666 pOut
->clientPreferredSwSet
= pIn
->preferredSwSet
;
2668 if (pOut
->clientPreferredSwSet
.value
== 0)
2670 pOut
->clientPreferredSwSet
.value
= AddrSwSetAll
;
2673 // Apply optional restrictions
2674 if ((pIn
->flags
.depth
|| pIn
->flags
.stencil
) && msaa
&& m_configFlags
.nonPower2MemConfig
)
2676 if ((allowedSwModeSet
.value
&= ~Gfx10BlkVarSwModeMask
) != 0)
2678 // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
2679 // the GL2 in VAR mode, so it should be avoided.
2680 allowedSwModeSet
.value
&= ~Gfx10BlkVarSwModeMask
;
2684 // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
2685 // But we have to suffer from low performance because there is no other choice...
2686 ADDR_ASSERT_ALWAYS();
2690 if (pIn
->flags
.needEquation
)
2692 FilterInvalidEqSwizzleMode(allowedSwModeSet
, pIn
->resourceType
, Log2(bpp
>> 3));
2695 if (allowedSwModeSet
.value
== Gfx10LinearSwModeMask
)
2697 pOut
->swizzleMode
= ADDR_SW_LINEAR
;
2701 // Always ignore linear swizzle mode if there is other choice.
2702 allowedSwModeSet
.swLinear
= 0;
2704 ADDR2_BLOCK_SET allowedBlockSet
= GetAllowedBlockSet(allowedSwModeSet
, pOut
->resourceType
);
2706 // Determine block size if there is 2 or more block type candidates
2707 if (IsPow2(allowedBlockSet
.value
) == FALSE
)
2709 AddrSwizzleMode swMode
[AddrBlockMaxTiledType
] = { ADDR_SW_LINEAR
};
2711 if (m_blockVarSizeLog2
!= 0)
2713 swMode
[AddrBlockVar
] = ADDR_SW_VAR_R_X
;
2716 if (pOut
->resourceType
== ADDR_RSRC_TEX_3D
)
2718 swMode
[AddrBlockThick4KB
] = ADDR_SW_4KB_S
;
2719 swMode
[AddrBlockThin64KB
] = ADDR_SW_64KB_R_X
;
2720 swMode
[AddrBlockThick64KB
] = ADDR_SW_64KB_S
;
2724 swMode
[AddrBlockMicro
] = ADDR_SW_256B_S
;
2725 swMode
[AddrBlockThin4KB
] = ADDR_SW_4KB_S
;
2726 swMode
[AddrBlockThin64KB
] = ADDR_SW_64KB_S
;
2729 Dim3d blkDim
[AddrBlockMaxTiledType
] = {{0}, {0}, {0}, {0}, {0}, {0}};
2730 Dim3d padDim
[AddrBlockMaxTiledType
] = {{0}, {0}, {0}, {0}, {0}, {0}};
2731 UINT_64 padSize
[AddrBlockMaxTiledType
] = {0};
2733 const UINT_32 ratioLow
= pIn
->flags
.minimizeAlign
? 1 : (pIn
->flags
.opt4space
? 3 : 2);
2734 const UINT_32 ratioHi
= pIn
->flags
.minimizeAlign
? 1 : (pIn
->flags
.opt4space
? 2 : 1);
2735 const UINT_64 sizeAlignInElement
= Max(NextPow2(pIn
->minSizeAlign
) / (bpp
>> 3), 1u);
2736 UINT_32 minSizeBlk
= AddrBlockMicro
;
2737 UINT_64 minSize
= 0;
2739 for (UINT_32 i
= AddrBlockMicro
; i
< AddrBlockMaxTiledType
; i
++)
2741 if (allowedBlockSet
.value
& (1 << i
))
2743 ComputeBlockDimensionForSurf(&blkDim
[i
].w
,
2751 padSize
[i
] = ComputePadSize(&blkDim
[i
], width
, height
, numSlices
, &padDim
[i
]);
2752 padSize
[i
] = PowTwoAlign(padSize
[i
] * numFrags
, sizeAlignInElement
);
2756 minSize
= padSize
[i
];
2761 // Due to the fact that VAR block size = 16KB * m_pipes, it is possible that VAR
2762 // block size < 64KB. And ratio[Hi/Low] logic implicitly requires iterating from
2763 // smaller block type to bigger block type. So we have to correct comparing logic
2764 // according to the size of existing "minimun block" and size of coming/comparing
2765 // block. The new logic can also be useful to any future change about AddrBlockType.
2766 if (GetBlockSizeLog2(swMode
[i
]) >= GetBlockSizeLog2(swMode
[minSizeBlk
]))
2768 if ((padSize
[i
] * ratioHi
) <= (minSize
* ratioLow
))
2770 minSize
= padSize
[i
];
2776 if ((padSize
[i
] * ratioLow
) < (minSize
* ratioHi
))
2778 minSize
= padSize
[i
];
2786 if ((allowedBlockSet
.micro
== TRUE
) &&
2787 (width
<= blkDim
[AddrBlockMicro
].w
) &&
2788 (height
<= blkDim
[AddrBlockMicro
].h
))
2790 minSizeBlk
= AddrBlockMicro
;
2793 if (minSizeBlk
== AddrBlockMicro
)
2795 ADDR_ASSERT(pOut
->resourceType
!= ADDR_RSRC_TEX_3D
);
2796 allowedSwModeSet
.value
&= Gfx10Blk256BSwModeMask
;
2798 else if (minSizeBlk
== AddrBlockThick4KB
)
2800 ADDR_ASSERT(pOut
->resourceType
== ADDR_RSRC_TEX_3D
);
2801 allowedSwModeSet
.value
&= Gfx10Rsrc3dThick4KBSwModeMask
;
2803 else if (minSizeBlk
== AddrBlockThin4KB
)
2805 ADDR_ASSERT(pOut
->resourceType
!= ADDR_RSRC_TEX_3D
);
2806 allowedSwModeSet
.value
&= Gfx10Blk4KBSwModeMask
;
2808 else if (minSizeBlk
== AddrBlockThick64KB
)
2810 ADDR_ASSERT(pOut
->resourceType
== ADDR_RSRC_TEX_3D
);
2811 allowedSwModeSet
.value
&= Gfx10Rsrc3dThick64KBSwModeMask
;
2813 else if (minSizeBlk
== AddrBlockThin64KB
)
2815 allowedSwModeSet
.value
&= (pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ?
2816 Gfx10Rsrc3dThin64KBSwModeMask
: Gfx10Blk64KBSwModeMask
;
2820 ADDR_ASSERT(minSizeBlk
== AddrBlockVar
);
2821 allowedSwModeSet
.value
&= Gfx10BlkVarSwModeMask
;
2825 // Block type should be determined.
2826 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet
, pOut
->resourceType
).value
));
2828 ADDR2_SWTYPE_SET allowedSwSet
= GetAllowedSwSet(allowedSwModeSet
);
2830 // Determine swizzle type if there is 2 or more swizzle type candidates
2831 if (IsPow2(allowedSwSet
.value
) == FALSE
)
2833 if (ElemLib::IsBlockCompressed(pIn
->format
))
2835 if (allowedSwSet
.sw_D
)
2837 allowedSwModeSet
.value
&= Gfx10DisplaySwModeMask
;
2839 else if (allowedSwSet
.sw_S
)
2841 allowedSwModeSet
.value
&= Gfx10StandardSwModeMask
;
2845 ADDR_ASSERT(allowedSwSet
.sw_R
);
2846 allowedSwModeSet
.value
&= Gfx10RenderSwModeMask
;
2849 else if (ElemLib::IsMacroPixelPacked(pIn
->format
))
2851 if (allowedSwSet
.sw_S
)
2853 allowedSwModeSet
.value
&= Gfx10StandardSwModeMask
;
2855 else if (allowedSwSet
.sw_D
)
2857 allowedSwModeSet
.value
&= Gfx10DisplaySwModeMask
;
2861 ADDR_ASSERT(allowedSwSet
.sw_R
);
2862 allowedSwModeSet
.value
&= Gfx10RenderSwModeMask
;
2865 else if (pIn
->resourceType
== ADDR_RSRC_TEX_3D
)
2867 if (pIn
->flags
.color
&&
2868 GetAllowedBlockSet(allowedSwModeSet
, pOut
->resourceType
).macroThick64KB
&&
2871 allowedSwModeSet
.value
&= Gfx10DisplaySwModeMask
;
2873 else if (allowedSwSet
.sw_S
)
2875 allowedSwModeSet
.value
&= Gfx10StandardSwModeMask
;
2877 else if (allowedSwSet
.sw_R
)
2879 allowedSwModeSet
.value
&= Gfx10RenderSwModeMask
;
2883 ADDR_ASSERT(allowedSwSet
.sw_Z
);
2884 allowedSwModeSet
.value
&= Gfx10ZSwModeMask
;
2889 if (allowedSwSet
.sw_R
)
2891 allowedSwModeSet
.value
&= Gfx10RenderSwModeMask
;
2893 else if (allowedSwSet
.sw_D
)
2895 allowedSwModeSet
.value
&= Gfx10DisplaySwModeMask
;
2897 else if (allowedSwSet
.sw_S
)
2899 allowedSwModeSet
.value
&= Gfx10StandardSwModeMask
;
2903 ADDR_ASSERT(allowedSwSet
.sw_Z
);
2904 allowedSwModeSet
.value
&= Gfx10ZSwModeMask
;
2909 // Swizzle type should be determined.
2910 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet
).value
));
2912 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
2913 // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
2914 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
2915 pOut
->swizzleMode
= static_cast<AddrSwizzleMode
>(Log2NonPow2(allowedSwModeSet
.value
));
2920 // Invalid combination...
2921 ADDR_ASSERT_ALWAYS();
2922 returnCode
= ADDR_INVALIDPARAMS
;
2927 // Invalid combination...
2928 ADDR_ASSERT_ALWAYS();
2929 returnCode
= ADDR_INVALIDPARAMS
;
2937 ************************************************************************************************************************
2938 * Gfx10Lib::ComputeStereoInfo
2941 * Compute height alignment and right eye pipeBankXor for stereo surface
2946 ************************************************************************************************************************
2948 ADDR_E_RETURNCODE
Gfx10Lib::ComputeStereoInfo(
2949 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< Compute surface info
2950 UINT_32 blkHeight
, ///< Block height
2951 UINT_32
* pAlignY
, ///< Stereo requested additional alignment in Y
2952 UINT_32
* pRightXor
///< Right eye xor
2955 ADDR_E_RETURNCODE ret
= ADDR_OK
;
2960 if (IsNonPrtXor(pIn
->swizzleMode
))
2962 const UINT_32 blkSizeLog2
= GetBlockSizeLog2(pIn
->swizzleMode
);
2963 const UINT_32 elemLog2
= Log2(pIn
->bpp
>> 3);
2964 const UINT_32 rsrcType
= static_cast<UINT_32
>(pIn
->resourceType
) - 1;
2965 const UINT_32 swMode
= static_cast<UINT_32
>(pIn
->swizzleMode
);
2966 const UINT_32 eqIndex
= m_equationLookupTable
[rsrcType
][swMode
][elemLog2
];
2968 if (eqIndex
!= ADDR_INVALID_EQUATION_INDEX
)
2973 for (UINT_32 i
= m_pipeInterleaveLog2
; i
< blkSizeLog2
; i
++)
2975 if (m_equationTable
[eqIndex
].xor1
[i
].value
== 0)
2980 ADDR_ASSERT(m_equationTable
[eqIndex
].xor1
[i
].valid
== 1);
2982 if ((m_equationTable
[eqIndex
].xor1
[i
].channel
== 1) &&
2983 (m_equationTable
[eqIndex
].xor1
[i
].index
> yMax
))
2985 yMax
= m_equationTable
[eqIndex
].xor1
[i
].index
;
2990 const UINT_32 additionalAlign
= 1 << yMax
;
2992 if (additionalAlign
>= blkHeight
)
2994 *pAlignY
*= (additionalAlign
/ blkHeight
);
2996 const UINT_32 alignedHeight
= PowTwoAlign(pIn
->height
, additionalAlign
);
2998 if ((alignedHeight
>> yMax
) & 1)
3000 *pRightXor
= 1 << (yPos
- m_pipeInterleaveLog2
);
3006 ret
= ADDR_INVALIDPARAMS
;
3014 ************************************************************************************************************************
3015 * Gfx10Lib::HwlComputeSurfaceInfoTiled
3018 * Internal function to calculate alignment for tiled surface
3022 ************************************************************************************************************************
3024 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeSurfaceInfoTiled(
3025 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
3026 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
3029 ADDR_E_RETURNCODE ret
;
3031 if (IsBlock256b(pIn
->swizzleMode
))
3033 ret
= ComputeSurfaceInfoMicroTiled(pIn
, pOut
);
3037 ret
= ComputeSurfaceInfoMacroTiled(pIn
, pOut
);
3044 ************************************************************************************************************************
3045 * Gfx10Lib::ComputeSurfaceInfoMicroTiled
3048 * Internal function to calculate alignment for micro tiled surface
3052 ************************************************************************************************************************
3054 ADDR_E_RETURNCODE
Gfx10Lib::ComputeSurfaceInfoMicroTiled(
3055 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
3056 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
3059 ADDR_E_RETURNCODE ret
= ComputeBlockDimensionForSurf(&pOut
->blockWidth
,
3069 pOut
->mipChainPitch
= 0;
3070 pOut
->mipChainHeight
= 0;
3071 pOut
->mipChainSlice
= 0;
3072 pOut
->epitchIsHeight
= FALSE
;
3073 pOut
->mipChainInTail
= FALSE
;
3074 pOut
->firstMipIdInTail
= pIn
->numMipLevels
;
3076 const UINT_32 blockSize
= GetBlockSize(pIn
->swizzleMode
);
3078 pOut
->pitch
= PowTwoAlign(pIn
->width
, pOut
->blockWidth
);
3079 pOut
->height
= PowTwoAlign(pIn
->height
, pOut
->blockHeight
);
3080 pOut
->numSlices
= pIn
->numSlices
;
3081 pOut
->baseAlign
= blockSize
;
3083 if (pIn
->numMipLevels
> 1)
3085 const UINT_32 mip0Width
= pIn
->width
;
3086 const UINT_32 mip0Height
= pIn
->height
;
3087 UINT_64 mipSliceSize
= 0;
3089 for (INT_32 i
= static_cast<INT_32
>(pIn
->numMipLevels
) - 1; i
>= 0; i
--)
3091 UINT_32 mipWidth
, mipHeight
;
3093 GetMipSize(mip0Width
, mip0Height
, 1, i
, &mipWidth
, &mipHeight
);
3095 const UINT_32 mipActualWidth
= PowTwoAlign(mipWidth
, pOut
->blockWidth
);
3096 const UINT_32 mipActualHeight
= PowTwoAlign(mipHeight
, pOut
->blockHeight
);
3098 if (pOut
->pMipInfo
!= NULL
)
3100 pOut
->pMipInfo
[i
].pitch
= mipActualWidth
;
3101 pOut
->pMipInfo
[i
].height
= mipActualHeight
;
3102 pOut
->pMipInfo
[i
].depth
= 1;
3103 pOut
->pMipInfo
[i
].offset
= mipSliceSize
;
3104 pOut
->pMipInfo
[i
].mipTailOffset
= 0;
3105 pOut
->pMipInfo
[i
].macroBlockOffset
= mipSliceSize
;
3108 mipSliceSize
+= mipActualWidth
* mipActualHeight
* (pIn
->bpp
>> 3);
3111 pOut
->sliceSize
= mipSliceSize
;
3112 pOut
->surfSize
= mipSliceSize
* pOut
->numSlices
;
3116 pOut
->sliceSize
= static_cast<UINT_64
>(pOut
->pitch
) * pOut
->height
* (pIn
->bpp
>> 3);
3117 pOut
->surfSize
= pOut
->sliceSize
* pOut
->numSlices
;
3119 if (pOut
->pMipInfo
!= NULL
)
3121 pOut
->pMipInfo
[0].pitch
= pOut
->pitch
;
3122 pOut
->pMipInfo
[0].height
= pOut
->height
;
3123 pOut
->pMipInfo
[0].depth
= 1;
3124 pOut
->pMipInfo
[0].offset
= 0;
3125 pOut
->pMipInfo
[0].mipTailOffset
= 0;
3126 pOut
->pMipInfo
[0].macroBlockOffset
= 0;
3136 ************************************************************************************************************************
3137 * Gfx10Lib::ComputeSurfaceInfoMacroTiled
3140 * Internal function to calculate alignment for macro tiled surface
3144 ************************************************************************************************************************
3146 ADDR_E_RETURNCODE
Gfx10Lib::ComputeSurfaceInfoMacroTiled(
3147 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
3148 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
3151 ADDR_E_RETURNCODE returnCode
= ComputeBlockDimensionForSurf(&pOut
->blockWidth
,
3159 if (returnCode
== ADDR_OK
)
3161 UINT_32 heightAlign
= pOut
->blockHeight
;
3163 if (pIn
->flags
.qbStereo
)
3165 UINT_32 rightXor
= 0;
3168 returnCode
= ComputeStereoInfo(pIn
, heightAlign
, &alignY
, &rightXor
);
3170 if (returnCode
== ADDR_OK
)
3172 pOut
->pStereoInfo
->rightSwizzle
= rightXor
;
3174 heightAlign
*= alignY
;
3178 if (returnCode
== ADDR_OK
)
3180 // Mip chain dimesion and epitch has no meaning in GFX10, set to default value
3181 pOut
->mipChainPitch
= 0;
3182 pOut
->mipChainHeight
= 0;
3183 pOut
->mipChainSlice
= 0;
3184 pOut
->epitchIsHeight
= FALSE
;
3185 pOut
->mipChainInTail
= FALSE
;
3186 pOut
->firstMipIdInTail
= pIn
->numMipLevels
;
3188 const UINT_32 blockSizeLog2
= GetBlockSizeLog2(pIn
->swizzleMode
);
3189 const UINT_32 blockSize
= 1 << blockSizeLog2
;
3191 pOut
->pitch
= PowTwoAlign(pIn
->width
, pOut
->blockWidth
);
3192 pOut
->height
= PowTwoAlign(pIn
->height
, heightAlign
);
3193 pOut
->numSlices
= PowTwoAlign(pIn
->numSlices
, pOut
->blockSlices
);
3194 pOut
->baseAlign
= blockSize
;
3196 if (pIn
->numMipLevels
> 1)
3198 const Dim3d tailMaxDim
= GetMipTailDim(pIn
->resourceType
,
3203 const UINT_32 mip0Width
= pIn
->width
;
3204 const UINT_32 mip0Height
= pIn
->height
;
3205 const BOOL_32 isThin
= IsThin(pIn
->resourceType
, pIn
->swizzleMode
);
3206 const UINT_32 mip0Depth
= isThin
? 1 : pIn
->numSlices
;
3207 const UINT_32 maxMipsInTail
= GetMaxNumMipsInTail(blockSizeLog2
, isThin
);
3208 const UINT_32 index
= Log2(pIn
->bpp
>> 3);
3209 UINT_32 firstMipInTail
= pIn
->numMipLevels
;
3210 UINT_64 mipChainSliceSize
= 0;
3211 UINT_64 mipSize
[MaxMipLevels
];
3212 UINT_64 mipSliceSize
[MaxMipLevels
];
3214 Dim3d fixedTailMaxDim
= tailMaxDim
;
3216 if (m_settings
.dsMipmapHtileFix
&& IsZOrderSwizzle(pIn
->swizzleMode
) && (index
<= 1))
3218 fixedTailMaxDim
.w
/= Block256_2d
[index
].w
/ Block256_2d
[2].w
;
3219 fixedTailMaxDim
.h
/= Block256_2d
[index
].h
/ Block256_2d
[2].h
;
3222 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
3224 UINT_32 mipWidth
, mipHeight
, mipDepth
;
3226 GetMipSize(mip0Width
, mip0Height
, mip0Depth
, i
, &mipWidth
, &mipHeight
, &mipDepth
);
3228 if (IsInMipTail(fixedTailMaxDim
, maxMipsInTail
, mipWidth
, mipHeight
, pIn
->numMipLevels
- i
))
3231 mipChainSliceSize
+= blockSize
/ pOut
->blockSlices
;
3236 const UINT_32 pitch
= PowTwoAlign(mipWidth
, pOut
->blockWidth
);
3237 const UINT_32 height
= PowTwoAlign(mipHeight
, pOut
->blockHeight
);
3238 const UINT_32 depth
= PowTwoAlign(mipDepth
, pOut
->blockSlices
);
3239 const UINT_64 sliceSize
= static_cast<UINT_64
>(pitch
) * height
* (pIn
->bpp
>> 3);
3241 mipSize
[i
] = sliceSize
* depth
;
3242 mipSliceSize
[i
] = sliceSize
* pOut
->blockSlices
;
3243 mipChainSliceSize
+= sliceSize
;
3245 if (pOut
->pMipInfo
!= NULL
)
3247 pOut
->pMipInfo
[i
].pitch
= pitch
;
3248 pOut
->pMipInfo
[i
].height
= height
;
3249 pOut
->pMipInfo
[i
].depth
= depth
;
3254 pOut
->sliceSize
= mipChainSliceSize
;
3255 pOut
->surfSize
= mipChainSliceSize
* pOut
->numSlices
;
3256 pOut
->mipChainInTail
= (firstMipInTail
== 0) ? TRUE
: FALSE
;
3257 pOut
->firstMipIdInTail
= firstMipInTail
;
3259 if (pOut
->pMipInfo
!= NULL
)
3262 UINT_64 macroBlkOffset
= 0;
3263 UINT_32 tailMaxDepth
= 0;
3265 if (firstMipInTail
!= pIn
->numMipLevels
)
3267 UINT_32 mipWidth
, mipHeight
;
3269 GetMipSize(mip0Width
, mip0Height
, mip0Depth
, firstMipInTail
,
3270 &mipWidth
, &mipHeight
, &tailMaxDepth
);
3272 offset
= blockSize
* PowTwoAlign(tailMaxDepth
, pOut
->blockSlices
) / pOut
->blockSlices
;
3273 macroBlkOffset
= blockSize
;
3276 for (INT_32 i
= firstMipInTail
- 1; i
>= 0; i
--)
3278 pOut
->pMipInfo
[i
].offset
= offset
;
3279 pOut
->pMipInfo
[i
].macroBlockOffset
= macroBlkOffset
;
3280 pOut
->pMipInfo
[i
].mipTailOffset
= 0;
3282 offset
+= mipSize
[i
];
3283 macroBlkOffset
+= mipSliceSize
[i
];
3286 UINT_32 pitch
= tailMaxDim
.w
;
3287 UINT_32 height
= tailMaxDim
.h
;
3288 UINT_32 depth
= isThin
? 1 : PowTwoAlign(tailMaxDepth
, Block256_3d
[index
].d
);
3290 tailMaxDepth
= isThin
? 1 : (depth
/ Block256_3d
[index
].d
);
3292 for (UINT_32 i
= firstMipInTail
; i
< pIn
->numMipLevels
; i
++)
3294 const UINT_32 m
= maxMipsInTail
- 1 - (i
- firstMipInTail
);
3295 const UINT_32 mipOffset
= (m
> 6) ? (16 << m
) : (m
<< 8);
3297 pOut
->pMipInfo
[i
].offset
= mipOffset
* tailMaxDepth
;
3298 pOut
->pMipInfo
[i
].mipTailOffset
= mipOffset
;
3299 pOut
->pMipInfo
[i
].macroBlockOffset
= 0;
3301 pOut
->pMipInfo
[i
].pitch
= pitch
;
3302 pOut
->pMipInfo
[i
].height
= height
;
3303 pOut
->pMipInfo
[i
].depth
= depth
;
3305 UINT_32 mipX
= ((mipOffset
>> 9) & 1) |
3306 ((mipOffset
>> 10) & 2) |
3307 ((mipOffset
>> 11) & 4) |
3308 ((mipOffset
>> 12) & 8) |
3309 ((mipOffset
>> 13) & 16) |
3310 ((mipOffset
>> 14) & 32);
3311 UINT_32 mipY
= ((mipOffset
>> 8) & 1) |
3312 ((mipOffset
>> 9) & 2) |
3313 ((mipOffset
>> 10) & 4) |
3314 ((mipOffset
>> 11) & 8) |
3315 ((mipOffset
>> 12) & 16) |
3316 ((mipOffset
>> 13) & 32);
3318 if (blockSizeLog2
& 1)
3320 const UINT_32 temp
= mipX
;
3326 mipY
= (mipY
<< 1) | (mipX
& 1);
3333 pOut
->pMipInfo
[i
].mipTailCoordX
= mipX
* Block256_2d
[index
].w
;
3334 pOut
->pMipInfo
[i
].mipTailCoordY
= mipY
* Block256_2d
[index
].h
;
3335 pOut
->pMipInfo
[i
].mipTailCoordZ
= 0;
3337 pitch
= Max(pitch
>> 1, Block256_2d
[index
].w
);
3338 height
= Max(height
>> 1, Block256_2d
[index
].h
);
3343 pOut
->pMipInfo
[i
].mipTailCoordX
= mipX
* Block256_3d
[index
].w
;
3344 pOut
->pMipInfo
[i
].mipTailCoordY
= mipY
* Block256_3d
[index
].h
;
3345 pOut
->pMipInfo
[i
].mipTailCoordZ
= 0;
3347 pitch
= Max(pitch
>> 1, Block256_3d
[index
].w
);
3348 height
= Max(height
>> 1, Block256_3d
[index
].h
);
3349 depth
= PowTwoAlign(Max(depth
>> 1, 1u), Block256_3d
[index
].d
);
3356 pOut
->sliceSize
= static_cast<UINT_64
>(pOut
->pitch
) * pOut
->height
* (pIn
->bpp
>> 3) * pIn
->numFrags
;
3357 pOut
->surfSize
= pOut
->sliceSize
* pOut
->numSlices
;
3359 if (pOut
->pMipInfo
!= NULL
)
3361 pOut
->pMipInfo
[0].pitch
= pOut
->pitch
;
3362 pOut
->pMipInfo
[0].height
= pOut
->height
;
3363 pOut
->pMipInfo
[0].depth
= IsTex3d(pIn
->resourceType
)? pOut
->numSlices
: 1;
3364 pOut
->pMipInfo
[0].offset
= 0;
3365 pOut
->pMipInfo
[0].mipTailOffset
= 0;
3366 pOut
->pMipInfo
[0].macroBlockOffset
= 0;
3367 pOut
->pMipInfo
[0].mipTailCoordX
= 0;
3368 pOut
->pMipInfo
[0].mipTailCoordY
= 0;
3369 pOut
->pMipInfo
[0].mipTailCoordZ
= 0;
3379 ************************************************************************************************************************
3380 * Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3383 * Internal function to calculate address from coord for tiled swizzle surface
3387 ************************************************************************************************************************
3389 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
3390 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
3391 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
* pOut
///< [out] output structure
3394 ADDR_E_RETURNCODE ret
;
3396 if (IsBlock256b(pIn
->swizzleMode
))
3398 ret
= ComputeSurfaceAddrFromCoordMicroTiled(pIn
, pOut
);
3402 ret
= ComputeSurfaceAddrFromCoordMacroTiled(pIn
, pOut
);
3409 ************************************************************************************************************************
3410 * Gfx10Lib::ComputeOffsetFromEquation
3413 * Compute offset from equation
3417 ************************************************************************************************************************
3419 UINT_32
Gfx10Lib::ComputeOffsetFromEquation(
3420 const ADDR_EQUATION
* pEq
, ///< Equation
3421 UINT_32 x
, ///< x coord in bytes
3422 UINT_32 y
, ///< y coord in pixel
3423 UINT_32 z
///< z coord in slice
3428 for (UINT_32 i
= 0; i
< pEq
->numBits
; i
++)
3432 if (pEq
->addr
[i
].valid
)
3434 if (pEq
->addr
[i
].channel
== 0)
3436 v
^= (x
>> pEq
->addr
[i
].index
) & 1;
3438 else if (pEq
->addr
[i
].channel
== 1)
3440 v
^= (y
>> pEq
->addr
[i
].index
) & 1;
3444 ADDR_ASSERT(pEq
->addr
[i
].channel
== 2);
3445 v
^= (z
>> pEq
->addr
[i
].index
) & 1;
3449 if (pEq
->xor1
[i
].valid
)
3451 if (pEq
->xor1
[i
].channel
== 0)
3453 v
^= (x
>> pEq
->xor1
[i
].index
) & 1;
3455 else if (pEq
->xor1
[i
].channel
== 1)
3457 v
^= (y
>> pEq
->xor1
[i
].index
) & 1;
3461 ADDR_ASSERT(pEq
->xor1
[i
].channel
== 2);
3462 v
^= (z
>> pEq
->xor1
[i
].index
) & 1;
3466 if (pEq
->xor2
[i
].valid
)
3468 if (pEq
->xor2
[i
].channel
== 0)
3470 v
^= (x
>> pEq
->xor2
[i
].index
) & 1;
3472 else if (pEq
->xor2
[i
].channel
== 1)
3474 v
^= (y
>> pEq
->xor2
[i
].index
) & 1;
3478 ADDR_ASSERT(pEq
->xor2
[i
].channel
== 2);
3479 v
^= (z
>> pEq
->xor2
[i
].index
) & 1;
3490 ************************************************************************************************************************
3491 * Gfx10Lib::ComputeOffsetFromSwizzlePattern
3494 * Compute offset from swizzle pattern
3498 ************************************************************************************************************************
3500 UINT_32
Gfx10Lib::ComputeOffsetFromSwizzlePattern(
3501 const UINT_64
* pPattern
, ///< Swizzle pattern
3502 UINT_32 numBits
, ///< Number of bits in pattern
3503 UINT_32 x
, ///< x coord in pixel
3504 UINT_32 y
, ///< y coord in pixel
3505 UINT_32 z
, ///< z coord in slice
3506 UINT_32 s
///< sample id
3510 const ADDR_BIT_SETTING
* pSwizzlePattern
= reinterpret_cast<const ADDR_BIT_SETTING
*>(pPattern
);
3512 for (UINT_32 i
= 0; i
< numBits
; i
++)
3516 if (pSwizzlePattern
[i
].x
!= 0)
3518 UINT_16 mask
= pSwizzlePattern
[i
].x
;
3533 if (pSwizzlePattern
[i
].y
!= 0)
3535 UINT_16 mask
= pSwizzlePattern
[i
].y
;
3550 if (pSwizzlePattern
[i
].z
!= 0)
3552 UINT_16 mask
= pSwizzlePattern
[i
].z
;
3567 if (pSwizzlePattern
[i
].s
!= 0)
3569 UINT_16 mask
= pSwizzlePattern
[i
].s
;
3591 ************************************************************************************************************************
3592 * Gfx10Lib::GetSwizzlePatternInfo
3595 * Get swizzle pattern
3598 * Swizzle pattern information
3599 ************************************************************************************************************************
3601 const ADDR_SW_PATINFO
* Gfx10Lib::GetSwizzlePatternInfo(
3602 AddrSwizzleMode swizzleMode
, ///< Swizzle mode
3603 AddrResourceType resourceType
, ///< Resource type
3604 UINT_32 elemLog2
, ///< Element size in bytes log2
3605 UINT_32 numFrag
///< Number of fragment
3608 const UINT_32 index
= IsXor(swizzleMode
) ? (m_colorBaseIndex
+ elemLog2
) : elemLog2
;
3609 const ADDR_SW_PATINFO
* patInfo
= NULL
;
3610 const UINT_32 swizzleMask
= 1 << swizzleMode
;
3612 if (IsLinear(swizzleMode
) == FALSE
)
3614 if (IsBlockVariable(swizzleMode
))
3616 if (m_blockVarSizeLog2
!= 0)
3618 ADDR_ASSERT(m_settings
.supportRbPlus
);
3620 if (IsRtOptSwizzle(swizzleMode
))
3624 patInfo
= SW_VAR_R_X_1xaa_RBPLUS_PATINFO
;
3626 else if (numFrag
== 2)
3628 patInfo
= SW_VAR_R_X_2xaa_RBPLUS_PATINFO
;
3630 else if (numFrag
== 4)
3632 patInfo
= SW_VAR_R_X_4xaa_RBPLUS_PATINFO
;
3636 ADDR_ASSERT(numFrag
== 8);
3637 patInfo
= SW_VAR_R_X_8xaa_RBPLUS_PATINFO
;
3640 else if (IsZOrderSwizzle(swizzleMode
))
3644 patInfo
= SW_VAR_Z_X_1xaa_RBPLUS_PATINFO
;
3646 else if (numFrag
== 2)
3648 patInfo
= SW_VAR_Z_X_2xaa_RBPLUS_PATINFO
;
3650 else if (numFrag
== 4)
3652 patInfo
= SW_VAR_Z_X_4xaa_RBPLUS_PATINFO
;
3656 ADDR_ASSERT(numFrag
== 8);
3657 patInfo
= SW_VAR_Z_X_8xaa_RBPLUS_PATINFO
;
3662 else if (resourceType
== ADDR_RSRC_TEX_3D
)
3664 ADDR_ASSERT(numFrag
== 1);
3666 if ((swizzleMask
& Gfx10Rsrc3dSwModeMask
) != 0)
3668 if (IsRtOptSwizzle(swizzleMode
))
3670 patInfo
= m_settings
.supportRbPlus
? SW_64K_R_X_1xaa_RBPLUS_PATINFO
: SW_64K_R_X_1xaa_PATINFO
;
3672 else if (IsZOrderSwizzle(swizzleMode
))
3674 patInfo
= m_settings
.supportRbPlus
? SW_64K_Z_X_1xaa_RBPLUS_PATINFO
: SW_64K_Z_X_1xaa_PATINFO
;
3676 else if (IsDisplaySwizzle(resourceType
, swizzleMode
))
3678 ADDR_ASSERT(swizzleMode
== ADDR_SW_64KB_D_X
);
3679 patInfo
= m_settings
.supportRbPlus
? SW_64K_D3_X_RBPLUS_PATINFO
: SW_64K_D3_X_PATINFO
;
3683 ADDR_ASSERT(IsStandardSwizzle(resourceType
, swizzleMode
));
3685 if (IsBlock4kb(swizzleMode
))
3687 if (swizzleMode
== ADDR_SW_4KB_S
)
3689 patInfo
= m_settings
.supportRbPlus
? SW_4K_S3_RBPLUS_PATINFO
: SW_4K_S3_PATINFO
;
3693 ADDR_ASSERT(swizzleMode
== ADDR_SW_4KB_S_X
);
3694 patInfo
= m_settings
.supportRbPlus
? SW_4K_S3_X_RBPLUS_PATINFO
: SW_4K_S3_X_PATINFO
;
3699 if (swizzleMode
== ADDR_SW_64KB_S
)
3701 patInfo
= m_settings
.supportRbPlus
? SW_64K_S3_RBPLUS_PATINFO
: SW_64K_S3_PATINFO
;
3703 else if (swizzleMode
== ADDR_SW_64KB_S_X
)
3705 patInfo
= m_settings
.supportRbPlus
? SW_64K_S3_X_RBPLUS_PATINFO
: SW_64K_S3_X_PATINFO
;
3709 ADDR_ASSERT(swizzleMode
== ADDR_SW_64KB_S_T
);
3710 patInfo
= m_settings
.supportRbPlus
? SW_64K_S3_T_RBPLUS_PATINFO
: SW_64K_S3_T_PATINFO
;
3718 if ((swizzleMask
& Gfx10Rsrc2dSwModeMask
) != 0)
3720 if (IsBlock256b(swizzleMode
))
3722 if (swizzleMode
== ADDR_SW_256B_S
)
3724 patInfo
= m_settings
.supportRbPlus
? SW_256_S_RBPLUS_PATINFO
: SW_256_S_PATINFO
;
3728 ADDR_ASSERT(swizzleMode
== ADDR_SW_256B_D
);
3729 patInfo
= m_settings
.supportRbPlus
? SW_256_D_RBPLUS_PATINFO
: SW_256_D_PATINFO
;
3732 else if (IsBlock4kb(swizzleMode
))
3734 if (IsStandardSwizzle(resourceType
, swizzleMode
))
3736 if (swizzleMode
== ADDR_SW_4KB_S
)
3738 patInfo
= m_settings
.supportRbPlus
? SW_4K_S_RBPLUS_PATINFO
: SW_4K_S_PATINFO
;
3742 ADDR_ASSERT(swizzleMode
== ADDR_SW_4KB_S_X
);
3743 patInfo
= m_settings
.supportRbPlus
? SW_4K_S_X_RBPLUS_PATINFO
: SW_4K_S_X_PATINFO
;
3748 if (swizzleMode
== ADDR_SW_4KB_D
)
3750 patInfo
= m_settings
.supportRbPlus
? SW_4K_D_RBPLUS_PATINFO
: SW_4K_D_PATINFO
;
3754 ADDR_ASSERT(swizzleMode
== ADDR_SW_4KB_D_X
);
3755 patInfo
= m_settings
.supportRbPlus
? SW_4K_D_X_RBPLUS_PATINFO
: SW_4K_D_X_PATINFO
;
3761 if (IsRtOptSwizzle(swizzleMode
))
3765 patInfo
= m_settings
.supportRbPlus
? SW_64K_R_X_1xaa_RBPLUS_PATINFO
: SW_64K_R_X_1xaa_PATINFO
;
3767 else if (numFrag
== 2)
3769 patInfo
= m_settings
.supportRbPlus
? SW_64K_R_X_2xaa_RBPLUS_PATINFO
: SW_64K_R_X_2xaa_PATINFO
;
3771 else if (numFrag
== 4)
3773 patInfo
= m_settings
.supportRbPlus
? SW_64K_R_X_4xaa_RBPLUS_PATINFO
: SW_64K_R_X_4xaa_PATINFO
;
3777 ADDR_ASSERT(numFrag
== 8);
3778 patInfo
= m_settings
.supportRbPlus
? SW_64K_R_X_8xaa_RBPLUS_PATINFO
: SW_64K_R_X_8xaa_PATINFO
;
3781 else if (IsZOrderSwizzle(swizzleMode
))
3785 patInfo
= m_settings
.supportRbPlus
? SW_64K_Z_X_1xaa_RBPLUS_PATINFO
: SW_64K_Z_X_1xaa_PATINFO
;
3787 else if (numFrag
== 2)
3789 patInfo
= m_settings
.supportRbPlus
? SW_64K_Z_X_2xaa_RBPLUS_PATINFO
: SW_64K_Z_X_2xaa_PATINFO
;
3791 else if (numFrag
== 4)
3793 patInfo
= m_settings
.supportRbPlus
? SW_64K_Z_X_4xaa_RBPLUS_PATINFO
: SW_64K_Z_X_4xaa_PATINFO
;
3797 ADDR_ASSERT(numFrag
== 8);
3798 patInfo
= m_settings
.supportRbPlus
? SW_64K_Z_X_8xaa_RBPLUS_PATINFO
: SW_64K_Z_X_8xaa_PATINFO
;
3801 else if (IsDisplaySwizzle(resourceType
, swizzleMode
))
3803 if (swizzleMode
== ADDR_SW_64KB_D
)
3805 patInfo
= m_settings
.supportRbPlus
? SW_64K_D_RBPLUS_PATINFO
: SW_64K_D_PATINFO
;
3807 else if (swizzleMode
== ADDR_SW_64KB_D_X
)
3809 patInfo
= m_settings
.supportRbPlus
? SW_64K_D_X_RBPLUS_PATINFO
: SW_64K_D_X_PATINFO
;
3813 ADDR_ASSERT(swizzleMode
== ADDR_SW_64KB_D_T
);
3814 patInfo
= m_settings
.supportRbPlus
? SW_64K_D_T_RBPLUS_PATINFO
: SW_64K_D_T_PATINFO
;
3819 if (swizzleMode
== ADDR_SW_64KB_S
)
3821 patInfo
= m_settings
.supportRbPlus
? SW_64K_S_RBPLUS_PATINFO
: SW_64K_S_PATINFO
;
3823 else if (swizzleMode
== ADDR_SW_64KB_S_X
)
3825 patInfo
= m_settings
.supportRbPlus
? SW_64K_S_X_RBPLUS_PATINFO
: SW_64K_S_X_PATINFO
;
3829 ADDR_ASSERT(swizzleMode
== ADDR_SW_64KB_S_T
);
3830 patInfo
= m_settings
.supportRbPlus
? SW_64K_S_T_RBPLUS_PATINFO
: SW_64K_S_T_PATINFO
;
3838 return (patInfo
!= NULL
) ? &patInfo
[index
] : NULL
;
3842 ************************************************************************************************************************
3843 * Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
3846 * Internal function to calculate address from coord for micro tiled swizzle surface
3850 ************************************************************************************************************************
3852 ADDR_E_RETURNCODE
Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
3853 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
3854 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
* pOut
///< [out] output structure
3857 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn
= {0};
3858 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut
= {0};
3859 ADDR2_MIP_INFO mipInfo
[MaxMipLevels
];
3861 localIn
.swizzleMode
= pIn
->swizzleMode
;
3862 localIn
.flags
= pIn
->flags
;
3863 localIn
.resourceType
= pIn
->resourceType
;
3864 localIn
.bpp
= pIn
->bpp
;
3865 localIn
.width
= Max(pIn
->unalignedWidth
, 1u);
3866 localIn
.height
= Max(pIn
->unalignedHeight
, 1u);
3867 localIn
.numSlices
= Max(pIn
->numSlices
, 1u);
3868 localIn
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
3869 localIn
.numSamples
= Max(pIn
->numSamples
, 1u);
3870 localIn
.numFrags
= Max(pIn
->numFrags
, 1u);
3871 localOut
.pMipInfo
= mipInfo
;
3873 ADDR_E_RETURNCODE ret
= ComputeSurfaceInfoMicroTiled(&localIn
, &localOut
);
3877 const UINT_32 elemLog2
= Log2(pIn
->bpp
>> 3);
3878 const UINT_32 rsrcType
= static_cast<UINT_32
>(pIn
->resourceType
) - 1;
3879 const UINT_32 swMode
= static_cast<UINT_32
>(pIn
->swizzleMode
);
3880 const UINT_32 eqIndex
= m_equationLookupTable
[rsrcType
][swMode
][elemLog2
];
3882 if (eqIndex
!= ADDR_INVALID_EQUATION_INDEX
)
3884 const UINT_32 pb
= mipInfo
[pIn
->mipId
].pitch
/ localOut
.blockWidth
;
3885 const UINT_32 yb
= pIn
->y
/ localOut
.blockHeight
;
3886 const UINT_32 xb
= pIn
->x
/ localOut
.blockWidth
;
3887 const UINT_32 blockIndex
= yb
* pb
+ xb
;
3888 const UINT_32 blockSize
= 256;
3889 const UINT_32 blk256Offset
= ComputeOffsetFromEquation(&m_equationTable
[eqIndex
],
3893 pOut
->addr
= localOut
.sliceSize
* pIn
->slice
+
3894 mipInfo
[pIn
->mipId
].macroBlockOffset
+
3895 (blockIndex
* blockSize
) +
3900 ret
= ADDR_INVALIDPARAMS
;
3908 ************************************************************************************************************************
3909 * Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
3912 * Internal function to calculate address from coord for macro tiled swizzle surface
3916 ************************************************************************************************************************
3918 ADDR_E_RETURNCODE
Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
3919 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
3920 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
* pOut
///< [out] output structure
3923 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn
= {0};
3924 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut
= {0};
3925 ADDR2_MIP_INFO mipInfo
[MaxMipLevels
];
3927 localIn
.swizzleMode
= pIn
->swizzleMode
;
3928 localIn
.flags
= pIn
->flags
;
3929 localIn
.resourceType
= pIn
->resourceType
;
3930 localIn
.bpp
= pIn
->bpp
;
3931 localIn
.width
= Max(pIn
->unalignedWidth
, 1u);
3932 localIn
.height
= Max(pIn
->unalignedHeight
, 1u);
3933 localIn
.numSlices
= Max(pIn
->numSlices
, 1u);
3934 localIn
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
3935 localIn
.numSamples
= Max(pIn
->numSamples
, 1u);
3936 localIn
.numFrags
= Max(pIn
->numFrags
, 1u);
3937 localOut
.pMipInfo
= mipInfo
;
3939 ADDR_E_RETURNCODE ret
= ComputeSurfaceInfoMacroTiled(&localIn
, &localOut
);
3943 const UINT_32 elemLog2
= Log2(pIn
->bpp
>> 3);
3944 const UINT_32 blkSizeLog2
= GetBlockSizeLog2(pIn
->swizzleMode
);
3945 const UINT_32 blkMask
= (1 << blkSizeLog2
) - 1;
3946 const UINT_32 pipeMask
= (1 << m_pipesLog2
) - 1;
3947 const UINT_32 bankMask
= ((1 << GetBankXorBits(blkSizeLog2
)) - 1) << (m_pipesLog2
+ ColumnBits
);
3948 const UINT_32 pipeBankXor
= IsXor(pIn
->swizzleMode
) ?
3949 (((pIn
->pipeBankXor
& (pipeMask
| bankMask
)) << m_pipeInterleaveLog2
) & blkMask
) : 0;
3951 if (localIn
.numFrags
> 1)
3953 const ADDR_SW_PATINFO
* pPatInfo
= GetSwizzlePatternInfo(pIn
->swizzleMode
,
3958 if (pPatInfo
!= NULL
)
3960 const UINT_32 pb
= localOut
.pitch
/ localOut
.blockWidth
;
3961 const UINT_32 yb
= pIn
->y
/ localOut
.blockHeight
;
3962 const UINT_32 xb
= pIn
->x
/ localOut
.blockWidth
;
3963 const UINT_64 blkIdx
= yb
* pb
+ xb
;
3965 ADDR_BIT_SETTING fullSwizzlePattern
[20];
3966 GetSwizzlePatternFromPatternInfo(pPatInfo
, fullSwizzlePattern
);
3968 const UINT_32 blkOffset
=
3969 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64
*>(fullSwizzlePattern
),
3976 pOut
->addr
= (localOut
.sliceSize
* pIn
->slice
) +
3977 (blkIdx
<< blkSizeLog2
) +
3978 (blkOffset
^ pipeBankXor
);
3982 ret
= ADDR_INVALIDPARAMS
;
3987 const UINT_32 rsrcIdx
= (pIn
->resourceType
== ADDR_RSRC_TEX_3D
) ? 1 : 0;
3988 const UINT_32 swMode
= static_cast<UINT_32
>(pIn
->swizzleMode
);
3989 const UINT_32 eqIndex
= m_equationLookupTable
[rsrcIdx
][swMode
][elemLog2
];
3991 if (eqIndex
!= ADDR_INVALID_EQUATION_INDEX
)
3993 const BOOL_32 inTail
= (mipInfo
[pIn
->mipId
].mipTailOffset
!= 0) ? TRUE
: FALSE
;
3994 const BOOL_32 isThin
= IsThin(pIn
->resourceType
, pIn
->swizzleMode
);
3995 const UINT_64 sliceSize
= isThin
? localOut
.sliceSize
: (localOut
.sliceSize
* localOut
.blockSlices
);
3996 const UINT_32 sliceId
= isThin
? pIn
->slice
: (pIn
->slice
/ localOut
.blockSlices
);
3997 const UINT_32 x
= inTail
? (pIn
->x
+ mipInfo
[pIn
->mipId
].mipTailCoordX
) : pIn
->x
;
3998 const UINT_32 y
= inTail
? (pIn
->y
+ mipInfo
[pIn
->mipId
].mipTailCoordY
) : pIn
->y
;
3999 const UINT_32 z
= inTail
? (pIn
->slice
+ mipInfo
[pIn
->mipId
].mipTailCoordZ
) : pIn
->slice
;
4000 const UINT_32 pb
= mipInfo
[pIn
->mipId
].pitch
/ localOut
.blockWidth
;
4001 const UINT_32 yb
= pIn
->y
/ localOut
.blockHeight
;
4002 const UINT_32 xb
= pIn
->x
/ localOut
.blockWidth
;
4003 const UINT_64 blkIdx
= yb
* pb
+ xb
;
4004 const UINT_32 blkOffset
= ComputeOffsetFromEquation(&m_equationTable
[eqIndex
],
4008 pOut
->addr
= sliceSize
* sliceId
+
4009 mipInfo
[pIn
->mipId
].macroBlockOffset
+
4010 (blkIdx
<< blkSizeLog2
) +
4011 (blkOffset
^ pipeBankXor
);
4015 ret
= ADDR_INVALIDPARAMS
;
4024 ************************************************************************************************************************
4025 * Gfx10Lib::HwlComputeMaxBaseAlignments
4028 * Gets maximum alignments
4030 * maximum alignments
4031 ************************************************************************************************************************
4033 UINT_32
Gfx10Lib::HwlComputeMaxBaseAlignments() const
4035 return m_blockVarSizeLog2
? Max(Size64K
, 1u << m_blockVarSizeLog2
) : Size64K
;
4039 ************************************************************************************************************************
4040 * Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4043 * Gets maximum alignments for metadata
4045 * maximum alignments for metadata
4046 ************************************************************************************************************************
// Returns the largest meta block size reported by GetMetaBlkSize() over the Htile, Cmask, 2D Dcc and
// 3D Dcc configurations enumerated below.
4048 UINT_32
Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
// Swizzle modes tried for Htile and Cmask. The last entry is the variable-block Z mode when a variable
// block size is configured (m_blockVarSizeLog2 != 0), otherwise the 64KB Z mode.
4052 const AddrSwizzleMode ValidSwizzleModeForXmask
[] =
4055 m_blockVarSizeLog2
? ADDR_SW_VAR_Z_X
: ADDR_SW_64KB_Z_X
,
// Running maxima for the Htile and Cmask meta block sizes.
4058 UINT_32 maxBaseAlignHtile
= 0;
4059 UINT_32 maxBaseAlignCmask
= 0;
// sizeof(array) / sizeof(array[0]) is the number of entries in the swizzle mode table.
4061 for (UINT_32 swIdx
= 0; swIdx
< sizeof(ValidSwizzleModeForXmask
) / sizeof(ValidSwizzleModeForXmask
[0]); swIdx
++)
// bppLog2 runs over 0..2.
4063 for (UINT_32 bppLog2
= 0; bppLog2
< 3; bppLog2
++)
// numFragLog2 runs over 0..3.
4065 for (UINT_32 numFragLog2
= 0; numFragLog2
< 4; numFragLog2
++)
4067 // Max base alignment for Htile
4068 const UINT_32 metaBlkSizeHtile
= GetMetaBlkSize(Gfx10DataDepthStencil
,
4070 ValidSwizzleModeForXmask
[swIdx
],
4076 maxBaseAlignHtile
= Max(maxBaseAlignHtile
, metaBlkSizeHtile
);
4080 // Max base alignment for Cmask
4081 const UINT_32 metaBlkSizeCmask
= GetMetaBlkSize(Gfx10DataFmask
,
4083 ValidSwizzleModeForXmask
[swIdx
],
4089 maxBaseAlignCmask
= Max(maxBaseAlignCmask
, metaBlkSizeCmask
);
4092 // Max base alignment for 2D Dcc
// As for Xmask, the last entry depends on whether a variable block size is configured.
4093 const AddrSwizzleMode ValidSwizzleModeForDcc2D
[] =
4098 m_blockVarSizeLog2
? ADDR_SW_VAR_R_X
: ADDR_SW_64KB_R_X
,
4101 UINT_32 maxBaseAlignDcc2D
= 0;
4103 for (UINT_32 swIdx
= 0; swIdx
< sizeof(ValidSwizzleModeForDcc2D
) / sizeof(ValidSwizzleModeForDcc2D
[0]); swIdx
++)
// 2D Dcc is evaluated for every bppLog2 below MaxNumOfBpp and for numFragLog2 0..3.
4105 for (UINT_32 bppLog2
= 0; bppLog2
< MaxNumOfBpp
; bppLog2
++)
4107 for (UINT_32 numFragLog2
= 0; numFragLog2
< 4; numFragLog2
++)
4109 const UINT_32 metaBlkSize2D
= GetMetaBlkSize(Gfx10DataColor
,
4111 ValidSwizzleModeForDcc2D
[swIdx
],
4117 maxBaseAlignDcc2D
= Max(maxBaseAlignDcc2D
, metaBlkSize2D
);
4122 // Max base alignment for 3D Dcc
4123 const AddrSwizzleMode ValidSwizzleModeForDcc3D
[] =
4129 m_blockVarSizeLog2
? ADDR_SW_VAR_R_X
: ADDR_SW_64KB_R_X
,
4132 UINT_32 maxBaseAlignDcc3D
= 0;
4134 for (UINT_32 swIdx
= 0; swIdx
< sizeof(ValidSwizzleModeForDcc3D
) / sizeof(ValidSwizzleModeForDcc3D
[0]); swIdx
++)
// 3D Dcc has no loop over fragment counts; only bppLog2 is varied.
4136 for (UINT_32 bppLog2
= 0; bppLog2
< MaxNumOfBpp
; bppLog2
++)
4138 const UINT_32 metaBlkSize3D
= GetMetaBlkSize(Gfx10DataColor
,
4140 ValidSwizzleModeForDcc3D
[swIdx
],
4146 maxBaseAlignDcc3D
= Max(maxBaseAlignDcc3D
, metaBlkSize3D
);
// The overall result is the maximum of the four per-category maxima.
4150 return Max(Max(maxBaseAlignHtile
, maxBaseAlignCmask
), Max(maxBaseAlignDcc2D
, maxBaseAlignDcc3D
));
4154 ************************************************************************************************************************
4155 * Gfx10Lib::GetMetaElementSizeLog2
4158 * Gets meta data element size log2
4160 * Meta data element size log2
4161 ************************************************************************************************************************
// Maps a Gfx10DataType to the log2 of its meta data element size. The result is a signed INT_32.
4163 INT_32
Gfx10Lib::GetMetaElementSizeLog2(
4164 Gfx10DataType dataType
) ///< Data surface type
// Result; starts at 0 and is returned at the end of the function.
4166 INT_32 elemSizeLog2
= 0;
// Case 1: color data.
4168 if (dataType
== Gfx10DataColor
)
// Case 2: depth/stencil data.
4172 else if (dataType
== Gfx10DataDepthStencil
)
// Remaining case: anything else is expected to be Fmask data (checked by assertion only).
4178 ADDR_ASSERT(dataType
== Gfx10DataFmask
);
4182 return elemSizeLog2
;
4186 ************************************************************************************************************************
4187 * Gfx10Lib::GetMetaCacheSizeLog2
4190 * Gets meta data cache line size log2
4192 * Meta data cache line size log2
4193 ************************************************************************************************************************
// Maps a Gfx10DataType to the log2 of its meta data cache line size. The result is a signed INT_32.
4195 INT_32
Gfx10Lib::GetMetaCacheSizeLog2(
4196 Gfx10DataType dataType
) ///< Data surface type
// Result; starts at 0 and is returned at the end of the function.
4198 INT_32 cacheSizeLog2
= 0;
// Case 1: color data.
4200 if (dataType
== Gfx10DataColor
)
// Case 2: depth/stencil data.
4204 else if (dataType
== Gfx10DataDepthStencil
)
// Remaining case: anything else is expected to be Fmask data (checked by assertion only).
4210 ADDR_ASSERT(dataType
== Gfx10DataFmask
);
4213 return cacheSizeLog2
;
4217 ************************************************************************************************************************
4218 * Gfx10Lib::HwlComputeSurfaceInfoLinear
4221 * Internal function to calculate alignment for linear surface
4225 ************************************************************************************************************************
4227 ADDR_E_RETURNCODE
Gfx10Lib::HwlComputeSurfaceInfoLinear(
4228 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
4229 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
4232 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
4234 if (IsTex1d(pIn
->resourceType
) && (pIn
->height
> 1))
4236 returnCode
= ADDR_INVALIDPARAMS
;
4240 const UINT_32 elementBytes
= pIn
->bpp
>> 3;
4241 const UINT_32 pitchAlign
= (pIn
->swizzleMode
== ADDR_SW_LINEAR_GENERAL
) ? 1 : (256 / elementBytes
);
4242 const UINT_32 mipDepth
= (pIn
->resourceType
== ADDR_RSRC_TEX_3D
) ? pIn
->numSlices
: 1;
4243 UINT_32 pitch
= PowTwoAlign(pIn
->width
, pitchAlign
);
4244 UINT_32 actualHeight
= pIn
->height
;
4245 UINT_64 sliceSize
= 0;
4247 if (pIn
->numMipLevels
> 1)
4249 for (INT_32 i
= static_cast<INT_32
>(pIn
->numMipLevels
) - 1; i
>= 0; i
--)
4251 UINT_32 mipWidth
, mipHeight
;
4253 GetMipSize(pIn
->width
, pIn
->height
, 1, i
, &mipWidth
, &mipHeight
);
4255 const UINT_32 mipActualWidth
= PowTwoAlign(mipWidth
, pitchAlign
);
4257 if (pOut
->pMipInfo
!= NULL
)
4259 pOut
->pMipInfo
[i
].pitch
= mipActualWidth
;
4260 pOut
->pMipInfo
[i
].height
= mipHeight
;
4261 pOut
->pMipInfo
[i
].depth
= mipDepth
;
4262 pOut
->pMipInfo
[i
].offset
= sliceSize
;
4263 pOut
->pMipInfo
[i
].mipTailOffset
= 0;
4264 pOut
->pMipInfo
[i
].macroBlockOffset
= sliceSize
;
4267 sliceSize
+= static_cast<UINT_64
>(mipActualWidth
) * mipHeight
* elementBytes
;
4272 returnCode
= ApplyCustomizedPitchHeight(pIn
, elementBytes
, pitchAlign
, &pitch
, &actualHeight
);
4274 if (returnCode
== ADDR_OK
)
4276 sliceSize
= static_cast<UINT_64
>(pitch
) * actualHeight
* elementBytes
;
4278 if (pOut
->pMipInfo
!= NULL
)
4280 pOut
->pMipInfo
[0].pitch
= pitch
;
4281 pOut
->pMipInfo
[0].height
= actualHeight
;
4282 pOut
->pMipInfo
[0].depth
= mipDepth
;
4283 pOut
->pMipInfo
[0].offset
= 0;
4284 pOut
->pMipInfo
[0].mipTailOffset
= 0;
4285 pOut
->pMipInfo
[0].macroBlockOffset
= 0;
4290 if (returnCode
== ADDR_OK
)
4292 pOut
->pitch
= pitch
;
4293 pOut
->height
= actualHeight
;
4294 pOut
->numSlices
= pIn
->numSlices
;
4295 pOut
->sliceSize
= sliceSize
;
4296 pOut
->surfSize
= sliceSize
* pOut
->numSlices
;
4297 pOut
->baseAlign
= (pIn
->swizzleMode
== ADDR_SW_LINEAR_GENERAL
) ? elementBytes
: 256;
4298 pOut
->blockWidth
= pitchAlign
;
4299 pOut
->blockHeight
= 1;
4300 pOut
->blockSlices
= 1;
4302 // Following members are useless on GFX10
4303 pOut
->mipChainPitch
= 0;
4304 pOut
->mipChainHeight
= 0;
4305 pOut
->mipChainSlice
= 0;
4306 pOut
->epitchIsHeight
= FALSE
;
4308 // Post calculation validate
4309 ADDR_ASSERT(pOut
->sliceSize
> 0);