2 * Copyright © 2017 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
34 #include "gfx9addrlib.h"
36 #include "gfx9_gb_reg.h"
38 #include "amdgpu_asic_addr.h"
40 #include "util/macros.h"
42 ////////////////////////////////////////////////////////////////////////////////////////////////////
43 ////////////////////////////////////////////////////////////////////////////////////////////////////
49 ************************************************************************************************************************
53 * Creates a Gfx9Lib object.
56 * Returns a Gfx9Lib object pointer.
57 ************************************************************************************************************************
59 Addr::Lib
* Gfx9HwlInit(const Client
* pClient
)
61 return V2::Gfx9Lib::CreateObj(pClient
);
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68 // Static Const Member
69 ////////////////////////////////////////////////////////////////////////////////////////////////////
71 const SwizzleModeFlags
Gfx9Lib::SwizzleModeTable
[ADDR_SW_MAX_TYPE
] =
72 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt
73 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
74 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
75 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D
76 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R
78 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
79 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
80 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D
81 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R
83 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
84 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
85 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D
86 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_R
88 {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z
89 {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S
90 {0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D
91 {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R
93 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T
94 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T
95 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T
96 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T
98 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_x
99 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_x
100 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_x
101 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_x
103 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X
104 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X
105 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X
106 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X
108 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X
109 {0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X
110 {0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X
111 {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X
112 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
115 const UINT_32
Gfx9Lib::MipTailOffset256B
[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
116 8, 6, 5, 4, 3, 2, 1, 0};
118 const Dim3d
Gfx9Lib::Block256_3dS
[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
120 const Dim3d
Gfx9Lib::Block256_3dZ
[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
123 ************************************************************************************************************************
129 ************************************************************************************************************************
131 Gfx9Lib::Gfx9Lib(const Client
* pClient
)
136 m_class
= AI_ADDRLIB
;
137 memset(&m_settings
, 0, sizeof(m_settings
));
138 memcpy(m_swizzleModeTable
, SwizzleModeTable
, sizeof(SwizzleModeTable
));
142 ************************************************************************************************************************
147 ************************************************************************************************************************
154 ************************************************************************************************************************
155 * Gfx9Lib::HwlComputeHtileInfo
158 * Interface function stub of AddrComputeHtileInfo
162 ************************************************************************************************************************
164 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeHtileInfo(
165 const ADDR2_COMPUTE_HTILE_INFO_INPUT
* pIn
, ///< [in] input structure
166 ADDR2_COMPUTE_HTILE_INFO_OUTPUT
* pOut
///< [out] output structure
169 UINT_32 numPipeTotal
= GetPipeNumForMetaAddressing(pIn
->hTileFlags
.pipeAligned
,
172 UINT_32 numRbTotal
= pIn
->hTileFlags
.rbAligned
? m_se
* m_rbPerSe
: 1;
174 UINT_32 numCompressBlkPerMetaBlk
, numCompressBlkPerMetaBlkLog2
;
176 if ((numPipeTotal
== 1) && (numRbTotal
== 1))
178 numCompressBlkPerMetaBlkLog2
= 10;
182 if (m_settings
.applyAliasFix
)
184 numCompressBlkPerMetaBlkLog2
= m_seLog2
+ m_rbPerSeLog2
+ Max(10u, m_pipeInterleaveLog2
);
188 numCompressBlkPerMetaBlkLog2
= m_seLog2
+ m_rbPerSeLog2
+ 10;
192 numCompressBlkPerMetaBlk
= 1 << numCompressBlkPerMetaBlkLog2
;
194 Dim3d metaBlkDim
= {8, 8, 1};
195 UINT_32 totalAmpBits
= numCompressBlkPerMetaBlkLog2
;
196 UINT_32 widthAmp
= (pIn
->numMipLevels
> 1) ? (totalAmpBits
>> 1) : RoundHalf(totalAmpBits
);
197 UINT_32 heightAmp
= totalAmpBits
- widthAmp
;
198 metaBlkDim
.w
<<= widthAmp
;
199 metaBlkDim
.h
<<= heightAmp
;
202 Dim3d metaBlkDimDbg
= {8, 8, 1};
203 for (UINT_32 index
= 0; index
< numCompressBlkPerMetaBlkLog2
; index
++)
205 if ((metaBlkDimDbg
.h
< metaBlkDimDbg
.w
) ||
206 ((pIn
->numMipLevels
> 1) && (metaBlkDimDbg
.h
== metaBlkDimDbg
.w
)))
208 metaBlkDimDbg
.h
<<= 1;
212 metaBlkDimDbg
.w
<<= 1;
215 ADDR_ASSERT((metaBlkDimDbg
.w
== metaBlkDim
.w
) && (metaBlkDimDbg
.h
== metaBlkDim
.h
));
222 GetMetaMipInfo(pIn
->numMipLevels
, &metaBlkDim
, FALSE
, pOut
->pMipInfo
,
223 pIn
->unalignedWidth
, pIn
->unalignedHeight
, pIn
->numSlices
,
224 &numMetaBlkX
, &numMetaBlkY
, &numMetaBlkZ
);
226 const UINT_32 metaBlkSize
= numCompressBlkPerMetaBlk
<< 2;
227 UINT_32 align
= numPipeTotal
* numRbTotal
* m_pipeInterleaveBytes
;
229 if ((IsXor(pIn
->swizzleMode
) == FALSE
) && (numPipeTotal
> 2))
231 align
*= (numPipeTotal
>> 1);
234 align
= Max(align
, metaBlkSize
);
236 if (m_settings
.metaBaseAlignFix
)
238 align
= Max(align
, GetBlockSize(pIn
->swizzleMode
));
241 if (m_settings
.htileAlignFix
)
243 const INT_32 metaBlkSizeLog2
= numCompressBlkPerMetaBlkLog2
+ 2;
244 const INT_32 htileCachelineSizeLog2
= 11;
245 const INT_32 maxNumOfRbMaskBits
= 1 + Log2(numPipeTotal
) + Log2(numRbTotal
);
247 INT_32 rbMaskPadding
= Max(0, htileCachelineSizeLog2
- (metaBlkSizeLog2
- maxNumOfRbMaskBits
));
249 align
<<= rbMaskPadding
;
252 pOut
->pitch
= numMetaBlkX
* metaBlkDim
.w
;
253 pOut
->height
= numMetaBlkY
* metaBlkDim
.h
;
254 pOut
->sliceSize
= numMetaBlkX
* numMetaBlkY
* metaBlkSize
;
256 pOut
->metaBlkWidth
= metaBlkDim
.w
;
257 pOut
->metaBlkHeight
= metaBlkDim
.h
;
258 pOut
->metaBlkNumPerSlice
= numMetaBlkX
* numMetaBlkY
;
260 pOut
->baseAlign
= align
;
261 pOut
->htileBytes
= PowTwoAlign(pOut
->sliceSize
* numMetaBlkZ
, align
);
267 ************************************************************************************************************************
268 * Gfx9Lib::HwlComputeCmaskInfo
271 * Interface function stub of AddrComputeCmaskInfo
275 ************************************************************************************************************************
277 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeCmaskInfo(
278 const ADDR2_COMPUTE_CMASK_INFO_INPUT
* pIn
, ///< [in] input structure
279 ADDR2_COMPUTE_CMASK_INFO_OUTPUT
* pOut
///< [out] output structure
282 // TODO: Clarify with AddrLib team
283 // ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
285 UINT_32 numPipeTotal
= GetPipeNumForMetaAddressing(pIn
->cMaskFlags
.pipeAligned
,
288 UINT_32 numRbTotal
= pIn
->cMaskFlags
.rbAligned
? m_se
* m_rbPerSe
: 1;
290 UINT_32 numCompressBlkPerMetaBlkLog2
, numCompressBlkPerMetaBlk
;
292 if ((numPipeTotal
== 1) && (numRbTotal
== 1))
294 numCompressBlkPerMetaBlkLog2
= 13;
298 if (m_settings
.applyAliasFix
)
300 numCompressBlkPerMetaBlkLog2
= m_seLog2
+ m_rbPerSeLog2
+ Max(10u, m_pipeInterleaveLog2
);
304 numCompressBlkPerMetaBlkLog2
= m_seLog2
+ m_rbPerSeLog2
+ 10;
307 numCompressBlkPerMetaBlkLog2
= Max(numCompressBlkPerMetaBlkLog2
, 13u);
310 numCompressBlkPerMetaBlk
= 1 << numCompressBlkPerMetaBlkLog2
;
312 Dim2d metaBlkDim
= {8, 8};
313 UINT_32 totalAmpBits
= numCompressBlkPerMetaBlkLog2
;
314 UINT_32 heightAmp
= totalAmpBits
>> 1;
315 UINT_32 widthAmp
= totalAmpBits
- heightAmp
;
316 metaBlkDim
.w
<<= widthAmp
;
317 metaBlkDim
.h
<<= heightAmp
;
320 Dim2d metaBlkDimDbg
= {8, 8};
321 for (UINT_32 index
= 0; index
< numCompressBlkPerMetaBlkLog2
; index
++)
323 if (metaBlkDimDbg
.h
< metaBlkDimDbg
.w
)
325 metaBlkDimDbg
.h
<<= 1;
329 metaBlkDimDbg
.w
<<= 1;
332 ADDR_ASSERT((metaBlkDimDbg
.w
== metaBlkDim
.w
) && (metaBlkDimDbg
.h
== metaBlkDim
.h
));
335 UINT_32 numMetaBlkX
= (pIn
->unalignedWidth
+ metaBlkDim
.w
- 1) / metaBlkDim
.w
;
336 UINT_32 numMetaBlkY
= (pIn
->unalignedHeight
+ metaBlkDim
.h
- 1) / metaBlkDim
.h
;
337 UINT_32 numMetaBlkZ
= Max(pIn
->numSlices
, 1u);
339 UINT_32 sizeAlign
= numPipeTotal
* numRbTotal
* m_pipeInterleaveBytes
;
341 if (m_settings
.metaBaseAlignFix
)
343 sizeAlign
= Max(sizeAlign
, GetBlockSize(pIn
->swizzleMode
));
346 pOut
->pitch
= numMetaBlkX
* metaBlkDim
.w
;
347 pOut
->height
= numMetaBlkY
* metaBlkDim
.h
;
348 pOut
->sliceSize
= (numMetaBlkX
* numMetaBlkY
* numCompressBlkPerMetaBlk
) >> 1;
349 pOut
->cmaskBytes
= PowTwoAlign(pOut
->sliceSize
* numMetaBlkZ
, sizeAlign
);
350 pOut
->baseAlign
= Max(numCompressBlkPerMetaBlk
>> 1, sizeAlign
);
352 pOut
->metaBlkWidth
= metaBlkDim
.w
;
353 pOut
->metaBlkHeight
= metaBlkDim
.h
;
355 pOut
->metaBlkNumPerSlice
= numMetaBlkX
* numMetaBlkY
;
361 ************************************************************************************************************************
362 * Gfx9Lib::GetMetaMipInfo
369 ************************************************************************************************************************
371 VOID
Gfx9Lib::GetMetaMipInfo(
372 UINT_32 numMipLevels
, ///< [in] number of mip levels
373 Dim3d
* pMetaBlkDim
, ///< [in] meta block dimension
374 BOOL_32 dataThick
, ///< [in] data surface is thick
375 ADDR2_META_MIP_INFO
* pInfo
, ///< [out] meta mip info
376 UINT_32 mip0Width
, ///< [in] mip0 width
377 UINT_32 mip0Height
, ///< [in] mip0 height
378 UINT_32 mip0Depth
, ///< [in] mip0 depth
379 UINT_32
* pNumMetaBlkX
, ///< [out] number of metablock X in mipchain
380 UINT_32
* pNumMetaBlkY
, ///< [out] number of metablock Y in mipchain
381 UINT_32
* pNumMetaBlkZ
) ///< [out] number of metablock Z in mipchain
384 UINT_32 numMetaBlkX
= (mip0Width
+ pMetaBlkDim
->w
- 1) / pMetaBlkDim
->w
;
385 UINT_32 numMetaBlkY
= (mip0Height
+ pMetaBlkDim
->h
- 1) / pMetaBlkDim
->h
;
386 UINT_32 numMetaBlkZ
= (mip0Depth
+ pMetaBlkDim
->d
- 1) / pMetaBlkDim
->d
;
387 UINT_32 tailWidth
= pMetaBlkDim
->w
;
388 UINT_32 tailHeight
= pMetaBlkDim
->h
>> 1;
389 UINT_32 tailDepth
= pMetaBlkDim
->d
;
390 BOOL_32 inTail
= FALSE
;
391 AddrMajorMode major
= ADDR_MAJOR_MAX_TYPE
;
393 if (numMipLevels
> 1)
395 if (dataThick
&& (numMetaBlkZ
> numMetaBlkX
) && (numMetaBlkZ
> numMetaBlkY
))
398 major
= ADDR_MAJOR_Z
;
400 else if (numMetaBlkX
>= numMetaBlkY
)
403 major
= ADDR_MAJOR_X
;
408 major
= ADDR_MAJOR_Y
;
411 inTail
= ((mip0Width
<= tailWidth
) &&
412 (mip0Height
<= tailHeight
) &&
413 ((dataThick
== FALSE
) || (mip0Depth
<= tailDepth
)));
421 if (major
== ADDR_MAJOR_Z
)
424 pMipDim
= &numMetaBlkY
;
425 pOrderDim
= &numMetaBlkZ
;
428 else if (major
== ADDR_MAJOR_X
)
431 pMipDim
= &numMetaBlkY
;
432 pOrderDim
= &numMetaBlkX
;
438 pMipDim
= &numMetaBlkX
;
439 pOrderDim
= &numMetaBlkY
;
443 if ((*pMipDim
< 3) && (*pOrderDim
> orderLimit
) && (numMipLevels
> 3))
449 *pMipDim
+= ((*pMipDim
/ 2) + (*pMipDim
& 1));
456 UINT_32 mipWidth
= mip0Width
;
457 UINT_32 mipHeight
= mip0Height
;
458 UINT_32 mipDepth
= mip0Depth
;
459 Dim3d mipCoord
= {0};
461 for (UINT_32 mip
= 0; mip
< numMipLevels
; mip
++)
465 GetMetaMiptailInfo(&pInfo
[mip
], mipCoord
, numMipLevels
- mip
,
471 mipWidth
= PowTwoAlign(mipWidth
, pMetaBlkDim
->w
);
472 mipHeight
= PowTwoAlign(mipHeight
, pMetaBlkDim
->h
);
473 mipDepth
= PowTwoAlign(mipDepth
, pMetaBlkDim
->d
);
475 pInfo
[mip
].inMiptail
= FALSE
;
476 pInfo
[mip
].startX
= mipCoord
.w
;
477 pInfo
[mip
].startY
= mipCoord
.h
;
478 pInfo
[mip
].startZ
= mipCoord
.d
;
479 pInfo
[mip
].width
= mipWidth
;
480 pInfo
[mip
].height
= mipHeight
;
481 pInfo
[mip
].depth
= dataThick
? mipDepth
: 1;
483 if ((mip
>= 3) || (mip
& 1))
488 mipCoord
.w
+= mipWidth
;
491 mipCoord
.h
+= mipHeight
;
494 mipCoord
.d
+= mipDepth
;
505 mipCoord
.h
+= mipHeight
;
508 mipCoord
.w
+= mipWidth
;
511 mipCoord
.h
+= mipHeight
;
518 mipWidth
= Max(mipWidth
>> 1, 1u);
519 mipHeight
= Max(mipHeight
>> 1, 1u);
520 mipDepth
= Max(mipDepth
>> 1, 1u);
522 inTail
= ((mipWidth
<= tailWidth
) &&
523 (mipHeight
<= tailHeight
) &&
524 ((dataThick
== FALSE
) || (mipDepth
<= tailDepth
)));
529 *pNumMetaBlkX
= numMetaBlkX
;
530 *pNumMetaBlkY
= numMetaBlkY
;
531 *pNumMetaBlkZ
= numMetaBlkZ
;
535 ************************************************************************************************************************
536 * Gfx9Lib::HwlComputeDccInfo
539 * Interface function to compute DCC key info
543 ************************************************************************************************************************
545 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeDccInfo(
546 const ADDR2_COMPUTE_DCCINFO_INPUT
* pIn
, ///< [in] input structure
547 ADDR2_COMPUTE_DCCINFO_OUTPUT
* pOut
///< [out] output structure
550 BOOL_32 dataLinear
= IsLinear(pIn
->swizzleMode
);
551 BOOL_32 metaLinear
= pIn
->dccKeyFlags
.linear
;
552 BOOL_32 pipeAligned
= pIn
->dccKeyFlags
.pipeAligned
;
558 else if (metaLinear
== TRUE
)
563 UINT_32 numPipeTotal
= GetPipeNumForMetaAddressing(pipeAligned
, pIn
->swizzleMode
);
567 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
568 ADDR_ASSERT_ALWAYS();
570 pOut
->dccRamBaseAlign
= numPipeTotal
* m_pipeInterleaveBytes
;
571 pOut
->dccRamSize
= PowTwoAlign((pIn
->dataSurfaceSize
/ 256), pOut
->dccRamBaseAlign
);
575 BOOL_32 dataThick
= IsThick(pIn
->resourceType
, pIn
->swizzleMode
);
577 UINT_32 minMetaBlkSize
= dataThick
? 65536 : 4096;
579 UINT_32 numFrags
= Max(pIn
->numFrags
, 1u);
580 UINT_32 numSlices
= Max(pIn
->numSlices
, 1u);
582 minMetaBlkSize
/= numFrags
;
584 UINT_32 numCompressBlkPerMetaBlk
= minMetaBlkSize
;
586 UINT_32 numRbTotal
= pIn
->dccKeyFlags
.rbAligned
? m_se
* m_rbPerSe
: 1;
588 if ((numPipeTotal
> 1) || (numRbTotal
> 1))
590 const UINT_32 thinBlkSize
= 1 << (m_settings
.applyAliasFix
? Max(10u, m_pipeInterleaveLog2
) : 10);
592 numCompressBlkPerMetaBlk
=
593 Max(numCompressBlkPerMetaBlk
, m_se
* m_rbPerSe
* (dataThick
? 262144 : thinBlkSize
));
595 if (numCompressBlkPerMetaBlk
> 65536 * pIn
->bpp
)
597 numCompressBlkPerMetaBlk
= 65536 * pIn
->bpp
;
601 Dim3d compressBlkDim
= GetDccCompressBlk(pIn
->resourceType
, pIn
->swizzleMode
, pIn
->bpp
);
602 Dim3d metaBlkDim
= compressBlkDim
;
604 for (UINT_32 index
= 1; index
< numCompressBlkPerMetaBlk
; index
<<= 1)
606 if ((metaBlkDim
.h
< metaBlkDim
.w
) ||
607 ((pIn
->numMipLevels
> 1) && (metaBlkDim
.h
== metaBlkDim
.w
)))
609 if ((dataThick
== FALSE
) || (metaBlkDim
.h
<= metaBlkDim
.d
))
620 if ((dataThick
== FALSE
) || (metaBlkDim
.w
<= metaBlkDim
.d
))
635 GetMetaMipInfo(pIn
->numMipLevels
, &metaBlkDim
, dataThick
, pOut
->pMipInfo
,
636 pIn
->unalignedWidth
, pIn
->unalignedHeight
, numSlices
,
637 &numMetaBlkX
, &numMetaBlkY
, &numMetaBlkZ
);
639 UINT_32 sizeAlign
= numPipeTotal
* numRbTotal
* m_pipeInterleaveBytes
;
641 if (numFrags
> m_maxCompFrag
)
643 sizeAlign
*= (numFrags
/ m_maxCompFrag
);
646 if (m_settings
.metaBaseAlignFix
)
648 sizeAlign
= Max(sizeAlign
, GetBlockSize(pIn
->swizzleMode
));
651 pOut
->dccRamSize
= numMetaBlkX
* numMetaBlkY
* numMetaBlkZ
*
652 numCompressBlkPerMetaBlk
* numFrags
;
653 pOut
->dccRamSize
= PowTwoAlign(pOut
->dccRamSize
, sizeAlign
);
654 pOut
->dccRamBaseAlign
= Max(numCompressBlkPerMetaBlk
, sizeAlign
);
656 pOut
->pitch
= numMetaBlkX
* metaBlkDim
.w
;
657 pOut
->height
= numMetaBlkY
* metaBlkDim
.h
;
658 pOut
->depth
= numMetaBlkZ
* metaBlkDim
.d
;
660 pOut
->compressBlkWidth
= compressBlkDim
.w
;
661 pOut
->compressBlkHeight
= compressBlkDim
.h
;
662 pOut
->compressBlkDepth
= compressBlkDim
.d
;
664 pOut
->metaBlkWidth
= metaBlkDim
.w
;
665 pOut
->metaBlkHeight
= metaBlkDim
.h
;
666 pOut
->metaBlkDepth
= metaBlkDim
.d
;
668 pOut
->metaBlkNumPerSlice
= numMetaBlkX
* numMetaBlkY
;
669 pOut
->fastClearSizePerSlice
=
670 pOut
->metaBlkNumPerSlice
* numCompressBlkPerMetaBlk
* Min(numFrags
, m_maxCompFrag
);
677 ************************************************************************************************************************
678 * Gfx9Lib::HwlComputeMaxBaseAlignments
681 * Gets maximum alignments
684 ************************************************************************************************************************
686 UINT_32
Gfx9Lib::HwlComputeMaxBaseAlignments() const
688 return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB
);
692 ************************************************************************************************************************
693 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
696 * Gets maximum alignments for metadata
698 * maximum alignments for metadata
699 ************************************************************************************************************************
701 UINT_32
Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
703 // Max base alignment for Htile
704 const UINT_32 maxNumPipeTotal
= GetPipeNumForMetaAddressing(TRUE
, ADDR_SW_64KB_Z
);
705 const UINT_32 maxNumRbTotal
= m_se
* m_rbPerSe
;
707 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
708 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
709 ADDR_ASSERT((m_settings
.applyAliasFix
== FALSE
) || (m_pipeInterleaveLog2
<= 10u));
710 const UINT_32 maxNumCompressBlkPerMetaBlk
= 1u << (m_seLog2
+ m_rbPerSeLog2
+ 10u);
712 UINT_32 maxBaseAlignHtile
= maxNumPipeTotal
* maxNumRbTotal
* m_pipeInterleaveBytes
;
714 if (maxNumPipeTotal
> 2)
716 maxBaseAlignHtile
*= (maxNumPipeTotal
>> 1);
719 maxBaseAlignHtile
= Max(maxNumCompressBlkPerMetaBlk
<< 2, maxBaseAlignHtile
);
721 if (m_settings
.metaBaseAlignFix
)
723 maxBaseAlignHtile
= Max(maxBaseAlignHtile
, GetBlockSize(ADDR_SW_64KB
));
726 if (m_settings
.htileAlignFix
)
728 maxBaseAlignHtile
*= maxNumPipeTotal
;
731 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
733 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
734 UINT_32 maxBaseAlignDcc3D
= 65536;
736 if ((maxNumPipeTotal
> 1) || (maxNumRbTotal
> 1))
738 maxBaseAlignDcc3D
= Min(m_se
* m_rbPerSe
* 262144, 65536 * 128u);
741 // Max base alignment for Msaa Dcc
742 UINT_32 maxBaseAlignDccMsaa
= maxNumPipeTotal
* maxNumRbTotal
* m_pipeInterleaveBytes
* (8 / m_maxCompFrag
);
744 if (m_settings
.metaBaseAlignFix
)
746 maxBaseAlignDccMsaa
= Max(maxBaseAlignDccMsaa
, GetBlockSize(ADDR_SW_64KB
));
749 return Max(maxBaseAlignHtile
, Max(maxBaseAlignDccMsaa
, maxBaseAlignDcc3D
));
753 ************************************************************************************************************************
754 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
757 * Interface function stub of AddrComputeCmaskAddrFromCoord
761 ************************************************************************************************************************
763 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeCmaskAddrFromCoord(
764 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
765 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT
* pOut
) ///< [out] output structure
767 ADDR2_COMPUTE_CMASK_INFO_INPUT input
= {0};
768 input
.size
= sizeof(input
);
769 input
.cMaskFlags
= pIn
->cMaskFlags
;
770 input
.colorFlags
= pIn
->colorFlags
;
771 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
772 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
773 input
.numSlices
= Max(pIn
->numSlices
, 1u);
774 input
.swizzleMode
= pIn
->swizzleMode
;
775 input
.resourceType
= pIn
->resourceType
;
777 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output
= {0};
778 output
.size
= sizeof(output
);
780 ADDR_E_RETURNCODE returnCode
= ComputeCmaskInfo(&input
, &output
);
782 if (returnCode
== ADDR_OK
)
784 UINT_32 fmaskBpp
= GetFmaskBpp(pIn
->numSamples
, pIn
->numFrags
);
785 UINT_32 fmaskElementBytesLog2
= Log2(fmaskBpp
>> 3);
786 UINT_32 metaBlkWidthLog2
= Log2(output
.metaBlkWidth
);
787 UINT_32 metaBlkHeightLog2
= Log2(output
.metaBlkHeight
);
789 MetaEqParams metaEqParams
= {0, fmaskElementBytesLog2
, 0, pIn
->cMaskFlags
,
790 Gfx9DataFmask
, pIn
->swizzleMode
, pIn
->resourceType
,
791 metaBlkWidthLog2
, metaBlkHeightLog2
, 0, 3, 3, 0};
793 const CoordEq
* pMetaEq
= GetMetaEquation(metaEqParams
);
795 UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
796 UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
797 UINT_32 zb
= pIn
->slice
;
799 UINT_32 pitchInBlock
= output
.pitch
/ output
.metaBlkWidth
;
800 UINT_32 sliceSizeInBlock
= (output
.height
/ output
.metaBlkHeight
) * pitchInBlock
;
801 UINT_32 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
803 UINT_64 address
= pMetaEq
->solve(pIn
->x
, pIn
->y
, pIn
->slice
, 0, blockIndex
);
805 pOut
->addr
= address
>> 1;
806 pOut
->bitPosition
= static_cast<UINT_32
>((address
& 1) << 2);
809 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->cMaskFlags
.pipeAligned
,
812 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
814 pOut
->addr
^= (pipeXor
<< m_pipeInterleaveLog2
);
821 ************************************************************************************************************************
822 * Gfx9Lib::HwlComputeHtileAddrFromCoord
825 * Interface function stub of AddrComputeHtileAddrFromCoord
829 ************************************************************************************************************************
831 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeHtileAddrFromCoord(
832 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
833 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT
* pOut
) ///< [out] output structure
835 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
837 if (pIn
->numMipLevels
> 1)
839 returnCode
= ADDR_NOTIMPLEMENTED
;
843 ADDR2_COMPUTE_HTILE_INFO_INPUT input
= {0};
844 input
.size
= sizeof(input
);
845 input
.hTileFlags
= pIn
->hTileFlags
;
846 input
.depthFlags
= pIn
->depthflags
;
847 input
.swizzleMode
= pIn
->swizzleMode
;
848 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
849 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
850 input
.numSlices
= Max(pIn
->numSlices
, 1u);
851 input
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
853 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output
= {0};
854 output
.size
= sizeof(output
);
856 returnCode
= ComputeHtileInfo(&input
, &output
);
858 if (returnCode
== ADDR_OK
)
860 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
861 UINT_32 metaBlkWidthLog2
= Log2(output
.metaBlkWidth
);
862 UINT_32 metaBlkHeightLog2
= Log2(output
.metaBlkHeight
);
863 UINT_32 numSamplesLog2
= Log2(pIn
->numSamples
);
865 MetaEqParams metaEqParams
= {0, elementBytesLog2
, numSamplesLog2
, pIn
->hTileFlags
,
866 Gfx9DataDepthStencil
, pIn
->swizzleMode
, ADDR_RSRC_TEX_2D
,
867 metaBlkWidthLog2
, metaBlkHeightLog2
, 0, 3, 3, 0};
869 const CoordEq
* pMetaEq
= GetMetaEquation(metaEqParams
);
871 UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
872 UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
873 UINT_32 zb
= pIn
->slice
;
875 UINT_32 pitchInBlock
= output
.pitch
/ output
.metaBlkWidth
;
876 UINT_32 sliceSizeInBlock
= (output
.height
/ output
.metaBlkHeight
) * pitchInBlock
;
877 UINT_32 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
879 UINT_64 address
= pMetaEq
->solve(pIn
->x
, pIn
->y
, pIn
->slice
, 0, blockIndex
);
881 pOut
->addr
= address
>> 1;
883 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->hTileFlags
.pipeAligned
,
886 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
888 pOut
->addr
^= (pipeXor
<< m_pipeInterleaveLog2
);
896 ************************************************************************************************************************
897 * Gfx9Lib::HwlComputeHtileCoordFromAddr
900 * Interface function stub of AddrComputeHtileCoordFromAddr
904 ************************************************************************************************************************
906 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeHtileCoordFromAddr(
907 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT
* pIn
, ///< [in] input structure
908 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT
* pOut
) ///< [out] output structure
910 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
912 if (pIn
->numMipLevels
> 1)
914 returnCode
= ADDR_NOTIMPLEMENTED
;
918 ADDR2_COMPUTE_HTILE_INFO_INPUT input
= {0};
919 input
.size
= sizeof(input
);
920 input
.hTileFlags
= pIn
->hTileFlags
;
921 input
.swizzleMode
= pIn
->swizzleMode
;
922 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
923 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
924 input
.numSlices
= Max(pIn
->numSlices
, 1u);
925 input
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
927 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output
= {0};
928 output
.size
= sizeof(output
);
930 returnCode
= ComputeHtileInfo(&input
, &output
);
932 if (returnCode
== ADDR_OK
)
934 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
935 UINT_32 metaBlkWidthLog2
= Log2(output
.metaBlkWidth
);
936 UINT_32 metaBlkHeightLog2
= Log2(output
.metaBlkHeight
);
937 UINT_32 numSamplesLog2
= Log2(pIn
->numSamples
);
939 MetaEqParams metaEqParams
= {0, elementBytesLog2
, numSamplesLog2
, pIn
->hTileFlags
,
940 Gfx9DataDepthStencil
, pIn
->swizzleMode
, ADDR_RSRC_TEX_2D
,
941 metaBlkWidthLog2
, metaBlkHeightLog2
, 0, 3, 3, 0};
943 const CoordEq
* pMetaEq
= GetMetaEquation(metaEqParams
);
945 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->hTileFlags
.pipeAligned
,
948 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
950 UINT_64 nibbleAddress
= (pIn
->addr
^ (pipeXor
<< m_pipeInterleaveLog2
)) << 1;
952 UINT_32 pitchInBlock
= output
.pitch
/ output
.metaBlkWidth
;
953 UINT_32 sliceSizeInBlock
= (output
.height
/ output
.metaBlkHeight
) * pitchInBlock
;
955 UINT_32 x
, y
, z
, s
, m
;
956 pMetaEq
->solveAddr(nibbleAddress
, sliceSizeInBlock
, x
, y
, z
, s
, m
);
958 pOut
->slice
= m
/ sliceSizeInBlock
;
959 pOut
->y
= ((m
% sliceSizeInBlock
) / pitchInBlock
) * output
.metaBlkHeight
+ y
;
960 pOut
->x
= (m
% pitchInBlock
) * output
.metaBlkWidth
+ x
;
968 ************************************************************************************************************************
969 * Gfx9Lib::HwlComputeDccAddrFromCoord
972 * Interface function stub of AddrComputeDccAddrFromCoord
976 ************************************************************************************************************************
978 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeDccAddrFromCoord(
979 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT
* pIn
,
980 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT
* pOut
)
982 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
984 if ((pIn
->numMipLevels
> 1) || (pIn
->mipId
> 1) || pIn
->dccKeyFlags
.linear
)
986 returnCode
= ADDR_NOTIMPLEMENTED
;
990 ADDR2_COMPUTE_DCCINFO_INPUT input
= {0};
991 input
.size
= sizeof(input
);
992 input
.dccKeyFlags
= pIn
->dccKeyFlags
;
993 input
.colorFlags
= pIn
->colorFlags
;
994 input
.swizzleMode
= pIn
->swizzleMode
;
995 input
.resourceType
= pIn
->resourceType
;
996 input
.bpp
= pIn
->bpp
;
997 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
998 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
999 input
.numSlices
= Max(pIn
->numSlices
, 1u);
1000 input
.numFrags
= Max(pIn
->numFrags
, 1u);
1001 input
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
1003 ADDR2_COMPUTE_DCCINFO_OUTPUT output
= {0};
1004 output
.size
= sizeof(output
);
1006 returnCode
= ComputeDccInfo(&input
, &output
);
1008 if (returnCode
== ADDR_OK
)
1010 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
1011 UINT_32 numSamplesLog2
= Log2(pIn
->numFrags
);
1012 UINT_32 metaBlkWidthLog2
= Log2(output
.metaBlkWidth
);
1013 UINT_32 metaBlkHeightLog2
= Log2(output
.metaBlkHeight
);
1014 UINT_32 metaBlkDepthLog2
= Log2(output
.metaBlkDepth
);
1015 UINT_32 compBlkWidthLog2
= Log2(output
.compressBlkWidth
);
1016 UINT_32 compBlkHeightLog2
= Log2(output
.compressBlkHeight
);
1017 UINT_32 compBlkDepthLog2
= Log2(output
.compressBlkDepth
);
1019 MetaEqParams metaEqParams
= {pIn
->mipId
, elementBytesLog2
, numSamplesLog2
, pIn
->dccKeyFlags
,
1020 Gfx9DataColor
, pIn
->swizzleMode
, pIn
->resourceType
,
1021 metaBlkWidthLog2
, metaBlkHeightLog2
, metaBlkDepthLog2
,
1022 compBlkWidthLog2
, compBlkHeightLog2
, compBlkDepthLog2
};
1024 const CoordEq
* pMetaEq
= GetMetaEquation(metaEqParams
);
1026 UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
1027 UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
1028 UINT_32 zb
= pIn
->slice
/ output
.metaBlkDepth
;
1030 UINT_32 pitchInBlock
= output
.pitch
/ output
.metaBlkWidth
;
1031 UINT_32 sliceSizeInBlock
= (output
.height
/ output
.metaBlkHeight
) * pitchInBlock
;
1032 UINT_32 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
1034 UINT_64 address
= pMetaEq
->solve(pIn
->x
, pIn
->y
, pIn
->slice
, pIn
->sample
, blockIndex
);
1036 pOut
->addr
= address
>> 1;
1038 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->dccKeyFlags
.pipeAligned
,
1041 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
1043 pOut
->addr
^= (pipeXor
<< m_pipeInterleaveLog2
);
1051 ************************************************************************************************************************
1052 * Gfx9Lib::HwlInitGlobalParams
1055 * Initializes global parameters
1058 * TRUE if all settings are valid
1060 ************************************************************************************************************************
// Decodes the GB_ADDR_CONFIG register value carried in pCreateIn->regValue into
// library state (pipe interleave size, max compressed fragments, variable block
// size, ...) and then builds the equation table via InitEquationTable().
// `valid` starts out TRUE; per the banner it reports whether all settings are valid.
1062 BOOL_32
Gfx9Lib::HwlInitGlobalParams(
1063 const ADDR_CREATE_INPUT
* pCreateIn
) ///< [in] create input
1065 BOOL_32 valid
= TRUE
;
// Register decoding is only performed for Arctic Islands parts.
1067 if (m_settings
.isArcticIsland
)
1069 GB_ADDR_CONFIG gbAddrConfig
;
// Load the raw 32-bit register value so the individual bit-fields can be read.
1071 gbAddrConfig
.u32All
= pCreateIn
->regValue
.gbAddrConfig
;
1073 // These values are copied from CModel code
1074 switch (gbAddrConfig
.bits
.NUM_PIPES
)
1076 case ADDR_CONFIG_1_PIPE
:
1080 case ADDR_CONFIG_2_PIPE
:
1084 case ADDR_CONFIG_4_PIPE
:
1088 case ADDR_CONFIG_8_PIPE
:
1092 case ADDR_CONFIG_16_PIPE
:
1096 case ADDR_CONFIG_32_PIPE
:
1101 ADDR_ASSERT_ALWAYS();
// Translate the PIPE_INTERLEAVE_SIZE field into both a byte count and its log2.
1105 switch (gbAddrConfig
.bits
.PIPE_INTERLEAVE_SIZE
)
1107 case ADDR_CONFIG_PIPE_INTERLEAVE_256B
:
1108 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_256B
;
1109 m_pipeInterleaveLog2
= 8;
1111 case ADDR_CONFIG_PIPE_INTERLEAVE_512B
:
1112 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_512B
;
1113 m_pipeInterleaveLog2
= 9;
1115 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB
:
1116 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_1KB
;
1117 m_pipeInterleaveLog2
= 10;
1119 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB
:
1120 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_2KB
;
1121 m_pipeInterleaveLog2
= 11;
1124 ADDR_ASSERT_ALWAYS();
1128 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1129 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1130 ADDR_ASSERT(m_pipeInterleaveBytes
== ADDR_PIPEINTERLEAVE_256B
);
1132 switch (gbAddrConfig
.bits
.NUM_BANKS
)
1134 case ADDR_CONFIG_1_BANK
:
1138 case ADDR_CONFIG_2_BANK
:
1142 case ADDR_CONFIG_4_BANK
:
1146 case ADDR_CONFIG_8_BANK
:
1150 case ADDR_CONFIG_16_BANK
:
1155 ADDR_ASSERT_ALWAYS();
1159 switch (gbAddrConfig
.bits
.NUM_SHADER_ENGINES
)
1161 case ADDR_CONFIG_1_SHADER_ENGINE
:
1165 case ADDR_CONFIG_2_SHADER_ENGINE
:
1169 case ADDR_CONFIG_4_SHADER_ENGINE
:
1173 case ADDR_CONFIG_8_SHADER_ENGINE
:
1178 ADDR_ASSERT_ALWAYS();
1182 switch (gbAddrConfig
.bits
.NUM_RB_PER_SE
)
1184 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE
:
1188 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE
:
1192 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE
:
1197 ADDR_ASSERT_ALWAYS();
// Record log2 of the maximum number of compressed fragments (1/2/4/8 -> 0/1/2/3).
1201 switch (gbAddrConfig
.bits
.MAX_COMPRESSED_FRAGS
)
1203 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS
:
1205 m_maxCompFragLog2
= 0;
1207 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS
:
1209 m_maxCompFragLog2
= 1;
1211 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS
:
1213 m_maxCompFragLog2
= 2;
1215 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS
:
1217 m_maxCompFragLog2
= 3;
1220 ADDR_ASSERT_ALWAYS();
// The caller-supplied variable block size must be 0 or within [17, 20] (debug assert);
// whatever was supplied is then clamped into [17, 20], so 0 becomes 17.
1224 m_blockVarSizeLog2
= pCreateIn
->regValue
.blockVarSizeLog2
;
1225 ADDR_ASSERT((m_blockVarSizeLog2
== 0) ||
1226 ((m_blockVarSizeLog2
>= 17u) && (m_blockVarSizeLog2
<= 20u)));
1227 m_blockVarSizeLog2
= Min(Max(17u, m_blockVarSizeLog2
), 20u);
// Specific RB-per-SE / pipe / SE combinations: asserted not to occur on Vega10,
// Raven or Vega20; on Vega12 the htileCacheRbConflict setting is switched on.
1229 if ((m_rbPerSeLog2
== 1) &&
1230 (((m_pipesLog2
== 1) && ((m_seLog2
== 2) || (m_seLog2
== 3))) ||
1231 ((m_pipesLog2
== 2) && ((m_seLog2
== 1) || (m_seLog2
== 2)))))
1233 ADDR_ASSERT(m_settings
.isVega10
== FALSE
);
1234 ADDR_ASSERT(m_settings
.isRaven
== FALSE
);
1235 ADDR_ASSERT(m_settings
.isVega20
== FALSE
);
1237 if (m_settings
.isVega12
)
1239 m_settings
.htileCacheRbConflict
= 1;
// NOTE(review): presumably reached only when the chip is not an Arctic Islands part - confirm.
1246 ADDR_NOT_IMPLEMENTED();
// Build the swizzle equation lookup tables.
1251 InitEquationTable();
1258 ************************************************************************************************************************
1259 * Gfx9Lib::HwlConvertChipFamily
1262 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1265 ************************************************************************************************************************
// Populates the m_settings feature/workaround flags from the chip family and
// revision IDs. The local `family` is initialised to ADDR_CHIP_FAMILY_AI.
// NOTE(review): confirm which uChipFamily value selects each group of assignments below.
1267 ChipFamily
Gfx9Lib::HwlConvertChipFamily(
1268 UINT_32 uChipFamily
, ///< [in] chip family defined in atiid.h
1269 UINT_32 uChipRevision
) ///< [in] chip revision defined in "asic_family"_id.h
1271 ChipFamily family
= ADDR_CHIP_FAMILY_AI
;
1273 switch (uChipFamily
)
// Vega group: mark as Arctic Islands with a DCE12 display block and derive the
// per-ASIC flags from the revision ID.
1276 m_settings
.isArcticIsland
= 1;
1277 m_settings
.isVega10
= ASICREV_IS_VEGA10_P(uChipRevision
);
1278 m_settings
.isVega12
= ASICREV_IS_VEGA12_P(uChipRevision
);
1279 m_settings
.isVega20
= ASICREV_IS_VEGA20_P(uChipRevision
);
1280 m_settings
.isDce12
= 1;
// Parts other than Vega10 additionally get the htile alignment and alias fixes.
1282 if (m_settings
.isVega10
== 0)
1284 m_settings
.htileAlignFix
= 1;
1285 m_settings
.applyAliasFix
= 1;
1288 m_settings
.metaBaseAlignFix
= 1;
1290 m_settings
.depthPipeXorDisable
= 1;
// Raven group: also Arctic Islands; isRaven covers both Raven and Raven2 revisions.
1293 m_settings
.isArcticIsland
= 1;
1294 m_settings
.isRaven
= ASICREV_IS_RAVEN(uChipRevision
) || ASICREV_IS_RAVEN2(uChipRevision
);
1296 if (m_settings
.isRaven
)
1298 m_settings
.isDcn1
= 1;
1301 m_settings
.metaBaseAlignFix
= 1;
// depthPipeXorDisable is applied for Raven revisions only (not Raven2).
1303 if (ASICREV_IS_RAVEN(uChipRevision
))
1305 m_settings
.depthPipeXorDisable
= 1;
// Debug assert that always fires (the negation of a string literal is false).
1310 ADDR_ASSERT(!"This should be a Fusion");
1318 ************************************************************************************************************************
1319 * Gfx9Lib::GetRbEquation
1325 ************************************************************************************************************************
// Builds the render-backend (RB) selection equation into *pRbEq.
// The equation has (numRbPerSeLog2 + numSeLog2) bits, each a combination of x/y
// coordinate terms starting at bit `rbRegion` of the coordinate.
1327 VOID
Gfx9Lib::GetRbEquation(
1328 CoordEq
* pRbEq
, ///< [out] rb equation
1329 UINT_32 numRbPerSeLog2
, ///< [in] number of rb per shader engine
1330 UINT_32 numSeLog2
) ///< [in] number of shader engine
1333 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1334 UINT_32 rbRegion
= (numRbPerSeLog2
== 0) ? 5 : 4;
1335 Coordinate
cx('x', rbRegion
);
1336 Coordinate
cy('y', rbRegion
);
1339 UINT_32 numRbTotalLog2
= numRbPerSeLog2
+ numSeLog2
;
1341 // Clear the rb equation
1343 pRbEq
->resize(numRbTotalLog2
);
1345 if ((numSeLog2
> 0) && (numRbPerSeLog2
== 1))
1347 // Special case when more than 1 SE, and 2 RB per SE
1348 (*pRbEq
)[0].add(cx
);
1349 (*pRbEq
)[0].add(cy
);
// Without the alias fix an extra y term is added to bit 0.
1353 if (m_settings
.applyAliasFix
== false)
1355 (*pRbEq
)[0].add(cy
);
1358 (*pRbEq
)[0].add(cy
);
// Two coordinate terms are distributed per remaining RB bit.
1362 UINT_32 numBits
= 2 * (numRbTotalLog2
- start
);
1364 for (UINT_32 i
= 0; i
< numBits
; i
++)
// Target bit index: counts up from `start` for the first pass, then mirrors back
// down once (start + i) reaches numRbTotalLog2.
1367 start
+ (((start
+ i
) >= numRbTotalLog2
) ? (2 * (numRbTotalLog2
- start
) - i
- 1) : i
);
1371 (*pRbEq
)[idx
].add(cx
);
1376 (*pRbEq
)[idx
].add(cy
);
1383 ************************************************************************************************************************
1384 * Gfx9Lib::GetDataEquation
1387 * Get data equation for fmask and Z
1390 ************************************************************************************************************************
// Builds the address equation of the data surface into *pDataEq.
// Colour surfaces are handled per swizzle family (linear / thick / thin); the
// remaining data types use the sample-interleaved layout at the end of the function.
1392 VOID
Gfx9Lib::GetDataEquation(
1393 CoordEq
* pDataEq
, ///< [out] data surface equation
1394 Gfx9DataType dataSurfaceType
, ///< [in] data surface type
1395 AddrSwizzleMode swizzleMode
, ///< [in] data surface swizzle mode
1396 AddrResourceType resourceType
, ///< [in] data surface resource type
1397 UINT_32 elementBytesLog2
, ///< [in] data surface element bytes
1398 UINT_32 numSamplesLog2
) ///< [in] data surface sample count
// Running coordinates for x, y, z and sample, all starting at bit 0.
1401 Coordinate
cx('x', 0);
1402 Coordinate
cy('y', 0);
1403 Coordinate
cz('z', 0);
1404 Coordinate
cs('s', 0);
1406 // Clear the equation
1408 pDataEq
->resize(27);
1410 if (dataSurfaceType
== Gfx9DataColor
)
1412 if (IsLinear(swizzleMode
))
// Linear colour: a 49-bit equation in which every bit takes an 'm' coordinate term.
1414 Coordinate
cm('m', 0);
1416 pDataEq
->resize(49);
1418 for (UINT_32 i
= 0; i
< 49; i
++)
1420 (*pDataEq
)[i
].add(cm
);
1424 else if (IsThick(resourceType
, swizzleMode
))
1426 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1428 if (IsStandardSwizzle(resourceType
, swizzleMode
))
1430 // Standard 3d swizzle
1431 // Fill in bottom x bits
1432 for (i
= elementBytesLog2
; i
< 4; i
++)
1434 (*pDataEq
)[i
].add(cx
);
1437 // Fill in 2 bits of y and then z
1438 for (i
= 4; i
< 6; i
++)
1440 (*pDataEq
)[i
].add(cy
);
1443 for (i
= 6; i
< 8; i
++)
1445 (*pDataEq
)[i
].add(cz
);
// Bits 8 and 9 depend on the element size.
1448 if (elementBytesLog2
< 2)
1450 // fill in z & y bit
1451 (*pDataEq
)[8].add(cz
);
1452 (*pDataEq
)[9].add(cy
);
1456 else if (elementBytesLog2
== 2)
1458 // fill in y and x bit
1459 (*pDataEq
)[8].add(cy
);
1460 (*pDataEq
)[9].add(cx
);
1467 (*pDataEq
)[8].add(cx
);
1469 (*pDataEq
)[9].add(cx
);
// Non-standard thick swizzle: 2D-interleave x/y up to bit m2dEnd, then place
// numZs z bits directly above it.
1476 UINT_32 m2dEnd
= (elementBytesLog2
==0) ? 3 : ((elementBytesLog2
< 4) ? 4 : 5);
1477 UINT_32 numZs
= (elementBytesLog2
== 0 || elementBytesLog2
== 4) ?
1478 2 : ((elementBytesLog2
== 1) ? 3 : 1);
1479 pDataEq
->mort2d(cx
, cy
, elementBytesLog2
, m2dEnd
);
1480 for (i
= m2dEnd
+ 1; i
<= m2dEnd
+ numZs
; i
++)
1482 (*pDataEq
)[i
].add(cz
);
1485 if ((elementBytesLog2
== 0) || (elementBytesLog2
== 3))
1488 (*pDataEq
)[6].add(cx
);
1489 (*pDataEq
)[7].add(cz
);
1493 else if (elementBytesLog2
== 2)
1496 (*pDataEq
)[6].add(cy
);
1497 (*pDataEq
)[7].add(cz
);
1502 (*pDataEq
)[8].add(cy
);
1503 (*pDataEq
)[9].add(cx
);
1507 // Fill in bit 10 and up
1508 pDataEq
->mort3d( cz
, cy
, cx
, 10 );
1510 else if (IsThin(resourceType
, swizzleMode
))
1512 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swizzleMode
);
// Number of y bits inside the micro tile, and the bit at which sample bits start.
1514 UINT_32 microYBits
= (8 - elementBytesLog2
) / 2;
1515 UINT_32 tileSplitStart
= blockSizeLog2
- numSamplesLog2
;
1517 // Fill in bottom x bits
1518 for (i
= elementBytesLog2
; i
< 4; i
++)
1520 (*pDataEq
)[i
].add(cx
);
1523 // Fill in bottom y bits
1524 for (i
= 4; i
< 4 + microYBits
; i
++)
1526 (*pDataEq
)[i
].add(cy
);
1529 // Fill in last of the micro_x bits
1530 for (i
= 4 + microYBits
; i
< 8; i
++)
1532 (*pDataEq
)[i
].add(cx
);
1535 // Fill in x/y bits below sample split
1536 pDataEq
->mort2d(cy
, cx
, 8, tileSplitStart
- 1);
1537 // Fill in sample bits
1538 for (i
= 0; i
< numSamplesLog2
; i
++)
1541 (*pDataEq
)[tileSplitStart
+ i
].add(cs
);
1543 // Fill in x/y bits above sample split
// The x/y order above the split depends on whether the parities of
// numSamplesLog2 and blockSizeLog2 differ.
1544 if ((numSamplesLog2
& 1) ^ (blockSizeLog2
& 1))
1546 pDataEq
->mort2d(cx
, cy
, blockSizeLog2
);
1550 pDataEq
->mort2d(cy
, cx
, blockSizeLog2
);
1555 ADDR_ASSERT_ALWAYS();
// Non-colour layout: sample bits sit directly above the element bytes, followed
// by x-major pixel bits up to ymajStart and y-major bits from there on.
1561 UINT_32 sampleStart
= elementBytesLog2
;
1562 UINT_32 pixelStart
= elementBytesLog2
+ numSamplesLog2
;
1563 UINT_32 ymajStart
= 6 + numSamplesLog2
;
1565 for (UINT_32 s
= 0; s
< numSamplesLog2
; s
++)
1568 (*pDataEq
)[sampleStart
+ s
].add(cs
);
1571 // Put in the x-major order pixel bits
1572 pDataEq
->mort2d(cx
, cy
, pixelStart
, ymajStart
- 1);
1573 // Put in the y-major order pixel bits
1574 pDataEq
->mort2d(cy
, cx
, ymajStart
);
1579 ************************************************************************************************************************
1580 * Gfx9Lib::GetPipeEquation
1586 ************************************************************************************************************************
// Derives the pipe-selection equation (*pPipeEq) from the data equation (*pDataEq):
// takes numPipeLog2 bits starting at the pipe interleave and, for xor swizzle
// modes, xors in higher address bits (and z bits in one case).
// NOTE(review): CoordEq::copy() appears to write into its first argument - confirm.
1588 VOID
Gfx9Lib::GetPipeEquation(
1589 CoordEq
* pPipeEq
, ///< [out] pipe equation
1590 CoordEq
* pDataEq
, ///< [in] data equation
1591 UINT_32 pipeInterleaveLog2
, ///< [in] pipe interleave
1592 UINT_32 numPipeLog2
, ///< [in] number of pipes
1593 UINT_32 numSamplesLog2
, ///< [in] data surface sample count
1594 Gfx9DataType dataSurfaceType
, ///< [in] data surface type
1595 AddrSwizzleMode swizzleMode
, ///< [in] data surface swizzle mode
1596 AddrResourceType resourceType
///< [in] data surface resource type
1599 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swizzleMode
);
// Work on a local copy (dataEq) of the caller's data equation.
1602 pDataEq
->copy(dataEq
);
1604 if (dataSurfaceType
== Gfx9DataColor
)
// Colour only: shift the equation down by numSamplesLog2 starting at the
// sample-split bit (blockSizeLog2 - numSamplesLog2).
1606 INT_32 shift
= static_cast<INT_32
>(numSamplesLog2
);
1607 dataEq
.shift(-shift
, blockSizeLog2
- numSamplesLog2
);
// Initial pipe equation: numPipeLog2 bits of dataEq starting at pipeInterleaveLog2.
1610 dataEq
.copy(*pPipeEq
, pipeInterleaveLog2
, numPipeLog2
);
1612 // This section should only apply to z/stencil, maybe fmask
1613 // If the pipe bit is below the comp block size,
1614 // then keep moving up the address until we find a bit that is above
1615 UINT_32 pipeStart
= 0;
1617 if (dataSurfaceType
!= Gfx9DataColor
)
// Threshold coordinate x3; bits whose first term is below it are skipped.
1619 Coordinate
tileMin('x', 3);
1621 while (dataEq
[pipeInterleaveLog2
+ pipeStart
][0] < tileMin
)
1626 // if pipe is 0, then the first pipe bit is above the comp block size,
1627 // so we don't need to do anything
1628 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1629 // we will get the same pipe equation
1632 for (UINT_32 i
= 0; i
< numPipeLog2
; i
++)
1634 // Copy the jth bit above pipe interleave to the current pipe equation bit
1635 dataEq
[pipeInterleaveLog2
+ pipeStart
+ i
].copyto((*pPipeEq
)[i
]);
1640 if (IsPrt(swizzleMode
))
1642 // Clear out bits above the block size if prt's are enabled
1643 dataEq
.resize(blockSizeLog2
);
1647 if (IsXor(swizzleMode
))
1651 if (IsThick(resourceType
, swizzleMode
))
// Thick: gather 2 * numPipeLog2 bits above the pipe bits and fold each
// consecutive pair into one xor-mask bit.
1655 dataEq
.copy(xorMask2
, pipeInterleaveLog2
+ numPipeLog2
, 2 * numPipeLog2
);
1657 xorMask
.resize(numPipeLog2
);
1659 for (UINT_32 pipeIdx
= 0; pipeIdx
< numPipeLog2
; pipeIdx
++)
1661 xorMask
[pipeIdx
].add(xorMask2
[2 * pipeIdx
]);
1662 xorMask
[pipeIdx
].add(xorMask2
[2 * pipeIdx
+ 1]);
1667 // Xor in the bits above the pipe+gpu bits
1668 dataEq
.copy(xorMask
, pipeInterleaveLog2
+ pipeStart
+ numPipeLog2
, numPipeLog2
);
1670 if ((numSamplesLog2
== 0) && (IsPrt(swizzleMode
) == FALSE
))
1674 // if 1xaa and not prt, then xor in the z bits
1676 xorMask2
.resize(numPipeLog2
);
// z bits are assigned in reverse order: pipe bit 0 receives z[numPipeLog2 - 1].
1677 for (UINT_32 pipeIdx
= 0; pipeIdx
< numPipeLog2
; pipeIdx
++)
1679 co
.set('z', numPipeLog2
- 1 - pipeIdx
);
1680 xorMask2
[pipeIdx
].add(co
);
1683 pPipeEq
->xorin(xorMask2
);
1688 pPipeEq
->xorin(xorMask
);
1692 ************************************************************************************************************************
1693 * Gfx9Lib::GetMetaEquation
1696 * Get meta equation for cmask/htile/DCC
1698 * Pointer to a calculated meta equation
1699 ************************************************************************************************************************
// Returns the meta equation for the given parameter set, using a small cache of
// MaxCachedMetaEq entries keyed by MetaEqParams. On a cache miss the slot at
// m_metaEqOverrideIndex is overwritten and the equation is generated by
// GenMetaEquation(); the override index then advances modulo MaxCachedMetaEq.
1701 const CoordEq
* Gfx9Lib::GetMetaEquation(
1702 const MetaEqParams
& metaEqParams
)
1704 UINT_32 cachedMetaEqIndex
;
// Linear search of the cached keys.
// NOTE(review): memcmp compares the raw bytes of MetaEqParams, including any
// padding - verify the struct has no padding or that keys are zero-initialised.
1706 for (cachedMetaEqIndex
= 0; cachedMetaEqIndex
< MaxCachedMetaEq
; cachedMetaEqIndex
++)
1708 if (memcmp(&metaEqParams
,
1709 &m_cachedMetaEqKey
[cachedMetaEqIndex
],
1710 static_cast<UINT_32
>(sizeof(metaEqParams
))) == 0)
1716 CoordEq
* pMetaEq
= NULL
;
// An index inside the table means the search found a matching key.
1718 if (cachedMetaEqIndex
< MaxCachedMetaEq
)
1720 pMetaEq
= &m_cachedMetaEq
[cachedMetaEqIndex
];
// Miss: store the new key, claim the slot and advance the replacement index.
1724 m_cachedMetaEqKey
[m_metaEqOverrideIndex
] = metaEqParams
;
1726 pMetaEq
= &m_cachedMetaEq
[m_metaEqOverrideIndex
++];
1728 m_metaEqOverrideIndex
%= MaxCachedMetaEq
;
// Generate the equation into the claimed cache slot.
1730 GenMetaEquation(pMetaEq
,
1731 metaEqParams
.maxMip
,
1732 metaEqParams
.elementBytesLog2
,
1733 metaEqParams
.numSamplesLog2
,
1734 metaEqParams
.metaFlag
,
1735 metaEqParams
.dataSurfaceType
,
1736 metaEqParams
.swizzleMode
,
1737 metaEqParams
.resourceType
,
1738 metaEqParams
.metaBlkWidthLog2
,
1739 metaEqParams
.metaBlkHeightLog2
,
1740 metaEqParams
.metaBlkDepthLog2
,
1741 metaEqParams
.compBlkWidthLog2
,
1742 metaEqParams
.compBlkHeightLog2
,
1743 metaEqParams
.compBlkDepthLog2
);
1750 ************************************************************************************************************************
1751 * Gfx9Lib::GenMetaEquation
1754 * Generate meta equation for cmask/htile/DCC
1757 ************************************************************************************************************************
// Generates the metadata (cmask/htile/DCC) address equation into *pMetaEq.
// Outline of the visible steps:
//   1. build the data-surface and pipe equations;
//   2. build an x/y(/z) interleaved equation and filter it to the coordinate bits
//      between the compress block size and the meta block size;
//   3. remove bits already implied by the pipe and RB equations;
//   4. widen to a 49-bit nibble address and re-insert the pipe bits, the remaining
//      RB bits and the uncompressed fragment bits above the pipe interleave.
1759 VOID
Gfx9Lib::GenMetaEquation(
1760 CoordEq
* pMetaEq
, ///< [out] meta equation
1761 UINT_32 maxMip
, ///< [in] max mip Id
1762 UINT_32 elementBytesLog2
, ///< [in] data surface element bytes
1763 UINT_32 numSamplesLog2
, ///< [in] data surface sample count
1764 ADDR2_META_FLAGS metaFlag
, ///< [in] meta flag
1765 Gfx9DataType dataSurfaceType
, ///< [in] data surface type
1766 AddrSwizzleMode swizzleMode
, ///< [in] data surface swizzle mode
1767 AddrResourceType resourceType
, ///< [in] data surface resource type
1768 UINT_32 metaBlkWidthLog2
, ///< [in] meta block width
1769 UINT_32 metaBlkHeightLog2
, ///< [in] meta block height
1770 UINT_32 metaBlkDepthLog2
, ///< [in] meta block depth
1771 UINT_32 compBlkWidthLog2
, ///< [in] compress block width
1772 UINT_32 compBlkHeightLog2
, ///< [in] compress block height
1773 UINT_32 compBlkDepthLog2
) ///< [in] compress block depth
1776 UINT_32 numPipeTotalLog2
= GetPipeLog2ForMetaAddressing(metaFlag
.pipeAligned
, swizzleMode
);
1777 UINT_32 pipeInterleaveLog2
= m_pipeInterleaveLog2
;
1779 // Get the correct data address and rb equation
1781 GetDataEquation(&dataEq
, dataSurfaceType
, swizzleMode
, resourceType
,
1782 elementBytesLog2
, numSamplesLog2
);
1784 // Get pipe and rb equations
1785 CoordEq pipeEquation
;
1786 GetPipeEquation(&pipeEquation
, &dataEq
, pipeInterleaveLog2
, numPipeTotalLog2
,
1787 numSamplesLog2
, dataSurfaceType
, swizzleMode
, resourceType
);
// From here on the pipe count is taken from the size of the generated equation.
1788 numPipeTotalLog2
= pipeEquation
.getsize();
1790 if (metaFlag
.linear
)
1792 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1793 ADDR_ASSERT_ALWAYS();
1795 ADDR_ASSERT(dataSurfaceType
== Gfx9DataColor
);
1797 dataEq
.copy(*pMetaEq
);
1799 if (IsLinear(swizzleMode
))
1801 if (metaFlag
.pipeAligned
)
1803 // Remove the pipe bits
1804 INT_32 shift
= static_cast<INT_32
>(numPipeTotalLog2
);
1805 pMetaEq
->shift(-shift
, pipeInterleaveLog2
);
1807 // Divide by comp block size, which for linear (which is always color) is 256 B
1810 if (metaFlag
.pipeAligned
)
1812 // Put pipe bits back in
1813 pMetaEq
->shift(numPipeTotalLog2
, pipeInterleaveLog2
);
1815 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
1817 pipeEquation
[i
].copyto((*pMetaEq
)[pipeInterleaveLog2
+ i
]);
// Non-linear metadata path.
// For colour surfaces the compressed fragment count is capped at
// m_maxCompFragLog2; the excess becomes "uncompressed" fragment bits.
1826 UINT_32 maxCompFragLog2
= static_cast<INT_32
>(m_maxCompFragLog2
);
1827 UINT_32 compFragLog2
=
1828 ((dataSurfaceType
== Gfx9DataColor
) && (numSamplesLog2
> maxCompFragLog2
)) ?
1829 maxCompFragLog2
: numSamplesLog2
;
1831 UINT_32 uncompFragLog2
= numSamplesLog2
- compFragLog2
;
1833 // Make sure the metaaddr is cleared
1835 pMetaEq
->resize(27);
// Thick resources interleave x/y/z (mort3d); all others interleave x/y (mort2d)
// starting above the compressed fragment bits.
1837 if (IsThick(resourceType
, swizzleMode
))
1839 Coordinate
cx('x', 0);
1840 Coordinate
cy('y', 0);
1841 Coordinate
cz('z', 0);
1845 pMetaEq
->mort3d(cy
, cx
, cz
);
1849 pMetaEq
->mort3d(cx
, cy
, cz
);
1854 Coordinate
cx('x', 0);
1855 Coordinate
cy('y', 0);
1860 pMetaEq
->mort2d(cy
, cx
, compFragLog2
);
1864 pMetaEq
->mort2d(cx
, cy
, compFragLog2
);
1867 //------------------------------------------------------------------------------------------------------------------------
1868 // Put the compressible fragments at the lsb
1869 // the uncompressible frags will be at the msb of the micro address
1870 //------------------------------------------------------------------------------------------------------------------------
1871 for (UINT_32 s
= 0; s
< compFragLog2
; s
++)
1874 (*pMetaEq
)[s
].add(cs
);
1878 // Keep a copy of the pipe equations
1879 CoordEq origPipeEquation
;
1880 pipeEquation
.copy(origPipeEquation
);
1883 // filter out everything under the compressed block size
1884 co
.set('x', compBlkWidthLog2
);
1885 pMetaEq
->Filter('<', co
, 0, 'x');
1886 co
.set('y', compBlkHeightLog2
);
1887 pMetaEq
->Filter('<', co
, 0, 'y');
1888 co
.set('z', compBlkDepthLog2
);
1889 pMetaEq
->Filter('<', co
, 0, 'z');
1891 // For non-color, filter out sample bits
1892 if (dataSurfaceType
!= Gfx9DataColor
)
1895 pMetaEq
->Filter('<', co
, 0, 's');
1898 // filter out everything above the metablock size
1899 co
.set('x', metaBlkWidthLog2
- 1);
1900 pMetaEq
->Filter('>', co
, 0, 'x');
1901 co
.set('y', metaBlkHeightLog2
- 1);
1902 pMetaEq
->Filter('>', co
, 0, 'y');
1903 co
.set('z', metaBlkDepthLog2
- 1);
1904 pMetaEq
->Filter('>', co
, 0, 'z');
1906 // filter out everything above the metablock size for the channel bits
1907 co
.set('x', metaBlkWidthLog2
- 1);
1908 pipeEquation
.Filter('>', co
, 0, 'x');
1909 co
.set('y', metaBlkHeightLog2
- 1);
1910 pipeEquation
.Filter('>', co
, 0, 'y');
1911 co
.set('z', metaBlkDepthLog2
- 1);
1912 pipeEquation
.Filter('>', co
, 0, 'z');
1914 // Make sure we still have the same number of channel bits
1915 if (pipeEquation
.getsize() != numPipeTotalLog2
)
1917 ADDR_ASSERT_ALWAYS();
1920 // Loop through all channel and rb bits,
1921 // and make sure these components exist in the metadata address
1922 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
1924 for (UINT_32 j
= pipeEquation
[i
].getsize(); j
> 0; j
--)
1926 if (pMetaEq
->Exists(pipeEquation
[i
][j
- 1]) == FALSE
)
1928 ADDR_ASSERT_ALWAYS();
// RB contribution is only considered when the metadata is RB aligned; otherwise
// both counts are zero and the RB loops below do nothing.
1933 const UINT_32 numSeLog2
= metaFlag
.rbAligned
? m_seLog2
: 0;
1934 const UINT_32 numRbPeSeLog2
= metaFlag
.rbAligned
? m_rbPerSeLog2
: 0;
1935 const UINT_32 numRbTotalLog2
= numRbPeSeLog2
+ numSeLog2
;
1936 CoordEq origRbEquation
;
1938 GetRbEquation(&origRbEquation
, numRbPeSeLog2
, numSeLog2
);
// Working copy; origRbEquation is kept intact for the final assembly.
1940 CoordEq rbEquation
= origRbEquation
;
1942 for (UINT_32 i
= 0; i
< numRbTotalLog2
; i
++)
1944 for (UINT_32 j
= rbEquation
[i
].getsize(); j
> 0; j
--)
1946 if (pMetaEq
->Exists(rbEquation
[i
][j
- 1]) == FALSE
)
1948 ADDR_ASSERT_ALWAYS();
1953 if (m_settings
.applyAliasFix
)
1958 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1959 for (UINT_32 i
= 0; i
< numRbTotalLog2
; i
++)
1961 for (UINT_32 j
= 0; j
< numPipeTotalLog2
; j
++)
1963 BOOL_32 isRbEquationInPipeEquation
= FALSE
;
// With the alias fix the comparison uses the pipe bit after an additional
// z filter; without it the pipe bit is compared as-is.
1965 if (m_settings
.applyAliasFix
)
1967 CoordTerm filteredPipeEq
;
1968 filteredPipeEq
= pipeEquation
[j
];
1970 filteredPipeEq
.Filter('>', co
, 0, 'z');
1972 isRbEquationInPipeEquation
= (rbEquation
[i
] == filteredPipeEq
);
1976 isRbEquationInPipeEquation
= (rbEquation
[i
] == pipeEquation
[j
]);
1979 if (isRbEquationInPipeEquation
)
1981 rbEquation
[i
].Clear();
// Per-RB-bit flag: set when pipe terms were appended to that RB bit below.
1986 bool rbAppendedWithPipeBits
[1 << (MaxSeLog2
+ MaxRbPerSeLog2
)] = {};
1988 // Loop through each bit of the channel, get the smallest coordinate,
1989 // and remove it from the metaaddr, and rb_equation
1990 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
1992 pipeEquation
[i
].getsmallest(co
);
// Removing `co` must shrink the meta equation by exactly one bit.
1994 UINT_32 old_size
= pMetaEq
->getsize();
1995 pMetaEq
->Filter('=', co
);
1996 UINT_32 new_size
= pMetaEq
->getsize();
1997 if (new_size
!= old_size
-1)
1999 ADDR_ASSERT_ALWAYS();
2001 pipeEquation
.remove(co
);
2002 for (UINT_32 j
= 0; j
< numRbTotalLog2
; j
++)
2004 if (rbEquation
[j
].remove(co
))
2006 // if we actually removed something from this bit, then add the remaining
2007 // channel bits, as these can be removed for this bit
2008 for (UINT_32 k
= 0; k
< pipeEquation
[i
].getsize(); k
++)
2010 if (pipeEquation
[i
][k
] != co
)
2012 rbEquation
[j
].add(pipeEquation
[i
][k
]);
2013 rbAppendedWithPipeBits
[j
] = true;
2020 // Loop through the rb bits and see what remain;
2021 // filter out the smallest coordinate if it remains
2022 UINT_32 rbBitsLeft
= 0;
2023 for (UINT_32 i
= 0; i
< numRbTotalLog2
; i
++)
2025 BOOL_32 isRbEqAppended
= FALSE
;
// With the alias fix, an RB bit that had pipe terms appended needs more than
// one term to count; otherwise any non-empty RB bit counts.
2027 if (m_settings
.applyAliasFix
)
2029 isRbEqAppended
= (rbEquation
[i
].getsize() > (rbAppendedWithPipeBits
[i
] ? 1 : 0));
2033 isRbEqAppended
= (rbEquation
[i
].getsize() > 0);
2039 rbEquation
[i
].getsmallest(co
);
2040 UINT_32 old_size
= pMetaEq
->getsize();
2041 pMetaEq
->Filter('=', co
);
2042 UINT_32 new_size
= pMetaEq
->getsize();
2043 if (new_size
!= old_size
- 1)
// Propagate the removal to the higher RB bits.
2047 for (UINT_32 j
= i
+ 1; j
< numRbTotalLog2
; j
++)
2049 if (rbEquation
[j
].remove(co
))
2051 // if we actually removed something from this bit, then add the remaining
2052 // rb bits, as these can be removed for this bit
2053 for (UINT_32 k
= 0; k
< rbEquation
[i
].getsize(); k
++)
2055 if (rbEquation
[i
][k
] != co
)
2057 rbEquation
[j
].add(rbEquation
[i
][k
]);
2058 rbAppendedWithPipeBits
[j
] |= rbAppendedWithPipeBits
[i
];
2066 // capture the size of the metaaddr
2067 UINT_32 metaSize
= pMetaEq
->getsize();
2068 // resize to 49 bits...make this a nibble address
2069 pMetaEq
->resize(49);
2070 // Concatenate the macro address above the current address
2071 for (UINT_32 i
= metaSize
, j
= 0; i
< 49; i
++, j
++)
2074 (*pMetaEq
)[i
].add(co
);
2077 // Multiply by meta element size (in nibbles)
2078 if (dataSurfaceType
== Gfx9DataColor
)
2082 else if (dataSurfaceType
== Gfx9DataDepthStencil
)
2087 //------------------------------------------------------------------------------------------
2088 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2089 // Shift up from pipe interleave number of channel
2090 // and rb bits left, and uncompressed fragments
2091 //------------------------------------------------------------------------------------------
2093 pMetaEq
->shift(numPipeTotalLog2
+ rbBitsLeft
+ uncompFragLog2
, pipeInterleaveLog2
+ 1);
2095 // Put in the channel bits
2096 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
2098 origPipeEquation
[i
].copyto((*pMetaEq
)[pipeInterleaveLog2
+1 + i
]);
2101 // Put in remaining rb bits
// i cycles over the RB bits (modulo numRbTotalLog2); j is the number of RB bits
// placed so far, and the loop runs until rbBitsLeft bits have been placed.
2102 for (UINT_32 i
= 0, j
= 0; j
< rbBitsLeft
; i
= (i
+ 1) % numRbTotalLog2
)
2104 BOOL_32 isRbEqAppended
= FALSE
;
2106 if (m_settings
.applyAliasFix
)
2108 isRbEqAppended
= (rbEquation
[i
].getsize() > (rbAppendedWithPipeBits
[i
] ? 1 : 0));
2112 isRbEqAppended
= (rbEquation
[i
].getsize() > 0);
2117 origRbEquation
[i
].copyto((*pMetaEq
)[pipeInterleaveLog2
+ 1 + numPipeTotalLog2
+ j
]);
2118 // Mark any rb bit we add in to the rb mask
2123 //------------------------------------------------------------------------------------------
2124 // Put in the uncompressed fragment bits
2125 //------------------------------------------------------------------------------------------
2126 for (UINT_32 i
= 0; i
< uncompFragLog2
; i
++)
2128 co
.set('s', compFragLog2
+ i
);
2129 (*pMetaEq
)[pipeInterleaveLog2
+ 1 + numPipeTotalLog2
+ rbBitsLeft
+ i
].add(co
);
2135 ************************************************************************************************************************
2136 * Gfx9Lib::IsEquationSupported
2139 * Check if equation is supported for given swizzle mode and resource type.
2143 ************************************************************************************************************************
// Reports whether a swizzle equation can be generated for the given resource
// type, swizzle mode and element size (log2 bytes).
// Requirements visible in the expression below:
//   - elementBytesLog2 is below MaxElementBytesLog2 and the mode is not linear;
//   - 2D: element size below 16 bytes (log2 < 4), or the mode is neither a rotate
//     nor a Z-order swizzle;
//   - 3D: the mode is neither a rotate swizzle nor a 256-byte block mode.
2145 BOOL_32
Gfx9Lib::IsEquationSupported(
2146 AddrResourceType rsrcType
,
2147 AddrSwizzleMode swMode
,
2148 UINT_32 elementBytesLog2
) const
2150 BOOL_32 supported
= (elementBytesLog2
< MaxElementBytesLog2
) &&
2151 (IsLinear(swMode
) == FALSE
) &&
2152 (((IsTex2d(rsrcType
) == TRUE
) &&
2153 ((elementBytesLog2
< 4) ||
2154 ((IsRotateSwizzle(swMode
) == FALSE
) &&
2155 (IsZOrderSwizzle(swMode
) == FALSE
)))) ||
2156 ((IsTex3d(rsrcType
) == TRUE
) &&
2157 (IsRotateSwizzle(swMode
) == FALSE
) &&
2158 (IsBlock256b(swMode
) == FALSE
)));
2164 ************************************************************************************************************************
2165 * Gfx9Lib::InitEquationTable
2168 * Initialize Equation table.
2172 ************************************************************************************************************************
// Fills m_equationTable and m_equationLookupTable.
// For every (resource type, swizzle mode, element size) combination the lookup
// table receives either the index of a generated equation or
// ADDR_INVALID_EQUATION_INDEX when the combination is unsupported or generation failed.
2174 VOID
Gfx9Lib::InitEquationTable()
2176 memset(m_equationTable
, 0, sizeof(m_equationTable
));
2178 // Loop all possible resource type (2D/3D)
2179 for (UINT_32 rsrcTypeIdx
= 0; rsrcTypeIdx
< MaxRsrcType
; rsrcTypeIdx
++)
// Table index 0 corresponds to ADDR_RSRC_TEX_2D.
2181 AddrResourceType rsrcType
= static_cast<AddrResourceType
>(rsrcTypeIdx
+ ADDR_RSRC_TEX_2D
);
2183 // Loop all possible swizzle mode
2184 for (UINT_32 swModeIdx
= 0; swModeIdx
< MaxSwMode
; swModeIdx
++)
2186 AddrSwizzleMode swMode
= static_cast<AddrSwizzleMode
>(swModeIdx
);
2188 // Loop all possible bpp
2189 for (UINT_32 bppIdx
= 0; bppIdx
< MaxElementBytesLog2
; bppIdx
++)
2191 UINT_32 equationIndex
= ADDR_INVALID_EQUATION_INDEX
;
2193 // Check if the input is supported
2194 if (IsEquationSupported(rsrcType
, swMode
, bppIdx
))
2196 ADDR_EQUATION equation
;
2197 ADDR_E_RETURNCODE retCode
;
2199 memset(&equation
, 0, sizeof(ADDR_EQUATION
));
2201 // Generate the equation
// Dispatch: 256-byte 2D blocks, then thin modes, then the thick generator.
2202 if (IsBlock256b(swMode
) && IsTex2d(rsrcType
))
2204 retCode
= ComputeBlock256Equation(rsrcType
, swMode
, bppIdx
, &equation
);
2206 else if (IsThin(rsrcType
, swMode
))
2208 retCode
= ComputeThinEquation(rsrcType
, swMode
, bppIdx
, &equation
);
2212 retCode
= ComputeThickEquation(rsrcType
, swMode
, bppIdx
, &equation
);
2215 // Only fill the equation into the table if the return code is ADDR_OK,
2216 // otherwise if the return code is not ADDR_OK, it indicates this is not
2217 // a valid input, we do nothing but just fill invalid equation index
2218 // into the lookup table.
2219 if (retCode
== ADDR_OK
)
// The new equation is stored at index m_numEquations.
2221 equationIndex
= m_numEquations
;
2222 ADDR_ASSERT(equationIndex
< EquationTableSize
);
2224 m_equationTable
[equationIndex
] = equation
;
2230 ADDR_ASSERT_ALWAYS();
2234 // Fill the index into the lookup table, if the combination is not supported
2235 // fill the invalid equation index
2236 m_equationLookupTable
[rsrcTypeIdx
][swModeIdx
][bppIdx
] = equationIndex
;
2243 ************************************************************************************************************************
2244 * Gfx9Lib::HwlGetEquationIndex
2247 * Interface function stub of GetEquationIndex
2251 ************************************************************************************************************************
// Looks up the equation index for the surface described by pIn and, when
// pOut->pMipInfo is provided, writes that same index into every mip level entry.
// The index stays ADDR_INVALID_EQUATION_INDEX when the combination is unsupported.
2253 UINT_32
Gfx9Lib::HwlGetEquationIndex(
2254 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
,
2255 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
2258 AddrResourceType rsrcType
= pIn
->resourceType
;
2259 AddrSwizzleMode swMode
= pIn
->swizzleMode
;
// bpp is in bits; convert to log2 of bytes per element.
2260 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
2261 UINT_32 index
= ADDR_INVALID_EQUATION_INDEX
;
2263 if (IsEquationSupported(rsrcType
, swMode
, elementBytesLog2
))
// The lookup table's first dimension is the resource type value minus one.
2265 UINT_32 rsrcTypeIdx
= static_cast<UINT_32
>(rsrcType
) - 1;
2266 UINT_32 swModeIdx
= static_cast<UINT_32
>(swMode
);
2268 index
= m_equationLookupTable
[rsrcTypeIdx
][swModeIdx
][elementBytesLog2
];
// Every mip level reports the same equation index.
2271 if (pOut
->pMipInfo
!= NULL
)
2273 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
2275 pOut
->pMipInfo
[i
].equationIndex
= index
;
2283 ************************************************************************************************************************
2284 * Gfx9Lib::HwlComputeBlock256Equation
2287 * Interface function stub of ComputeBlock256Equation
2291 ************************************************************************************************************************
// Builds the 8-bit address equation of a 256-byte block into *pEquation.
// The per-bit x/y assignment is chosen by swizzle family (standard / display /
// rotate) and element size. ADDR_INVALIDPARAMS is assigned to `ret` on the
// assert-always paths of each switch and of the swizzle-family chain.
2293 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeBlock256Equation(
2294 AddrResourceType rsrcType
,
2295 AddrSwizzleMode swMode
,
2296 UINT_32 elementBytesLog2
,
2297 ADDR_EQUATION
* pEquation
) const
2299 ADDR_E_RETURNCODE ret
= ADDR_OK
;
// 256 bytes == 2^8, hence an 8-bit equation.
2301 pEquation
->numBits
= 8;
// Address bits below the element size are initialised one per bit index i.
2304 for (; i
< elementBytesLog2
; i
++)
2306 InitChannel(1, 0 , i
, &pEquation
->addr
[i
]);
// pixelBit points at the first address bit above the element bytes.
2309 ADDR_CHANNEL_SETTING
* pixelBit
= &pEquation
->addr
[elementBytesLog2
];
2311 const UINT_32 maxBitsUsed
= 4;
2312 ADDR_CHANNEL_SETTING x
[maxBitsUsed
] = {};
2313 ADDR_CHANNEL_SETTING y
[maxBitsUsed
] = {};
// Pre-build the candidate x and y channel settings; x indices are offset by the
// element size, y indices are not.
2315 for (i
= 0; i
< maxBitsUsed
; i
++)
2317 InitChannel(1, 0, elementBytesLog2
+ i
, &x
[i
]);
2318 InitChannel(1, 1, i
, &y
[i
]);
2321 if (IsStandardSwizzle(rsrcType
, swMode
))
2323 switch (elementBytesLog2
)
2366 ADDR_ASSERT_ALWAYS();
2367 ret
= ADDR_INVALIDPARAMS
;
2371 else if (IsDisplaySwizzle(rsrcType
, swMode
))
2373 switch (elementBytesLog2
)
2416 ADDR_ASSERT_ALWAYS();
2417 ret
= ADDR_INVALIDPARAMS
;
2421 else if (IsRotateSwizzle(swMode
))
2423 switch (elementBytesLog2
)
2460 ADDR_ASSERT_ALWAYS();
2462 ret
= ADDR_INVALIDPARAMS
;
2468 ADDR_ASSERT_ALWAYS();
2469 ret
= ADDR_INVALIDPARAMS
;
// Debug-only sanity check: the highest x and y channel indices used by the
// equation must match the micro block dimensions from Block256_2d.
2475 MAYBE_UNUSED Dim2d microBlockDim
= Block256_2d
[elementBytesLog2
];
2476 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation
->addr
, 8, 0)) ==
2477 (microBlockDim
.w
* (1 << elementBytesLog2
)));
2478 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation
->addr
, 8, 1)) == microBlockDim
.h
);
2485 ************************************************************************************************************************
2486 * Gfx9Lib::HwlComputeThinEquation
2489 * Interface function stub of ComputeThinEquation
2493 ************************************************************************************************************************
// Fills *pEquation with the thin swizzle equation for the given resource type, swizzle mode
// and element size (log2 of bytes per element). The status is tracked in `ret`, which starts
// as ADDR_OK and is overwritten on the paths shown below.
2495 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeThinEquation(
2496 AddrResourceType rsrcType
,
2497 AddrSwizzleMode swMode
,
2498 UINT_32 elementBytesLog2
,
2499 ADDR_EQUATION
* pEquation
) const
2501 ADDR_E_RETURNCODE ret
= ADDR_OK
;
2503 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swMode
);
// maxXorBits starts at the block size (log2) and is only raised for non-PRT XOR modes.
2505 UINT_32 maxXorBits
= blockSizeLog2
;
2506 if (IsNonPrtXor(swMode
))
2508 // For non-prt-xor, maybe need to initialize some more bits for xor
2509 // The highest xor bit used in equation will be max the following 3 items:
2510 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2511 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2514 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+ 2 * GetPipeXorBits(blockSizeLog2
));
2515 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+
2516 GetPipeXorBits(blockSizeLog2
) +
2517 2 * GetBankXorBits(blockSizeLog2
));
// Scratch channel settings: x[] and y[] each hold maxBitsUsed entries; the assert checks that
// the two arrays together can supply maxXorBits bits.
2520 const UINT_32 maxBitsUsed
= 14;
2521 ADDR_ASSERT((2 * maxBitsUsed
) >= maxXorBits
);
2522 ADDR_CHANNEL_SETTING x
[maxBitsUsed
] = {};
2523 ADDR_CHANNEL_SETTING y
[maxBitsUsed
] = {};
// xorExtra[] receives the bits located at or above the block size (see the loop further down).
2525 const UINT_32 extraXorBits
= 16;
2526 ADDR_ASSERT(extraXorBits
>= maxXorBits
- blockSizeLog2
);
2527 ADDR_CHANNEL_SETTING xorExtra
[extraXorBits
] = {};
// x[i] is initialised with InitChannel(1, 0, elementBytesLog2 + i) and y[i] with
// InitChannel(1, 1, i).
2529 for (UINT_32 i
= 0; i
< maxBitsUsed
; i
++)
2531 InitChannel(1, 0, elementBytesLog2
+ i
, &x
[i
]);
2532 InitChannel(1, 1, i
, &y
[i
]);
// pixelBit aliases the start of the equation's addr[] array.
2535 ADDR_CHANNEL_SETTING
* pixelBit
= pEquation
->addr
;
// The lowest elementBytesLog2 address bits are initialised with InitChannel(1, 0, i).
2537 for (UINT_32 i
= 0; i
< elementBytesLog2
; i
++)
2539 InitChannel(1, 0 , i
, &pixelBit
[i
]);
2544 UINT_32 lowBits
= 0;
// Z-order path: for element sizes up to 8 bytes (log2 <= 3), bits [elementBytesLog2, 6)
// alternate between the next x entry (even offset) and the next y entry (odd offset).
2546 if (IsZOrderSwizzle(swMode
))
2548 if (elementBytesLog2
<= 3)
2550 for (UINT_32 i
= elementBytesLog2
; i
< 6; i
++)
2552 pixelBit
[i
] = (((i
- elementBytesLog2
) & 1) == 0) ? x
[xIdx
++] : y
[yIdx
++];
2559 ret
= ADDR_INVALIDPARAMS
;
// Here the low bits come from the 256B block equation; xIdx / yIdx then start at the log2 of
// the 256B micro block width / height for this element size.
2564 ret
= HwlComputeBlock256Equation(rsrcType
, swMode
, elementBytesLog2
, pEquation
);
2568 Dim2d microBlockDim
= Block256_2d
[elementBytesLog2
];
2569 xIdx
= Log2(microBlockDim
.w
);
2570 yIdx
= Log2(microBlockDim
.h
);
// Bits [lowBits, blockSizeLog2): even bit positions take the next y entry, odd ones the next x.
2577 for (UINT_32 i
= lowBits
; i
< blockSizeLog2
; i
++)
2579 pixelBit
[i
] = ((i
& 1) == 0) ? y
[yIdx
++] : x
[xIdx
++];
// Bits [blockSizeLog2, maxXorBits) follow the same even/odd rule but are stored in xorExtra[].
2582 for (UINT_32 i
= blockSizeLog2
; i
< maxXorBits
; i
++)
2584 xorExtra
[i
- blockSizeLog2
] = ((i
& 1) == 0) ? y
[yIdx
++] : x
[xIdx
++];
// Pipe XOR bits start at m_pipeInterleaveLog2; bank XOR bits follow directly after them.
2590 UINT_32 pipeStart
= m_pipeInterleaveLog2
;
2591 UINT_32 pipeXorBits
= GetPipeXorBits(blockSizeLog2
);
2593 UINT_32 bankStart
= pipeStart
+ pipeXorBits
;
2594 UINT_32 bankXorBits
= GetBankXorBits(blockSizeLog2
);
// xor1 of pipe bit i is copied from bit position pipeStart + 2 * pipeXorBits - 1 - i, read from
// addr[] when that position is below the block size and from xorExtra[] otherwise.
2596 for (UINT_32 i
= 0; i
< pipeXorBits
; i
++)
2598 UINT_32 xor1BitPos
= pipeStart
+ 2 * pipeXorBits
- 1 - i
;
2599 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2600 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2602 InitChannel(&pEquation
->xor1
[pipeStart
+ i
], pXor1Src
);
// Same scheme for the bank bits, relative to bankStart and bankXorBits.
2605 for (UINT_32 i
= 0; i
< bankXorBits
; i
++)
2607 UINT_32 xor1BitPos
= bankStart
+ 2 * bankXorBits
- 1 - i
;
2608 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2609 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2611 InitChannel(&pEquation
->xor1
[bankStart
+ i
], pXor1Src
);
// Only when IsPrt(swMode) is FALSE: xor2 entries for the pipe and bank bits are set with
// InitChannel(1, 2, ...), using descending indices within each group.
2614 if (IsPrt(swMode
) == FALSE
)
2616 for (UINT_32 i
= 0; i
< pipeXorBits
; i
++)
2618 InitChannel(1, 2, pipeXorBits
- i
- 1, &pEquation
->xor2
[pipeStart
+ i
]);
2621 for (UINT_32 i
= 0; i
< bankXorBits
; i
++)
2623 InitChannel(1, 2, bankXorBits
- i
- 1 + pipeXorBits
, &pEquation
->xor2
[bankStart
+ i
]);
// The equation covers blockSizeLog2 address bits.
2628 pEquation
->numBits
= blockSizeLog2
;
2635 ************************************************************************************************************************
2636 * Gfx9Lib::HwlComputeThickEquation
2639 * Interface function stub of ComputeThickEquation
2643 ************************************************************************************************************************
// Fills *pEquation with the thick swizzle equation for the given resource type, swizzle mode
// and element size (log2 of bytes per element). The resource type is asserted to be 3D. The
// status is tracked in `ret`, which starts as ADDR_OK and becomes ADDR_INVALIDPARAMS on the
// rejection paths shown below.
2645 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeThickEquation(
2646 AddrResourceType rsrcType
,
2647 AddrSwizzleMode swMode
,
2648 UINT_32 elementBytesLog2
,
2649 ADDR_EQUATION
* pEquation
) const
2651 ADDR_E_RETURNCODE ret
= ADDR_OK
;
2653 ADDR_ASSERT(IsTex3d(rsrcType
));
2655 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swMode
);
// maxXorBits starts at the block size (log2) and is only raised for non-PRT XOR modes.
2657 UINT_32 maxXorBits
= blockSizeLog2
;
2658 if (IsNonPrtXor(swMode
))
2660 // For non-prt-xor, maybe need to initialize some more bits for xor
2661 // The highest xor bit used in equation will be max the following 3:
2662 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2663 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2666 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+ 3 * GetPipeXorBits(blockSizeLog2
));
2667 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+
2668 GetPipeXorBits(blockSizeLog2
) +
2669 3 * GetBankXorBits(blockSizeLog2
));
// The lowest elementBytesLog2 address bits are initialised with InitChannel(1, 0, i).
2672 for (UINT_32 i
= 0; i
< elementBytesLog2
; i
++)
2674 InitChannel(1, 0 , i
, &pEquation
->addr
[i
]);
// pixelBit initially points just past those element-size bits.
2677 ADDR_CHANNEL_SETTING
* pixelBit
= &pEquation
->addr
[elementBytesLog2
];
// Scratch channel settings: x[], y[] and z[] each hold maxBitsUsed entries; the assert checks
// that the three arrays together can supply maxXorBits bits.
2679 const UINT_32 maxBitsUsed
= 12;
2680 ADDR_ASSERT((3 * maxBitsUsed
) >= maxXorBits
);
2681 ADDR_CHANNEL_SETTING x
[maxBitsUsed
] = {};
2682 ADDR_CHANNEL_SETTING y
[maxBitsUsed
] = {};
2683 ADDR_CHANNEL_SETTING z
[maxBitsUsed
] = {};
// xorExtra[] receives the bits located at or above the block size (see the loop further down).
2685 const UINT_32 extraXorBits
= 24;
2686 ADDR_ASSERT(extraXorBits
>= maxXorBits
- blockSizeLog2
);
2687 ADDR_CHANNEL_SETTING xorExtra
[extraXorBits
] = {};
// x[i] is initialised with InitChannel(1, 0, elementBytesLog2 + i), y[i] with
// InitChannel(1, 1, i) and z[i] with InitChannel(1, 2, i).
2689 for (UINT_32 i
= 0; i
< maxBitsUsed
; i
++)
2691 InitChannel(1, 0, elementBytesLog2
+ i
, &x
[i
]);
2692 InitChannel(1, 1, i
, &y
[i
]);
2693 InitChannel(1, 2, i
, &z
[i
]);
// The low bits are selected per swizzle family (Z-order or standard) and element size; each
// visible rejection path asserts and sets ADDR_INVALIDPARAMS.
2696 if (IsZOrderSwizzle(swMode
))
2698 switch (elementBytesLog2
)
2751 ADDR_ASSERT_ALWAYS();
2752 ret
= ADDR_INVALIDPARAMS
;
2756 else if (IsStandardSwizzle(rsrcType
, swMode
))
2758 switch (elementBytesLog2
)
2811 ADDR_ASSERT_ALWAYS();
2812 ret
= ADDR_INVALIDPARAMS
;
2818 ADDR_ASSERT_ALWAYS();
2819 ret
= ADDR_INVALIDPARAMS
;
// xIdx / yIdx / zIdx start at the log2 of the 1KB micro block width / height / depth for this
// element size.
2824 Dim3d microBlockDim
= Block1K_3d
[elementBytesLog2
];
2825 UINT_32 xIdx
= Log2(microBlockDim
.w
);
2826 UINT_32 yIdx
= Log2(microBlockDim
.h
);
2827 UINT_32 zIdx
= Log2(microBlockDim
.d
);
// From here on pixelBit aliases the start of addr[] again.
2829 pixelBit
= pEquation
->addr
;
// The asserts expect exactly the first 10 address bits to be valid at this point.
2831 const UINT_32 lowBits
= 10;
2832 ADDR_ASSERT(pEquation
->addr
[lowBits
- 1].valid
== 1);
2833 ADDR_ASSERT(pEquation
->addr
[lowBits
].valid
== 0);
// Bits [lowBits, blockSizeLog2) take the next x, z or y entry; the visible branch shows
// (i % 3) == 1 selecting z.
2835 for (UINT_32 i
= lowBits
; i
< blockSizeLog2
; i
++)
2839 pixelBit
[i
] = x
[xIdx
++];
2841 else if ((i
% 3) == 1)
2843 pixelBit
[i
] = z
[zIdx
++];
2847 pixelBit
[i
] = y
[yIdx
++];
// Bits [blockSizeLog2, maxXorBits) are distributed the same way but stored in xorExtra[].
2851 for (UINT_32 i
= blockSizeLog2
; i
< maxXorBits
; i
++)
2855 xorExtra
[i
- blockSizeLog2
] = x
[xIdx
++];
2857 else if ((i
% 3) == 1)
2859 xorExtra
[i
- blockSizeLog2
] = z
[zIdx
++];
2863 xorExtra
[i
- blockSizeLog2
] = y
[yIdx
++];
// Pipe XOR: for pipe bit i, xor1 and xor2 are copied from two adjacent source bit positions
// (pipeStart + 3 * pipeXorBits - 1 - 2 * i and one below), read from addr[] when the position
// is below the block size and from xorExtra[] otherwise.
2870 UINT_32 pipeStart
= m_pipeInterleaveLog2
;
2871 UINT_32 pipeXorBits
= GetPipeXorBits(blockSizeLog2
);
2872 for (UINT_32 i
= 0; i
< pipeXorBits
; i
++)
2874 UINT_32 xor1BitPos
= pipeStart
+ (3 * pipeXorBits
) - 1 - (2 * i
);
2875 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2876 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2878 InitChannel(&pEquation
->xor1
[pipeStart
+ i
], pXor1Src
);
2880 UINT_32 xor2BitPos
= pipeStart
+ (3 * pipeXorBits
) - 2 - (2 * i
);
2881 ADDR_CHANNEL_SETTING
* pXor2Src
= (xor2BitPos
< blockSizeLog2
) ?
2882 &pEquation
->addr
[xor2BitPos
] : &xorExtra
[xor2BitPos
- blockSizeLog2
];
2884 InitChannel(&pEquation
->xor2
[pipeStart
+ i
], pXor2Src
);
// Bank XOR: same scheme, relative to bankStart (directly after the pipe bits) and bankXorBits.
2887 UINT_32 bankStart
= pipeStart
+ pipeXorBits
;
2888 UINT_32 bankXorBits
= GetBankXorBits(blockSizeLog2
);
2889 for (UINT_32 i
= 0; i
< bankXorBits
; i
++)
2891 UINT_32 xor1BitPos
= bankStart
+ (3 * bankXorBits
) - 1 - (2 * i
);
2892 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2893 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2895 InitChannel(&pEquation
->xor1
[bankStart
+ i
], pXor1Src
);
2897 UINT_32 xor2BitPos
= bankStart
+ (3 * bankXorBits
) - 2 - (2 * i
);
2898 ADDR_CHANNEL_SETTING
* pXor2Src
= (xor2BitPos
< blockSizeLog2
) ?
2899 &pEquation
->addr
[xor2BitPos
] : &xorExtra
[xor2BitPos
- blockSizeLog2
];
2901 InitChannel(&pEquation
->xor2
[bankStart
+ i
], pXor2Src
);
// The equation covers blockSizeLog2 address bits.
2905 pEquation
->numBits
= blockSizeLog2
;
2912 ************************************************************************************************************************
2913 * Gfx9Lib::IsValidDisplaySwizzleMode
2916 * Check if a swizzle mode is supported by display engine
2919 * TRUE if swizzle mode is supported by display engine
2920 ************************************************************************************************************************
// Decides whether pIn->swizzleMode can be used for display, based on the display engine
// flagged in m_settings (isDce12 or isDcn1) and on pIn->bpp. `support` starts as FALSE.
2922 BOOL_32
Gfx9Lib::IsValidDisplaySwizzleMode(
2923 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
) const
2925 BOOL_32 support
= FALSE
;
// NOTE(review): resourceType is read here but not used by any statement visible below.
2927 const AddrResourceType resourceType
= pIn
->resourceType
;
2929 const AddrSwizzleMode swizzleMode
= pIn
->swizzleMode
;
// DCE12: the 256B D/R modes require exactly 32 bpp; the XOR (_X) D/R modes accept up to 64 bpp.
2931 if (m_settings
.isDce12
)
2933 switch (swizzleMode
)
2935 case ADDR_SW_256B_D
:
2936 case ADDR_SW_256B_R
:
2937 support
= (pIn
->bpp
== 32);
2940 case ADDR_SW_LINEAR
:
2943 case ADDR_SW_64KB_D
:
2944 case ADDR_SW_64KB_R
:
2947 case ADDR_SW_4KB_D_X
:
2948 case ADDR_SW_4KB_R_X
:
2949 case ADDR_SW_64KB_D_X
:
2950 case ADDR_SW_64KB_R_X
:
2951 case ADDR_SW_VAR_D_X
:
2952 case ADDR_SW_VAR_R_X
:
2953 support
= (pIn
->bpp
<= 64);
// DCN1: the D-mode group requires exactly 64 bpp; linear and the S-mode group accept up to
// 64 bpp.
2960 else if (m_settings
.isDcn1
)
2962 switch (swizzleMode
)
2965 case ADDR_SW_64KB_D
:
2967 case ADDR_SW_64KB_D_T
:
2968 case ADDR_SW_4KB_D_X
:
2969 case ADDR_SW_64KB_D_X
:
2970 case ADDR_SW_VAR_D_X
:
2971 support
= (pIn
->bpp
== 64);
2974 case ADDR_SW_LINEAR
:
2976 case ADDR_SW_64KB_S
:
2978 case ADDR_SW_64KB_S_T
:
2979 case ADDR_SW_4KB_S_X
:
2980 case ADDR_SW_64KB_S_X
:
2981 case ADDR_SW_VAR_S_X
:
2982 support
= (pIn
->bpp
<= 64);
// Presumably reached when neither display engine flag is set — confirm against the full source.
2991 ADDR_NOT_IMPLEMENTED();
2998 ************************************************************************************************************************
2999 * Gfx9Lib::HwlComputePipeBankXor
3002 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3006 ************************************************************************************************************************
// Computes pOut->pipeBankXor for the surface described by *pIn. In the visible code the pipe
// part stays 0 and only the bank part is derived, from pIn->surfIndex.
3008 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputePipeBankXor(
3009 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT
* pIn
,
3010 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT
* pOut
) const
// Number of pipe / bank XOR bits available for this swizzle mode's block size.
3012 UINT_32 macroBlockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
3013 UINT_32 pipeBits
= GetPipeXorBits(macroBlockBits
);
3014 UINT_32 bankBits
= GetBankXorBits(macroBlockBits
);
3016 UINT_32 pipeXor
= 0;
3017 UINT_32 bankXor
= 0;
// The surface index is reduced to bankBits bits before it is used as a table index / multiplier.
3019 const UINT_32 bankMask
= (1 << bankBits
) - 1;
3020 const UINT_32 index
= pIn
->surfIndex
& bankMask
;
// Bits per pixel: the FMASK bpp when flags.fmask is set, otherwise taken from the format.
3022 const UINT_32 bpp
= pIn
->flags
.fmask
?
3023 GetFmaskBpp(pIn
->numSamples
, pIn
->numFrags
) : GetElemLib()->GetBitsPerPixel(pIn
->format
);
// 16-entry lookup tables; which one is used depends on bpp <= 32.
// NOTE(review): the condition guarding this table path is not visible here; the tables have
// 16 entries, so it presumably requires bankBits == 4 — confirm.
3026 static const UINT_32 BankXorSmallBpp
[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3027 static const UINT_32 BankXorLargeBpp
[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3029 bankXor
= (bpp
<= 32) ? BankXorSmallBpp
[index
] : BankXorLargeBpp
[index
];
// Otherwise, with at least one bank bit: multiply the index by (2^(bankBits-1) - 1), or by 1
// when that value is 0, and keep the low bankBits bits.
3031 else if (bankBits
> 0)
3033 UINT_32 bankIncrease
= (1 << (bankBits
- 1)) - 1;
3034 bankIncrease
= (bankIncrease
== 0) ? 1 : bankIncrease
;
3035 bankXor
= (index
* bankIncrease
) & bankMask
;
// Bank bits are placed above the pipe bits in the combined value.
3038 pOut
->pipeBankXor
= (bankXor
<< pipeBits
) | pipeXor
;
3044 ************************************************************************************************************************
3045 * Gfx9Lib::HwlComputeSlicePipeBankXor
3048 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3052 ************************************************************************************************************************
// Derives the per-slice pipe/bank XOR value: pIn->basePipeBankXor XORed with a value built
// from the bit-reversed slice index.
3054 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSlicePipeBankXor(
3055 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT
* pIn
,
3056 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT
* pOut
) const
// Number of pipe / bank XOR bits available for this swizzle mode's block size.
3058 UINT_32 macroBlockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
3059 UINT_32 pipeBits
= GetPipeXorBits(macroBlockBits
);
3060 UINT_32 bankBits
= GetBankXorBits(macroBlockBits
);
// The pipe part uses the slice index itself, the bank part uses the slice index shifted right
// by pipeBits; each is passed through ReverseBitVector with its bit count.
3062 UINT_32 pipeXor
= ReverseBitVector(pIn
->slice
, pipeBits
);
3063 UINT_32 bankXor
= ReverseBitVector(pIn
->slice
>> pipeBits
, bankBits
);
// Bank bits sit above the pipe bits; the combination is XORed onto the base value.
3065 pOut
->pipeBankXor
= pIn
->basePipeBankXor
^ (pipeXor
| (bankXor
<< pipeBits
));
3071 ************************************************************************************************************************
3072 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3075 * Compute sub resource offset to support swizzle pattern
3079 ************************************************************************************************************************
// Computes pOut->offset for a sub-resource from the slice index, slice size, macro block
// offset, mip tail offset and pipe/bank XOR given in *pIn. The surface is asserted to be thin.
3081 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3082 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT
* pIn
,
3083 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT
* pOut
) const
3085 ADDR_ASSERT(IsThin(pIn
->resourceType
, pIn
->swizzleMode
));
// Number of pipe / bank XOR bits available for this swizzle mode's block size.
3087 UINT_32 macroBlockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
3088 UINT_32 pipeBits
= GetPipeXorBits(macroBlockBits
);
3089 UINT_32 bankBits
= GetBankXorBits(macroBlockBits
);
// Slice-dependent XOR: bit-reversed slice index, with the bank part placed above the pipe part.
3090 UINT_32 pipeXor
= ReverseBitVector(pIn
->slice
, pipeBits
);
3091 UINT_32 bankXor
= ReverseBitVector(pIn
->slice
>> pipeBits
, bankBits
);
// Combine with the caller's pipeBankXor and shift the result up by m_pipeInterleaveLog2 bits.
3092 UINT_32 pipeBankXor
= ((pipeXor
| (bankXor
<< pipeBits
)) ^ (pIn
->pipeBankXor
)) << m_pipeInterleaveLog2
;
// offset = slice * sliceSize + macroBlockOffset + (mipTailOffset ^ pipeBankXor) - pipeBankXor.
3094 pOut
->offset
= pIn
->slice
* pIn
->sliceSize
+
3095 pIn
->macroBlockOffset
+
3096 (pIn
->mipTailOffset
^ pipeBankXor
) -
3097 static_cast<UINT_64
>(pipeBankXor
);
3102 ************************************************************************************************************************
3103 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3106 * Compute surface info sanity check
3110 ************************************************************************************************************************
// Validates the surface description in *pIn. Returns ADDR_INVALIDPARAMS when `invalid` ends
// up set and ADDR_OK otherwise; an assert also fires when the input is found invalid.
3112 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3113 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
) const
3115 BOOL_32 invalid
= FALSE
;
// Basic range checks on bpp, width, fragment / sample counts, swizzle mode and resource type.
3117 if ((pIn
->bpp
> 128) || (pIn
->width
== 0) || (pIn
->numFrags
> 8) || (pIn
->numSamples
> 16))
3121 else if ((pIn
->swizzleMode
>= ADDR_SW_MAX_TYPE
) ||
3122 (pIn
->resourceType
>= ADDR_RSRC_MAX_TYPE
))
// Derived properties used by the rule checks below.
3127 BOOL_32 mipmap
= (pIn
->numMipLevels
> 1);
3128 BOOL_32 msaa
= (pIn
->numFrags
> 1);
3130 ADDR2_SURFACE_FLAGS flags
= pIn
->flags
;
3131 BOOL_32 zbuffer
= (flags
.depth
|| flags
.stencil
);
3132 BOOL_32 color
= flags
.color
;
3133 BOOL_32 display
= flags
.display
|| flags
.rotated
;
3135 AddrResourceType rsrcType
= pIn
->resourceType
;
3136 BOOL_32 tex3d
= IsTex3d(rsrcType
);
3137 AddrSwizzleMode swizzle
= pIn
->swizzleMode
;
3138 BOOL_32 linear
= IsLinear(swizzle
);
3139 BOOL_32 blk256B
= IsBlock256b(swizzle
);
3140 BOOL_32 blkVar
= IsBlockVariable(swizzle
);
3141 BOOL_32 isNonPrtXor
= IsNonPrtXor(swizzle
);
3142 BOOL_32 prt
= flags
.prt
;
3143 BOOL_32 stereo
= flags
.qbStereo
;
// Multi-fragment surfaces: the block size is compared against
// m_pipeInterleaveBytes * numFrags.
3145 if (invalid
== FALSE
)
3147 if ((pIn
->numFrags
> 1) &&
3148 (GetBlockSize(swizzle
) < (m_pipeInterleaveBytes
* pIn
->numFrags
)))
3150 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
// Per-resource-type rules (1D / 2D / 3D).
3155 if (invalid
== FALSE
)
3159 case ADDR_RSRC_TEX_1D
:
3160 invalid
= msaa
|| zbuffer
|| display
|| (linear
== FALSE
) || stereo
;
3162 case ADDR_RSRC_TEX_2D
:
3163 invalid
= (msaa
&& mipmap
) || (stereo
&& msaa
) || (stereo
&& mipmap
);
3165 case ADDR_RSRC_TEX_3D
:
3166 invalid
= msaa
|| zbuffer
|| display
|| stereo
;
// Display swizzle validation; the guarding condition is not visible here (presumably only for
// display surfaces — confirm).
3174 if (invalid
== FALSE
)
3178 invalid
= (IsValidDisplaySwizzleMode(pIn
) == FALSE
);
// Rules that depend on the swizzle / block category; the conditions selecting each of the
// following assignments are only partly visible here.
3182 if (invalid
== FALSE
)
3186 invalid
= ((ADDR_RSRC_TEX_1D
!= rsrcType
) && prt
) ||
3187 zbuffer
|| msaa
|| (pIn
->bpp
== 0) || ((pIn
->bpp
% 8) != 0);
3191 if (blk256B
|| blkVar
|| isNonPrtXor
)
3196 invalid
= invalid
|| zbuffer
|| tex3d
|| mipmap
|| msaa
;
// Rules per swizzle family (Z-order, standard, display, rotate).
3200 if (invalid
== FALSE
)
3202 if (IsZOrderSwizzle(swizzle
))
3204 invalid
= color
&& msaa
;
3206 else if (IsStandardSwizzle(rsrcType
, swizzle
))
3210 else if (IsDisplaySwizzle(rsrcType
, swizzle
))
3214 else if (IsRotateSwizzle(swizzle
))
3216 invalid
= zbuffer
|| (pIn
->bpp
> 64) || tex3d
;
3220 ADDR_ASSERT(!"invalid swizzle mode");
// Debug aid: assert on any invalid input, then translate the flag into a return code.
3227 ADDR_ASSERT(invalid
== FALSE
);
3229 return invalid
? ADDR_INVALIDPARAMS
: ADDR_OK
;
3233 ************************************************************************************************************************
3234 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3237 * Internal function to get suggested surface information for client to use
3241 ************************************************************************************************************************
// Suggests surface settings for the surface described by *pIn. The visible code writes
// pOut->resourceType, pOut->swizzleMode, pOut->validBlockSet, pOut->validSwTypeSet,
// pOut->clientPreferredSwSet and pOut->canXor. The status is tracked in `returnCode`, which
// starts as ADDR_OK and is set to ADDR_INVALIDPARAMS or ADDR_NOTSUPPORTED on the paths below.
3243 ADDR_E_RETURNCODE
Gfx9Lib::HwlGetPreferredSurfaceSetting(
3244 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT
* pIn
,
3245 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT
* pOut
) const
3247 // Macro define resource block type
3250 AddrBlockMicro
= 0, // Resource uses 256B block
3251 AddrBlock4KB
= 1, // Resource uses 4KB block
3252 AddrBlock64KB
= 2, // Resource uses 64KB block
3253 AddrBlockVar
= 3, // Resource uses var block
3254 AddrBlockLinear
= 4, // Resource uses linear swizzle mode
3256 AddrBlockMaxTiledType
= AddrBlock64KB
+ 1,
// Bit masks over the block types above (one bit per block type).
3261 AddrBlockSetMicro
= 1 << AddrBlockMicro
,
3262 AddrBlockSetMacro4KB
= 1 << AddrBlock4KB
,
3263 AddrBlockSetMacro64KB
= 1 << AddrBlock64KB
,
3264 AddrBlockSetVar
= 1 << AddrBlockVar
,
3265 AddrBlockSetLinear
= 1 << AddrBlockLinear
,
3267 AddrBlockSetMacro
= AddrBlockSetMacro4KB
| AddrBlockSetMacro64KB
,
// Bit masks over the swizzle types Z / S / D / R.
3272 AddrSwSetZ
= 1 << ADDR_SW_Z
,
3273 AddrSwSetS
= 1 << ADDR_SW_S
,
3274 AddrSwSetD
= 1 << ADDR_SW_D
,
3275 AddrSwSetR
= 1 << ADDR_SW_R
,
3277 AddrSwSetAll
= AddrSwSetZ
| AddrSwSetS
| AddrSwSetD
| AddrSwSetR
,
3280 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
3281 ElemLib
* pElemLib
= GetElemLib();
3283 // Set format to INVALID will skip this conversion
3284 UINT_32 expandX
= 1;
3285 UINT_32 expandY
= 1;
3286 UINT_32 bpp
= pIn
->bpp
;
3287 UINT_32 width
= pIn
->width
;
3288 UINT_32 height
= pIn
->height
;
// With a valid format, bpp is re-derived from the format through the element library.
3290 if (pIn
->format
!= ADDR_FMT_INVALID
)
3292 // Don't care for this case
3293 ElemMode elemMode
= ADDR_UNCOMPRESSED
;
3295 // Get compression/expansion factors and element mode which indicates compression/expansion
3296 bpp
= pElemLib
->GetBitsPerPixel(pIn
->format
,
3301 UINT_32 basePitch
= 0;
3302 GetElemLib()->AdjustSurfaceInfo(elemMode
,
// Normalise counts: samples, slices and mip levels are at least 1; numFrags falls back to
// numSamples when it is 0; minSizeAlign is passed through NextPow2.
3311 UINT_32 numSamples
= Max(pIn
->numSamples
, 1u);
3312 UINT_32 numFrags
= (pIn
->numFrags
== 0) ? numSamples
: pIn
->numFrags
;
3313 UINT_32 slice
= Max(pIn
->numSlices
, 1u);
3314 UINT_32 numMipLevels
= Max(pIn
->numMipLevels
, 1u);
3315 UINT_32 minSizeAlign
= NextPow2(pIn
->minSizeAlign
);
// FMASK surfaces use the FMASK bpp and are reported as 2D resources.
3317 if (pIn
->flags
.fmask
)
3319 bpp
= GetFmaskBpp(numSamples
, numFrags
);
3322 pOut
->resourceType
= ADDR_RSRC_TEX_2D
;
3326 // The output may get changed for volume(3D) texture resource in future
3327 pOut
->resourceType
= pIn
->resourceType
;
3332 ADDR_ASSERT_ALWAYS();
3334 returnCode
= ADDR_INVALIDPARAMS
;
// 1D resources: linear swizzle mode, linear-only block set, no XOR.
3336 else if (IsTex1d(pOut
->resourceType
))
3338 pOut
->swizzleMode
= ADDR_SW_LINEAR
;
3339 pOut
->validBlockSet
.value
= AddrBlockSetLinear
;
3340 pOut
->canXor
= FALSE
;
3344 ADDR2_BLOCK_SET blockSet
;
// Swizzle-type sets: addrlib's preferred set, the valid set, and the client's preferred set.
// The first two start out as S only.
3347 ADDR2_SWTYPE_SET addrPreferredSwSet
, addrValidSwSet
, clientPreferredSwSet
;
3348 addrPreferredSwSet
.value
= AddrSwSetS
;
3349 addrValidSwSet
= addrPreferredSwSet
;
3350 clientPreferredSwSet
= pIn
->preferredSwSet
;
// An empty client preference is treated as "all swizzle types".
3352 if (clientPreferredSwSet
.value
== 0)
3354 clientPreferredSwSet
.value
= AddrSwSetAll
;
3357 // prt Xor and non-xor will have less height align requirement for stereo surface
3358 BOOL_32 prtXor
= (pIn
->flags
.prt
|| pIn
->flags
.qbStereo
) && (pIn
->noXor
== FALSE
);
3359 BOOL_32 displayResource
= FALSE
;
3361 pOut
->canXor
= (pIn
->flags
.prt
== FALSE
) && (pIn
->noXor
== FALSE
);
3363 // Filter out improper swType and blockSet by HW restriction
3364 if (pIn
->flags
.fmask
|| pIn
->flags
.depth
|| pIn
->flags
.stencil
)
3366 ADDR_ASSERT(IsTex2d(pOut
->resourceType
));
3367 blockSet
.value
= AddrBlockSetMacro
;
3368 addrPreferredSwSet
.value
= AddrSwSetZ
;
3369 addrValidSwSet
.value
= AddrSwSetZ
;
3371 if (pIn
->flags
.noMetadata
== FALSE
)
3373 if (pIn
->flags
.depth
&&
3374 pIn
->flags
.texture
&&
3375 (((bpp
== 16) && (numFrags
>= 4)) || ((bpp
== 32) && (numFrags
>= 2))))
3377 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3378 // equation from wrong address within memory range a tile covered and use the
3379 // garbage data for compressed Z reading which finally leads to corruption.
3380 pOut
->canXor
= FALSE
;
3384 if (m_settings
.htileCacheRbConflict
&&
3385 (pIn
->flags
.depth
|| pIn
->flags
.stencil
) &&
3387 (pIn
->flags
.metaRbUnaligned
== FALSE
) &&
3388 (pIn
->flags
.metaPipeUnaligned
== FALSE
))
3390 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3391 pOut
->canXor
= FALSE
;
3395 else if (ElemLib::IsBlockCompressed(pIn
->format
))
3397 // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes.
3398 // Not sure under what circumstances "_D" would be appropriate as these formats
3399 // are not displayable.
3400 blockSet
.value
= AddrBlockSetMacro
;
3402 // This isn't to be used as texture and caller doesn't allow macro tiled.
3403 if ((pIn
->flags
.texture
== FALSE
) &&
3404 (pIn
->forbiddenBlock
.macro4KB
&& pIn
->forbiddenBlock
.macro64KB
))
3406 blockSet
.value
|= AddrBlockSetLinear
;
3409 addrPreferredSwSet
.value
= AddrSwSetD
;
3410 addrValidSwSet
.value
= AddrSwSetS
| AddrSwSetD
;
3412 else if (ElemLib::IsMacroPixelPacked(pIn
->format
))
3414 // macro pixel packed formats (BG_RG, GB_GR) does not support the Z modes.
3415 // It's not clear under what circumstances the D or R modes would be appropriate
3416 // since these formats are not displayable.
3417 blockSet
.value
= AddrBlockSetLinear
| AddrBlockSetMacro
;
3419 addrPreferredSwSet
.value
= AddrSwSetS
;
3420 addrValidSwSet
.value
= AddrSwSetS
| AddrSwSetD
| AddrSwSetR
;
// 3D resources: linear or macro blocks; the swizzle sets depend on the sub-cases below.
3422 else if (IsTex3d(pOut
->resourceType
))
3424 blockSet
.value
= AddrBlockSetLinear
| AddrBlockSetMacro
;
3428 // PRT cannot use SW_D which gives an unexpected block dimension
3429 addrPreferredSwSet
.value
= AddrSwSetZ
;
3430 addrValidSwSet
.value
= AddrSwSetZ
| AddrSwSetS
;
3432 else if ((numMipLevels
> 1) && (slice
>= width
) && (slice
>= height
))
3434 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3435 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3436 addrPreferredSwSet
.value
= AddrSwSetZ
;
3437 addrValidSwSet
.value
= AddrSwSetZ
| AddrSwSetS
;
3439 else if (pIn
->flags
.color
)
3441 addrPreferredSwSet
.value
= AddrSwSetD
;
3442 addrValidSwSet
.value
= AddrSwSetZ
| AddrSwSetS
| AddrSwSetD
;
3446 addrPreferredSwSet
.value
= AddrSwSetZ
;
3447 addrValidSwSet
.value
= AddrSwSetZ
| AddrSwSetD
;
3450 addrValidSwSet
.value
|= AddrSwSetS
;
// Remaining resources: D is preferred for display / overlay / 128 bpp input, S otherwise.
3456 addrPreferredSwSet
.value
= ((pIn
->flags
.display
== TRUE
) ||
3457 (pIn
->flags
.overlay
== TRUE
) ||
3458 (pIn
->bpp
== 128)) ? AddrSwSetD
: AddrSwSetS
;
3460 addrValidSwSet
.value
= AddrSwSetS
| AddrSwSetD
| AddrSwSetR
;
3462 if (numMipLevels
> 1)
3464 ADDR_ASSERT(numFrags
== 1);
3465 blockSet
.value
= AddrBlockSetLinear
| AddrBlockSetMacro
;
3467 else if ((numFrags
> 1) || (numSamples
> 1))
3469 ADDR_ASSERT(IsTex2d(pOut
->resourceType
));
3470 blockSet
.value
= AddrBlockSetMacro
;
3474 ADDR_ASSERT(IsTex2d(pOut
->resourceType
));
3475 blockSet
.value
= AddrBlockSetLinear
| AddrBlockSetMicro
| AddrBlockSetMacro
;
3477 displayResource
= pIn
->flags
.rotated
|| pIn
->flags
.display
;
// Display resources: the swizzle sets are narrowed per display engine (DCE12 / DCN1).
3479 if (displayResource
)
3481 addrPreferredSwSet
.value
= pIn
->flags
.rotated
? AddrSwSetR
: AddrSwSetD
;
3487 else if (m_settings
.isDce12
)
3491 blockSet
.micro
= FALSE
;
3494 // DCE12 does not support display surface to be _T swizzle mode
3497 addrValidSwSet
.value
= AddrSwSetD
| AddrSwSetR
;
3499 else if (m_settings
.isDcn1
)
3501 // _R is not supported by Dcn1
3504 addrPreferredSwSet
.value
= AddrSwSetD
;
3505 addrValidSwSet
.value
= AddrSwSetS
| AddrSwSetD
;
3509 addrPreferredSwSet
.value
= AddrSwSetS
;
3510 addrValidSwSet
.value
= AddrSwSetS
;
3513 blockSet
.micro
= FALSE
;
3517 ADDR_NOT_IMPLEMENTED();
3518 returnCode
= ADDR_NOTSUPPORTED
;
// Invariant: addrlib's preferred swizzle types must be a subset of the valid ones.
3524 ADDR_ASSERT((addrValidSwSet
.value
& addrPreferredSwSet
.value
) == addrPreferredSwSet
.value
);
3526 pOut
->clientPreferredSwSet
= clientPreferredSwSet
;
3528 // Clamp client preferred set to valid set
3529 clientPreferredSwSet
.value
&= addrValidSwSet
.value
;
3531 pOut
->validSwTypeSet
= addrValidSwSet
;
3533 if (clientPreferredSwSet
.value
== 0)
3535 // Client asks for an invalid swizzle type...
3536 ADDR_ASSERT_ALWAYS();
3537 returnCode
= ADDR_INVALIDPARAMS
;
3541 if (IsPow2(clientPreferredSwSet
.value
))
3543 // Only one swizzle type left, use it directly
3544 addrPreferredSwSet
.value
= clientPreferredSwSet
.value
;
3546 else if ((clientPreferredSwSet
.value
& addrPreferredSwSet
.value
) == 0)
3548 // Client wants 2 or more valid swizzle types but none of them is addrlib preferred
// Fallback priority among the client's choices: D, then Z, then R, then S.
3549 if (clientPreferredSwSet
.sw_D
)
3551 addrPreferredSwSet
.value
= AddrSwSetD
;
3553 else if (clientPreferredSwSet
.sw_Z
)
3555 addrPreferredSwSet
.value
= AddrSwSetZ
;
3557 else if (clientPreferredSwSet
.sw_R
)
3559 addrPreferredSwSet
.value
= AddrSwSetR
;
3563 ADDR_ASSERT(clientPreferredSwSet
.sw_S
);
3564 addrPreferredSwSet
.value
= AddrSwSetS
;
3568 if ((numFrags
> 1) &&
3569 (GetBlockSize(ADDR_SW_4KB
) < (m_pipeInterleaveBytes
* numFrags
)))
3571 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3572 blockSet
.macro4KB
= FALSE
;
// NOTE(review): the condition guarding this 64KB-only restriction is not visible here.
3577 blockSet
.value
&= AddrBlockSetMacro64KB
;
3580 // Apply customized forbidden setting
3581 blockSet
.value
&= ~pIn
->forbiddenBlock
.value
;
// A non-zero maxAlign removes every block type whose block size exceeds it.
3583 if (pIn
->maxAlign
> 0)
3585 if (pIn
->maxAlign
< GetBlockSize(ADDR_SW_64KB
))
3587 blockSet
.macro64KB
= FALSE
;
3590 if (pIn
->maxAlign
< GetBlockSize(ADDR_SW_4KB
))
3592 blockSet
.macro4KB
= FALSE
;
3595 if (pIn
->maxAlign
< GetBlockSize(ADDR_SW_256B
))
3597 blockSet
.micro
= FALSE
;
// Per block type (256B / 4KB / 64KB): block alignment, padded dimensions and padded size.
3601 Dim3d blkAlign
[AddrBlockMaxTiledType
] = {{0}, {0}, {0}};
3602 Dim3d paddedDim
[AddrBlockMaxTiledType
] = {{0}, {0}, {0}};
3603 UINT_64 padSize
[AddrBlockMaxTiledType
] = {0};
3607 returnCode
= ComputeBlockDimensionForSurf(&blkAlign
[AddrBlockMicro
].w
,
3608 &blkAlign
[AddrBlockMicro
].h
,
3609 &blkAlign
[AddrBlockMicro
].d
,
3615 if (returnCode
== ADDR_OK
)
// Display resources get their block width aligned to 32 (same for 4KB and 64KB below).
3617 if (displayResource
)
3619 blkAlign
[AddrBlockMicro
].w
= PowTwoAlign(blkAlign
[AddrBlockMicro
].w
, 32);
3621 else if ((blkAlign
[AddrBlockMicro
].w
>= width
) && (blkAlign
[AddrBlockMicro
].h
>= height
) &&
3622 (minSizeAlign
<= GetBlockSize(ADDR_SW_256B
)))
3624 // If one 256B block can contain the surface, don't bother bigger block type
3625 blockSet
.macro4KB
= FALSE
;
3626 blockSet
.macro64KB
= FALSE
;
3627 blockSet
.var
= FALSE
;
3630 padSize
[AddrBlockMicro
] = ComputePadSize(&blkAlign
[AddrBlockMicro
], width
, height
,
3631 slice
, &paddedDim
[AddrBlockMicro
]);
3635 if ((returnCode
== ADDR_OK
) && blockSet
.macro4KB
)
3637 returnCode
= ComputeBlockDimensionForSurf(&blkAlign
[AddrBlock4KB
].w
,
3638 &blkAlign
[AddrBlock4KB
].h
,
3639 &blkAlign
[AddrBlock4KB
].d
,
3645 if (returnCode
== ADDR_OK
)
3647 if (displayResource
)
3649 blkAlign
[AddrBlock4KB
].w
= PowTwoAlign(blkAlign
[AddrBlock4KB
].w
, 32);
3652 padSize
[AddrBlock4KB
] = ComputePadSize(&blkAlign
[AddrBlock4KB
], width
, height
,
3653 slice
, &paddedDim
[AddrBlock4KB
]);
3655 ADDR_ASSERT(padSize
[AddrBlock4KB
] >= padSize
[AddrBlockMicro
]);
3659 if ((returnCode
== ADDR_OK
) && blockSet
.macro64KB
)
3661 returnCode
= ComputeBlockDimensionForSurf(&blkAlign
[AddrBlock64KB
].w
,
3662 &blkAlign
[AddrBlock64KB
].h
,
3663 &blkAlign
[AddrBlock64KB
].d
,
3669 if (returnCode
== ADDR_OK
)
3671 if (displayResource
)
3673 blkAlign
[AddrBlock64KB
].w
= PowTwoAlign(blkAlign
[AddrBlock64KB
].w
, 32);
3676 padSize
[AddrBlock64KB
] = ComputePadSize(&blkAlign
[AddrBlock64KB
], width
, height
,
3677 slice
, &paddedDim
[AddrBlock64KB
]);
3679 ADDR_ASSERT(padSize
[AddrBlock64KB
] >= padSize
[AddrBlock4KB
]);
3680 ADDR_ASSERT(padSize
[AddrBlock64KB
] >= padSize
[AddrBlockMicro
]);
3684 if (returnCode
== ADDR_OK
)
// minSizeAlign is divided by the element size in bytes (bpp >> 3), with a minimum of 1, and
// every padded size is aligned up to it.
3686 UINT_64 minSizeAlignInElement
= Max(minSizeAlign
/ (bpp
>> 3), 1u);
3688 for (UINT_32 i
= AddrBlockMicro
; i
< AddrBlockMaxTiledType
; i
++)
3690 padSize
[i
] = PowTwoAlign(padSize
[i
], minSizeAlignInElement
);
3693 // Use minimum block type which meets all conditions above if flag minimizeAlign was set
3694 if (pIn
->flags
.minimizeAlign
)
3696 // If padded size of 64KB block is larger than padded size of 256B block or 4KB
3697 // block, filter out 64KB block from candidate list
3698 if (blockSet
.macro64KB
&&
3699 ((blockSet
.micro
&& (padSize
[AddrBlockMicro
] < padSize
[AddrBlock64KB
])) ||
3700 (blockSet
.macro4KB
&& (padSize
[AddrBlock4KB
] < padSize
[AddrBlock64KB
]))))
3702 blockSet
.macro64KB
= FALSE
;
3705 // If padded size of 4KB block is larger than padded size of 256B block,
3706 // filter out 4KB block from candidate list
3707 if (blockSet
.macro4KB
&&
3709 (padSize
[AddrBlockMicro
] < padSize
[AddrBlock4KB
]))
3711 blockSet
.macro4KB
= FALSE
;
3714 // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint
3715 else if (pIn
->flags
.opt4space
)
// threshold = 1.5 x the padded size of the smallest block type still in the candidate set.
3717 UINT_64 threshold
= blockSet
.micro
? padSize
[AddrBlockMicro
] :
3718 (blockSet
.macro4KB
? padSize
[AddrBlock4KB
] : padSize
[AddrBlock64KB
]);
3720 threshold
+= threshold
>> 1;
3722 if (blockSet
.macro64KB
&& (padSize
[AddrBlock64KB
] > threshold
))
3724 blockSet
.macro64KB
= FALSE
;
3727 if (blockSet
.macro4KB
&& (padSize
[AddrBlock4KB
] > threshold
))
3729 blockSet
.macro4KB
= FALSE
;
3734 if (blockSet
.macro64KB
&&
3735 (padSize
[AddrBlock64KB
] >= static_cast<UINT_64
>(width
) * height
* slice
* 2) &&
3736 ((blockSet
.value
& ~AddrBlockSetMacro64KB
) != 0))
3738 // If 64KB block waste more than half memory on padding, filter it out from
3739 // candidate list when it is not the only choice left
3740 blockSet
.macro64KB
= FALSE
;
3744 if (blockSet
.value
== 0)
3746 // Bad things happen, client will not get any useful information from AddrLib.
3747 // Maybe we should fill in some output earlier instead of outputting nothing?
3748 ADDR_ASSERT_ALWAYS();
3749 returnCode
= ADDR_INVALIDPARAMS
;
// Publish the surviving block set; XOR stays allowed only if a macro or var block remains.
3753 pOut
->validBlockSet
= blockSet
;
3754 pOut
->canXor
= pOut
->canXor
&&
3755 (blockSet
.macro4KB
|| blockSet
.macro64KB
|| blockSet
.var
);
// Macro blocks: choose the 64KB variant when available, else 4KB, of the preferred swizzle type.
3757 if (blockSet
.macro64KB
|| blockSet
.macro4KB
)
3759 if (addrPreferredSwSet
.value
== AddrSwSetZ
)
3761 pOut
->swizzleMode
= blockSet
.macro64KB
? ADDR_SW_64KB_Z
: ADDR_SW_4KB_Z
;
3763 else if (addrPreferredSwSet
.value
== AddrSwSetS
)
3765 pOut
->swizzleMode
= blockSet
.macro64KB
? ADDR_SW_64KB_S
: ADDR_SW_4KB_S
;
3767 else if (addrPreferredSwSet
.value
== AddrSwSetD
)
3769 pOut
->swizzleMode
= blockSet
.macro64KB
? ADDR_SW_64KB_D
: ADDR_SW_4KB_D
;
3773 ADDR_ASSERT(addrPreferredSwSet
.value
== AddrSwSetR
);
3774 pOut
->swizzleMode
= blockSet
.macro64KB
? ADDR_SW_64KB_R
: ADDR_SW_4KB_R
;
// The _T / _X variant is reached by adding a fixed enum distance to the base mode.
3777 if (prtXor
&& blockSet
.macro64KB
)
3779 // Client wants PRTXOR, give back _T swizzle mode if 64KB is available
3780 const UINT_32 prtGap
= ADDR_SW_64KB_Z_T
- ADDR_SW_64KB_Z
;
3781 pOut
->swizzleMode
= static_cast<AddrSwizzleMode
>(pOut
->swizzleMode
+ prtGap
);
3783 else if (pOut
->canXor
)
3785 // Client wants XOR and this is allowed, return XOR version swizzle mode
3786 const UINT_32 xorGap
= ADDR_SW_4KB_Z_X
- ADDR_SW_4KB_Z
;
3787 pOut
->swizzleMode
= static_cast<AddrSwizzleMode
>(pOut
->swizzleMode
+ xorGap
);
// Only the 256B block remains: S, D or R variant according to the preferred swizzle type.
3790 else if (blockSet
.micro
)
3792 if (addrPreferredSwSet
.value
== AddrSwSetS
)
3794 pOut
->swizzleMode
= ADDR_SW_256B_S
;
3796 else if (addrPreferredSwSet
.value
== AddrSwSetD
)
3798 pOut
->swizzleMode
= ADDR_SW_256B_D
;
3802 ADDR_ASSERT(addrPreferredSwSet
.value
== AddrSwSetR
);
3803 pOut
->swizzleMode
= ADDR_SW_256B_R
;
3806 else if (blockSet
.linear
)
3808 // Fall into this branch doesn't mean linear is suitable, only no other choices!
3809 pOut
->swizzleMode
= ADDR_SW_LINEAR
;
3813 ADDR_ASSERT(blockSet
.var
);
3815 // Designer consider VAR swizzle mode is useless for most cases
3816 ADDR_UNHANDLED_CASE();
3818 returnCode
= ADDR_NOTSUPPORTED
;
3822 // Post sanity check, at least AddrLib should accept the output generated by its own
3823 if (pOut
->swizzleMode
!= ADDR_SW_LINEAR
)
3825 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn
= {0};
3826 localIn
.flags
= pIn
->flags
;
3827 localIn
.swizzleMode
= pOut
->swizzleMode
;
3828 localIn
.resourceType
= pOut
->resourceType
;
3829 localIn
.format
= pIn
->format
;
3831 localIn
.width
= width
;
3832 localIn
.height
= height
;
3833 localIn
.numSlices
= slice
;
3834 localIn
.numMipLevels
= numMipLevels
;
3835 localIn
.numSamples
= numSamples
;
3836 localIn
.numFrags
= numFrags
;
// The result of the check is not captured in the visible code.
3838 HwlComputeSurfaceInfoSanityCheck(&localIn
);
3851 ************************************************************************************************************************
3852 * Gfx9Lib::ComputeStereoInfo
3855 * Compute height alignment and right eye pipeBankXor for stereo surface
3860 ************************************************************************************************************************
// For XOR swizzle modes, derives the stereo height alignment (*pHeightAlign) and, when
// pOut->pStereoInfo is non-NULL, the right-eye swizzle bits (pStereoInfo->rightSwizzle) from
// the highest Y coordinate bit used by the pipe/bank XOR. The status is tracked in
// `returnCode`, which starts as ADDR_OK and is set to ADDR_ERROR on the failure path below.
3862 ADDR_E_RETURNCODE
Gfx9Lib::ComputeStereoInfo(
3863 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
,
3864 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
,
3865 UINT_32
* pHeightAlign
3868 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
3870 UINT_32 eqIndex
= HwlGetEquationIndex(pIn
, pOut
);
// Only proceed when the equation index is within the equation table.
3872 if (eqIndex
< m_numEquations
)
3874 if (IsXor(pIn
->swizzleMode
))
3876 const UINT_32 blkSizeLog2
= GetBlockSizeLog2(pIn
->swizzleMode
);
3877 const UINT_32 numPipeBits
= GetPipeXorBits(blkSizeLog2
);
3878 const UINT_32 numBankBits
= GetBankXorBits(blkSizeLog2
);
// log2 of bytes per element.
3879 const UINT_32 bppLog2
= Log2(pIn
->bpp
>> 3);
// Highest Y bit index inside one 256B block; the equation is only read by the asserts below.
3880 const UINT_32 maxYCoordBlock256
= Log2(Block256_2d
[bppLog2
].h
) - 1;
3881 MAYBE_UNUSED
const ADDR_EQUATION
*pEqToCheck
= &m_equationTable
[eqIndex
];
3883 ADDR_ASSERT(maxYCoordBlock256
==
3884 GetMaxValidChannelIndex(&pEqToCheck
->addr
[0], GetBlockSizeLog2(ADDR_SW_256B
), 1));
// Highest Y bit index used by the base equation: half of the bits above the 256B block are
// added on top of the 256B value.
3886 const UINT_32 maxYCoordInBaseEquation
=
3887 (blkSizeLog2
- GetBlockSizeLog2(ADDR_SW_256B
)) / 2 + maxYCoordBlock256
;
3889 ADDR_ASSERT(maxYCoordInBaseEquation
==
3890 GetMaxValidChannelIndex(&pEqToCheck
->addr
[0], blkSizeLog2
, 1));
// Highest Y bit index used by the pipe XOR (0 when there are no pipe bits).
3892 const UINT_32 maxYCoordInPipeXor
= (numPipeBits
== 0) ? 0 : maxYCoordBlock256
+ numPipeBits
;
3894 ADDR_ASSERT(maxYCoordInPipeXor
==
3895 GetMaxValidChannelIndex(&pEqToCheck
->xor1
[m_pipeInterleaveLog2
], numPipeBits
, 1));
// Highest Y bit index used by the bank XOR (0 when there are no bank bits).
3897 const UINT_32 maxYCoordInBankXor
= (numBankBits
== 0) ?
3898 0 : maxYCoordBlock256
+ (numPipeBits
+ 1) / 2 + numBankBits
;
3900 ADDR_ASSERT(maxYCoordInBankXor
==
3901 GetMaxValidChannelIndex(&pEqToCheck
->xor1
[m_pipeInterleaveLog2
+ numPipeBits
], numBankBits
, 1));
3903 const UINT_32 maxYCoordInPipeBankXor
= Max(maxYCoordInPipeXor
, maxYCoordInBankXor
);
// Extra alignment is only set when the XOR uses a Y bit above those of the base equation.
3905 if (maxYCoordInPipeBankXor
> maxYCoordInBaseEquation
)
3907 *pHeightAlign
= 1u << maxYCoordInPipeBankXor
;
3909 if (pOut
->pStereoInfo
!= NULL
)
3911 pOut
->pStereoInfo
->rightSwizzle
= 0;
// rightSwizzle bits are set only when the aligned height is not a multiple of twice the
// height alignment.
3913 if ((PowTwoAlign(pIn
->height
, *pHeightAlign
) % (*pHeightAlign
* 2)) != 0)
3915 if (maxYCoordInPipeXor
== maxYCoordInPipeBankXor
)
3917 pOut
->pStereoInfo
->rightSwizzle
|= (1u << 1);
3920 if (maxYCoordInBankXor
== maxYCoordInPipeBankXor
)
3922 pOut
->pStereoInfo
->rightSwizzle
|=
3923 1u << ((numPipeBits
% 2) ? numPipeBits
: numPipeBits
+ 1);
3926 ADDR_ASSERT(pOut
->pStereoInfo
->rightSwizzle
==
3927 GetCoordActiveMask(&pEqToCheck
->xor1
[m_pipeInterleaveLog2
],
3928 numPipeBits
+ numBankBits
, 1, maxYCoordInPipeBankXor
));
// Failure path; presumably taken when the equation index is out of range — confirm against
// the full source.
3936 ADDR_ASSERT_ALWAYS();
3937 returnCode
= ADDR_ERROR
;
3944 ************************************************************************************************************************
3945 * Gfx9Lib::HwlComputeSurfaceInfoTiled
3948 * Internal function to calculate alignment for tiled surface
3952 ************************************************************************************************************************
// Computes pitch, height, slice count, mip-chain layout, sizes and base alignment
// for a tiled surface and stores them in *pOut.
// Visible failure results: ADDR_INVALIDPARAMS when a caller-supplied pitchInElement is
// not a multiple of the pitch alignment or is smaller than the computed pitch; any
// error returned by ComputeBlockDimensionForSurf or ComputeStereoInfo is carried in returnCode.
// NOTE(review): several continuation lines (call arguments, braces, else) are not present
// in this listing; comments below describe only what is visible.
3954 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSurfaceInfoTiled(
3955 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
3956 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
// Block dimensions are computed first (remaining call arguments are not visible here).
3959 ADDR_E_RETURNCODE returnCode
= ComputeBlockDimensionForSurf(&pOut
->blockWidth
,
3967 if (returnCode
== ADDR_OK
)
// Pitch alignment starts at one block width.
3969 UINT_32 pitchAlignInElement
= pOut
->blockWidth
;
// Single-mip, single-sample 2D display/rotated surfaces get the stricter alignment below.
3971 if ((IsTex2d(pIn
->resourceType
) == TRUE
) &&
3972 (pIn
->flags
.display
|| pIn
->flags
.rotated
) &&
3973 (pIn
->numMipLevels
<= 1) &&
3974 (pIn
->numSamples
<= 1) &&
3975 (pIn
->numFrags
<= 1))
3977 // Display engine needs pitch align to be at least 32 pixels.
3978 pitchAlignInElement
= PowTwoAlign(pitchAlignInElement
, 32);
3981 pOut
->pitch
= PowTwoAlign(pIn
->width
, pitchAlignInElement
);
// A caller-supplied pitch is honoured for a single mip level only; it must be a multiple
// of the alignment and not smaller than the computed pitch.
3983 if ((pIn
->numMipLevels
<= 1) && (pIn
->pitchInElement
> 0))
3985 if ((pIn
->pitchInElement
% pitchAlignInElement
) != 0)
3987 returnCode
= ADDR_INVALIDPARAMS
;
3989 else if (pIn
->pitchInElement
< pOut
->pitch
)
3991 returnCode
= ADDR_INVALIDPARAMS
;
3995 pOut
->pitch
= pIn
->pitchInElement
;
// heightAlign stays 0 unless the stereo path below raises it.
3999 UINT_32 heightAlign
= 0;
4001 if (pIn
->flags
.qbStereo
)
4003 returnCode
= ComputeStereoInfo(pIn
, pOut
, &heightAlign
);
4006 if (returnCode
== ADDR_OK
)
// Align height to the block height, then to the stereo alignment when one was produced.
4008 pOut
->height
= PowTwoAlign(pIn
->height
, pOut
->blockHeight
);
4010 if (heightAlign
> 1)
4012 pOut
->height
= PowTwoAlign(pOut
->height
, heightAlign
);
4015 pOut
->numSlices
= PowTwoAlign(pIn
->numSlices
, pOut
->blockSlices
);
// Defaults describing "no mip in the tail"; updated below when numMipLevels > 1.
4017 pOut
->epitchIsHeight
= FALSE
;
4018 pOut
->mipChainInTail
= FALSE
;
4019 pOut
->firstMipIdInTail
= pIn
->numMipLevels
;
4021 pOut
->mipChainPitch
= pOut
->pitch
;
4022 pOut
->mipChainHeight
= pOut
->height
;
4023 pOut
->mipChainSlice
= pOut
->numSlices
;
4025 if (pIn
->numMipLevels
> 1)
4027 pOut
->firstMipIdInTail
= GetMipChainInfo(pIn
->resourceType
,
4039 const UINT_32 endingMipId
= Min(pOut
->firstMipIdInTail
, pIn
->numMipLevels
- 1);
// endingMipId == 0: with more than one mip level this means mip 0 is itself the first
// mip in the tail, so the surface takes the mip tail dimensions.
4041 if (endingMipId
== 0)
4043 const Dim3d tailMaxDim
= GetMipTailDim(pIn
->resourceType
,
4049 pOut
->epitchIsHeight
= TRUE
;
4050 pOut
->pitch
= tailMaxDim
.w
;
4051 pOut
->height
= tailMaxDim
.h
;
4052 pOut
->numSlices
= IsThick(pIn
->resourceType
, pIn
->swizzleMode
) ?
4053 tailMaxDim
.d
: pIn
->numSlices
;
4054 pOut
->mipChainInTail
= TRUE
;
// Mip 0 size in blocks, used to decide how the chain is extended.
4058 UINT_32 mip0WidthInBlk
= pOut
->pitch
/ pOut
->blockWidth
;
4059 UINT_32 mip0HeightInBlk
= pOut
->height
/ pOut
->blockHeight
;
4061 AddrMajorMode majorMode
= GetMajorMode(pIn
->resourceType
,
4065 pOut
->numSlices
/ pOut
->blockSlices
);
// Y-major: the chain grows in pitch by mip 1's width.
// NOTE(review): the body of the (mip1WidthInBlk == 1) && (endingMipId > 2) case is not visible here.
4066 if (majorMode
== ADDR_MAJOR_Y
)
4068 UINT_32 mip1WidthInBlk
= RoundHalf(mip0WidthInBlk
);
4070 if ((mip1WidthInBlk
== 1) && (endingMipId
> 2))
4075 pOut
->mipChainPitch
+= (mip1WidthInBlk
* pOut
->blockWidth
);
4077 pOut
->epitchIsHeight
= FALSE
;
// Otherwise (presumably the non-Y-major branch): the chain grows in height by mip 1's height.
4081 UINT_32 mip1HeightInBlk
= RoundHalf(mip0HeightInBlk
);
4083 if ((mip1HeightInBlk
== 1) && (endingMipId
> 2))
4088 pOut
->mipChainHeight
+= (mip1HeightInBlk
* pOut
->blockHeight
);
4090 pOut
->epitchIsHeight
= TRUE
;
// Per-mip offsets are only produced when the caller supplied a pMipInfo array.
4094 if (pOut
->pMipInfo
!= NULL
)
4096 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
4098 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
4100 Dim3d mipStartPos
= {0};
4101 UINT_32 mipTailOffsetInBytes
= 0;
4103 mipStartPos
= GetMipStartPos(pIn
->resourceType
,
4113 &mipTailOffsetInBytes
);
// Turn the mip's block coordinates into a linear block index within the chain,
// then into a byte offset (index << log2(block size)).
4115 UINT_32 pitchInBlock
=
4116 pOut
->mipChainPitch
/ pOut
->blockWidth
;
4117 UINT_32 sliceInBlock
=
4118 (pOut
->mipChainHeight
/ pOut
->blockHeight
) * pitchInBlock
;
4119 UINT_64 blockIndex
=
4120 mipStartPos
.d
* sliceInBlock
+ mipStartPos
.h
* pitchInBlock
+ mipStartPos
.w
;
4121 UINT_64 macroBlockOffset
=
4122 blockIndex
<< GetBlockSizeLog2(pIn
->swizzleMode
);
4124 pOut
->pMipInfo
[i
].macroBlockOffset
= macroBlockOffset
;
4125 pOut
->pMipInfo
[i
].mipTailOffset
= mipTailOffsetInBytes
;
// Presumably the single-mip counterpart of the numMipLevels > 1 branch: pMipInfo[0]
// mirrors the surface itself; depth is numSlices only for 3D resources.
4129 else if (pOut
->pMipInfo
!= NULL
)
4131 pOut
->pMipInfo
[0].pitch
= pOut
->pitch
;
4132 pOut
->pMipInfo
[0].height
= pOut
->height
;
4133 pOut
->pMipInfo
[0].depth
= IsTex3d(pIn
->resourceType
)? pOut
->numSlices
: 1;
4134 pOut
->pMipInfo
[0].offset
= 0;
// Widen to 64 bits before multiplying so the slice size is computed in UINT_64.
4137 pOut
->sliceSize
= static_cast<UINT_64
>(pOut
->mipChainPitch
) * pOut
->mipChainHeight
*
4138 (pIn
->bpp
>> 3) * pIn
->numFrags
;
4139 pOut
->surfSize
= pOut
->sliceSize
* pOut
->mipChainSlice
;
4140 pOut
->baseAlign
= ComputeSurfaceBaseAlignTiled(pIn
->swizzleMode
);
// Raise the base alignment to at least PrtAlignment.
// NOTE(review): the condition guarding this statement is not visible in this listing.
4144 pOut
->baseAlign
= Max(pOut
->baseAlign
, PrtAlignment
);
4153 ************************************************************************************************************************
4154 * Gfx9Lib::HwlComputeSurfaceInfoLinear
4157 * Internal function to calculate alignment for linear surface
4161 ************************************************************************************************************************
// Computes pitch, heights, sizes and alignment for a linear surface and stores them in *pOut.
// Visible failure results: ADDR_INVALIDPARAMS for a 1D resource with height > 1, or when
// the resulting pitch or actualHeight is 0; errors from ApplyCustomizedPitchHeight and
// ComputeSurfaceLinearPadding are carried in returnCode.
// NOTE(review): the declaration of the local `pitch` and the else/brace lines are not
// present in this listing.
4163 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSurfaceInfoLinear(
4164 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
4165 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
4168 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
4170 UINT_32 actualHeight
= 0;
4171 UINT_32 elementBytes
= pIn
->bpp
>> 3;
// Alignment in bytes (it is divided by elementBytes below): PrtAlignment for PRT, else 256.
4172 const UINT_32 alignment
= pIn
->flags
.prt
? PrtAlignment
: 256;
// 1D resources: height above 1 is rejected.
4174 if (IsTex1d(pIn
->resourceType
))
4176 if (pIn
->height
> 1)
4178 returnCode
= ADDR_INVALIDPARAMS
;
4182 const UINT_32 pitchAlignInElement
= alignment
/ elementBytes
;
// Each mip level occupies one row, so the chain height equals the mip count.
4184 pitch
= PowTwoAlign(pIn
->width
, pitchAlignInElement
);
4185 actualHeight
= pIn
->numMipLevels
;
// Caller-customized pitch/height is applied to non-PRT surfaces only.
4187 if (pIn
->flags
.prt
== FALSE
)
4189 returnCode
= ApplyCustomizedPitchHeight(pIn
, elementBytes
, pitchAlignInElement
,
4190 &pitch
, &actualHeight
);
4193 if (returnCode
== ADDR_OK
)
4195 if (pOut
->pMipInfo
!= NULL
)
// Mip i starts i rows into the surface: offset = pitch * elementBytes * i.
4197 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
4199 pOut
->pMipInfo
[i
].offset
= pitch
* elementBytes
* i
;
4200 pOut
->pMipInfo
[i
].pitch
= pitch
;
4201 pOut
->pMipInfo
[i
].height
= 1;
4202 pOut
->pMipInfo
[i
].depth
= 1;
// Presumably the non-1D branch: padding and per-mip info come from ComputeSurfaceLinearPadding.
4210 returnCode
= ComputeSurfaceLinearPadding(pIn
, &pitch
, &actualHeight
, pOut
->pMipInfo
);
// A zero pitch or height is rejected.
4213 if ((pitch
== 0) || (actualHeight
== 0))
4215 returnCode
= ADDR_INVALIDPARAMS
;
4218 if (returnCode
== ADDR_OK
)
// height/numSlices are passed through unpadded; the padded height goes to mipChainHeight.
4220 pOut
->pitch
= pitch
;
4221 pOut
->height
= pIn
->height
;
4222 pOut
->numSlices
= pIn
->numSlices
;
4223 pOut
->mipChainPitch
= pitch
;
4224 pOut
->mipChainHeight
= actualHeight
;
4225 pOut
->mipChainSlice
= pOut
->numSlices
;
4226 pOut
->epitchIsHeight
= (pIn
->numMipLevels
> 1) ? TRUE
: FALSE
;
// Widen to 64 bits before multiplying.
4227 pOut
->sliceSize
= static_cast<UINT_64
>(pOut
->pitch
) * actualHeight
* elementBytes
;
4228 pOut
->surfSize
= pOut
->sliceSize
* pOut
->numSlices
;
// ADDR_SW_LINEAR_GENERAL only needs element-size alignment and a block width of 1.
4229 pOut
->baseAlign
= (pIn
->swizzleMode
== ADDR_SW_LINEAR_GENERAL
) ? (pIn
->bpp
/ 8) : alignment
;
4230 pOut
->blockWidth
= (pIn
->swizzleMode
== ADDR_SW_LINEAR_GENERAL
) ? 1 : (256 / elementBytes
);
4231 pOut
->blockHeight
= 1;
4232 pOut
->blockSlices
= 1;
4235 // Post calculation validate
4236 ADDR_ASSERT(pOut
->sliceSize
> 0);
4242 ************************************************************************************************************************
4243 * Gfx9Lib::GetMipChainInfo
4246 * Internal function to get out information about mip chain
4249 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4250 ************************************************************************************************************************
// Walks mip levels 0 .. numMipLevel-1, tracking each level's pitch/height/depth, and
// returns firstMipIdInTail. When pMipInfo is non-NULL, pitch, height, depth and offset
// are recorded for every level.
// NOTE(review): several parameters (bpp, mip0Width, mip0Height, mip0Depth, blockWidth,
// blockDepth), the declarations of `offset` and `mipSize`, and most branch guards are
// not present in this listing; comments below describe only what is visible.
4252 UINT_32
Gfx9Lib::GetMipChainInfo(
4253 AddrResourceType resourceType
,
4254 AddrSwizzleMode swizzleMode
,
4260 UINT_32 blockHeight
,
4262 UINT_32 numMipLevel
,
4263 ADDR2_MIP_INFO
* pMipInfo
) const
4265 const Dim3d tailMaxDim
=
4266 GetMipTailDim(resourceType
, swizzleMode
, blockWidth
, blockHeight
, blockDepth
);
// Running dimensions of the current mip, starting from mip 0; depth is 1 unless 3D.
4268 UINT_32 mipPitch
= mip0Width
;
4269 UINT_32 mipHeight
= mip0Height
;
4270 UINT_32 mipDepth
= IsTex3d(resourceType
) ? mip0Depth
: 1;
// firstMipIdInTail keeps the value numMipLevel unless it is reassigned inside the loop.
4272 UINT_32 firstMipIdInTail
= numMipLevel
;
4273 BOOL_32 inTail
= FALSE
;
4274 BOOL_32 finalDim
= FALSE
;
4275 BOOL_32 is3dThick
= IsThick(resourceType
, swizzleMode
);
4276 BOOL_32 is3dThin
= IsTex3d(resourceType
) && (is3dThick
== FALSE
);
4278 for (UINT_32 mipId
= 0; mipId
< numMipLevel
; mipId
++)
4282 if (finalDim
== FALSE
)
// Mip size in bytes, with or without depth (the selecting condition is not visible here).
4288 mipSize
= mipPitch
* mipHeight
* mipDepth
* (bpp
>> 3);
4292 mipSize
= mipPitch
* mipHeight
* (bpp
>> 3);
// Dimensions are replaced by the 256B block dimensions for this element size
// (3D-Z table or 2D table; the guards are not visible here).
4297 UINT_32 index
= Log2(bpp
>> 3);
4301 mipPitch
= Block256_3dZ
[index
].w
;
4302 mipHeight
= Block256_3dZ
[index
].h
;
4303 mipDepth
= Block256_3dZ
[index
].d
;
4307 mipPitch
= Block256_2d
[index
].w
;
4308 mipHeight
= Block256_2d
[index
].h
;
// Test the current mip dimensions against the mip tail limits.
4317 inTail
= IsInMipTail(resourceType
, swizzleMode
, tailMaxDim
,
4318 mipPitch
, mipHeight
, mipDepth
);
// Presumably taken when inTail is set: remember this mip as the first one in the tail
// and use the tail dimensions for it.
4322 firstMipIdInTail
= mipId
;
4323 mipPitch
= tailMaxDim
.w
;
4324 mipHeight
= tailMaxDim
.h
;
4328 mipDepth
= tailMaxDim
.d
;
// Presumably the not-in-tail case: pad the mip up to whole blocks.
4333 mipPitch
= PowTwoAlign(mipPitch
, blockWidth
);
4334 mipHeight
= PowTwoAlign(mipHeight
, blockHeight
);
4338 mipDepth
= PowTwoAlign(mipDepth
, blockDepth
);
// Record this mip's dimensions and byte offset when the caller asked for them.
4343 if (pMipInfo
!= NULL
)
4345 pMipInfo
[mipId
].pitch
= mipPitch
;
4346 pMipInfo
[mipId
].height
= mipHeight
;
4347 pMipInfo
[mipId
].depth
= mipDepth
;
4348 pMipInfo
[mipId
].offset
= offset
;
4351 offset
+= (mipPitch
* mipHeight
* mipDepth
* (bpp
>> 3));
// Halve dimensions for the next level, never going below 1.
4357 mipDepth
= Max(mipDepth
>> 1, 1u);
4362 mipPitch
= Max(mipPitch
>> 1, 1u);
4363 mipHeight
= Max(mipHeight
>> 1, 1u);
4365 if (is3dThick
|| is3dThin
)
4367 mipDepth
= Max(mipDepth
>> 1, 1u);
4372 return firstMipIdInTail
;
4376 ************************************************************************************************************************
4377 * Gfx9Lib::GetMetaMiptailInfo
4380 * Get mip tail coordinate information.
4384 ************************************************************************************************************************
// Fills pInfo[0 .. numMipInTail-1] with the start coordinates and dimensions of each mip
// in the meta mip tail, advancing mipCoord from one mip to the next.
// NOTE(review): the declaration of `minInc`, several of its assignments, the switch case
// labels and most branch guards are not present in this listing.
4386 VOID
Gfx9Lib::GetMetaMiptailInfo(
4387 ADDR2_META_MIP_INFO
* pInfo
, ///< [out] output structure to store per mip coord
4388 Dim3d mipCoord
, ///< [in] mip tail base coord
4389 UINT_32 numMipInTail
, ///< [in] number of mips in tail
4390 Dim3d
* pMetaBlkDim
///< [in] meta block width/height/depth
// A meta block deeper than 1 is treated as thick.
4393 BOOL_32 isThick
= (pMetaBlkDim
->d
> 1);
// The first mip in the tail starts at the full meta block width and half its height.
4394 UINT_32 mipWidth
= pMetaBlkDim
->w
;
4395 UINT_32 mipHeight
= pMetaBlkDim
->h
>> 1;
4396 UINT_32 mipDepth
= pMetaBlkDim
->d
;
// In this case minInc follows the meta block height: 128 for h >= 512, 64 for h == 256,
// otherwise 32 (the guard selecting this case is not visible here).
4401 minInc
= (pMetaBlkDim
->h
>= 512) ? 128 : ((pMetaBlkDim
->h
== 256) ? 64 : 32);
4403 else if (pMetaBlkDim
->h
>= 1024)
4407 else if (pMetaBlkDim
->h
== 512)
// 0xFFFFFFFF marks blk32MipId as not yet assigned (it is compared against below).
4416 UINT_32 blk32MipId
= 0xFFFFFFFF;
4418 for (UINT_32 mip
= 0; mip
< numMipInTail
; mip
++)
// Record this mip's position and size before mipCoord is advanced.
4420 pInfo
[mip
].inMiptail
= TRUE
;
4421 pInfo
[mip
].startX
= mipCoord
.w
;
4422 pInfo
[mip
].startY
= mipCoord
.h
;
4423 pInfo
[mip
].startZ
= mipCoord
.d
;
4424 pInfo
[mip
].width
= mipWidth
;
4425 pInfo
[mip
].height
= mipHeight
;
4426 pInfo
[mip
].depth
= mipDepth
;
4430 if (blk32MipId
== 0xFFFFFFFF)
// Later mips are positioned relative to the start of mip blk32MipId.
4435 mipCoord
.w
= pInfo
[blk32MipId
].startX
;
4436 mipCoord
.h
= pInfo
[blk32MipId
].startY
;
4437 mipCoord
.d
= pInfo
[blk32MipId
].startZ
;
4439 switch (mip
- blk32MipId
)
4442 mipCoord
.w
+= 32; // 16x16
4445 mipCoord
.h
+= 32; // 8x8
4448 mipCoord
.h
+= 32; // 4x4
4452 mipCoord
.h
+= 32; // 2x2
4456 mipCoord
.h
+= 32; // 1x1
4459 // The following are for BC/ASTC formats
4461 mipCoord
.h
+= 48; // 1/2 x 1/2
4464 mipCoord
.h
+= 48; // 1/4 x 1/4
4468 mipCoord
.h
+= 48; // 1/8 x 1/8
4472 mipCoord
.h
+= 48; // 1/16 x 1/16
4476 ADDR_ASSERT_ALWAYS();
// The mip right at blk32MipId is 16 wide; every later one is 8 wide.
4480 mipWidth
= ((mip
- blk32MipId
) == 0) ? 16 : 8;
4481 mipHeight
= mipWidth
;
4485 mipDepth
= mipWidth
;
4490 if (mipWidth
<= minInc
)
4492 // if we're below the minimal increment...
4495 // For 3d, just go in z direction
4496 mipCoord
.d
+= mipDepth
;
4500 // For 2d, first go across, then down
4501 if ((mipWidth
* 2) == minInc
)
4503 // if we're 2 mips below, that's when we go back in x, and down in y
4504 mipCoord
.w
-= minInc
;
4505 mipCoord
.h
+= minInc
;
4509 // otherwise, just go across in x
4510 mipCoord
.w
+= minInc
;
4516 // On even mip, go down, otherwise, go across
4519 mipCoord
.w
+= mipWidth
;
4523 mipCoord
.h
+= mipHeight
;
4526 // Divide the width by 2
4528 // After the first mip in tail, the mip is always a square
4529 mipHeight
= mipWidth
;
4530 // ...or for 3d, a cube
4533 mipDepth
= mipWidth
;
4540 ************************************************************************************************************************
4541 * Gfx9Lib::GetMipStartPos
4544 * Internal function to get out information about mip logical start position
4547 * logical start position in macro block width/height/depth of one mip level within one slice
4548 ************************************************************************************************************************
// Computes where mip level mipId starts, in units of macro blocks (w/h/d), and writes
// the mip's byte offset inside the mip tail to *pMipTailBytesOffset.
// NOTE(review): several parameters (width, height, depth, blockWidth, blockDepth, mipId),
// the final return and most else/brace lines are not present in this listing.
4550 Dim3d
Gfx9Lib::GetMipStartPos(
4551 AddrResourceType resourceType
,
4552 AddrSwizzleMode swizzleMode
,
4557 UINT_32 blockHeight
,
4560 UINT_32 log2ElementBytes
,
4561 UINT_32
* pMipTailBytesOffset
) const
4563 Dim3d mipStartPos
= {0};
4564 const Dim3d tailMaxDim
= GetMipTailDim(resourceType
, swizzleMode
, blockWidth
, blockHeight
, blockDepth
);
4566 // Report mip in tail if Mip0 is already in mip tail
4567 BOOL_32 inMipTail
= IsInMipTail(resourceType
, swizzleMode
, tailMaxDim
, width
, height
, depth
);
4568 UINT_32 log2blkSize
= GetBlockSizeLog2(swizzleMode
);
// Defaults to mipId; rebased to (mipId - endingMip) below when mipId >= endingMip.
4569 UINT_32 mipIndexInTail
= mipId
;
4571 if (inMipTail
== FALSE
)
4573 // Mip 0 dimension, unit in block
4574 UINT_32 mipWidthInBlk
= width
/ blockWidth
;
4575 UINT_32 mipHeightInBlk
= height
/ blockHeight
;
4576 UINT_32 mipDepthInBlk
= depth
/ blockDepth
;
4577 AddrMajorMode majorMode
= GetMajorMode(resourceType
,
4583 UINT_32 endingMip
= mipId
+ 1;
// Accumulate the start position by walking mips 1 .. mipId.
4585 for (UINT_32 i
= 1; i
<= mipId
; i
++)
// For i == 1 and i == 3 a Y-major layout steps in w and the other case steps in h;
// for all other i, X-major steps in w, Y-major in h and the remaining case in d.
4587 if ((i
== 1) || (i
== 3))
4589 if (majorMode
== ADDR_MAJOR_Y
)
4591 mipStartPos
.w
+= mipWidthInBlk
;
4595 mipStartPos
.h
+= mipHeightInBlk
;
4600 if (majorMode
== ADDR_MAJOR_X
)
4602 mipStartPos
.w
+= mipWidthInBlk
;
4604 else if (majorMode
== ADDR_MAJOR_Y
)
4606 mipStartPos
.h
+= mipHeightInBlk
;
4610 mipStartPos
.d
+= mipDepthInBlk
;
// Decide whether the current mip has shrunk enough (in blocks) to count as in the tail;
// the thick case selects a test from log2blkSize % 3, the thin case from log2blkSize & 1.
4614 BOOL_32 inTail
= FALSE
;
4616 if (IsThick(resourceType
, swizzleMode
))
4618 UINT_32 dim
= log2blkSize
% 3;
4623 (mipWidthInBlk
<= 2) && (mipHeightInBlk
== 1) && (mipDepthInBlk
<= 2);
4628 (mipWidthInBlk
== 1) && (mipHeightInBlk
<= 2) && (mipDepthInBlk
<= 2);
4633 (mipWidthInBlk
<= 2) && (mipHeightInBlk
<= 2) && (mipDepthInBlk
== 1);
4638 if (log2blkSize
& 1)
4640 inTail
= (mipWidthInBlk
<= 2) && (mipHeightInBlk
== 1);
4644 inTail
= (mipWidthInBlk
== 1) && (mipHeightInBlk
<= 2);
// The next mip is half the size in blocks (RoundHalf).
4654 mipWidthInBlk
= RoundHalf(mipWidthInBlk
);
4655 mipHeightInBlk
= RoundHalf(mipHeightInBlk
);
4656 mipDepthInBlk
= RoundHalf(mipDepthInBlk
);
4659 if (mipId
>= endingMip
)
4662 mipIndexInTail
= mipId
- endingMip
;
// Look up the tail offset; the table value is shifted left by 8, i.e. multiplied by 256.
4668 UINT_32 index
= mipIndexInTail
+ MaxMacroBits
- log2blkSize
;
4669 ADDR_ASSERT(index
< sizeof(MipTailOffset256B
) / sizeof(UINT_32
));
4670 *pMipTailBytesOffset
= MipTailOffset256B
[index
] << 8;
4677 ************************************************************************************************************************
4678 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4681 * Internal function to calculate address from coord for tiled swizzle surface
4685 ************************************************************************************************************************
// Computes the byte address (pOut->addr) of the element at (x, y, slice, sample) in a
// tiled surface. The address is the in-block offset OR-ed with the macro block index
// shifted left by log2 of the block size.
// returnCode comes from ComputeSurfaceInfoTiled / ApplyCustomerPipeBankXor and is set to
// ADDR_INVALIDPARAMS on the final failure path (presumably when `valid` is FALSE).
// NOTE(review): brace/else lines and some call arguments are not present in this listing.
4687 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4688 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
4689 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
* pOut
///< [out] output structure
// Rebuild a surface-info query from the address inputs, clamping dimensions and counts
// to at least 1.
4692 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn
= {0};
4693 localIn
.swizzleMode
= pIn
->swizzleMode
;
4694 localIn
.flags
= pIn
->flags
;
4695 localIn
.resourceType
= pIn
->resourceType
;
4696 localIn
.bpp
= pIn
->bpp
;
4697 localIn
.width
= Max(pIn
->unalignedWidth
, 1u);
4698 localIn
.height
= Max(pIn
->unalignedHeight
, 1u);
4699 localIn
.numSlices
= Max(pIn
->numSlices
, 1u);
4700 localIn
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
4701 localIn
.numSamples
= Max(pIn
->numSamples
, 1u);
4702 localIn
.numFrags
= Max(pIn
->numFrags
, 1u);
// A custom pitch is forwarded for a single mip level only.
4703 if (localIn
.numMipLevels
<= 1)
4705 localIn
.pitchInElement
= pIn
->pitchInElement
;
4708 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut
= {0};
4709 ADDR_E_RETURNCODE returnCode
= ComputeSurfaceInfoTiled(&localIn
, &localOut
);
// Valid only if surface info succeeded, the mode is thin or thick, and pipeBankXor is
// zero unless the swizzle mode is an xor mode.
4711 BOOL_32 valid
= (returnCode
== ADDR_OK
) &&
4712 (IsThin(pIn
->resourceType
, pIn
->swizzleMode
) ||
4713 IsThick(pIn
->resourceType
, pIn
->swizzleMode
)) &&
4714 ((pIn
->pipeBankXor
== 0) || (IsXor(pIn
->swizzleMode
)));
4718 UINT_32 log2ElementBytes
= Log2(pIn
->bpp
>> 3);
4719 Dim3d mipStartPos
= {0};
4720 UINT_32 mipTailBytesOffset
= 0;
4722 if (pIn
->numMipLevels
> 1)
4724 // Mip-map chain cannot be MSAA surface
4725 ADDR_ASSERT((pIn
->numSamples
<= 1) && (pIn
->numFrags
<= 1));
4727 mipStartPos
= GetMipStartPos(pIn
->resourceType
,
4732 localOut
.blockWidth
,
4733 localOut
.blockHeight
,
4734 localOut
.blockSlices
,
4737 &mipTailBytesOffset
);
4740 UINT_32 interleaveOffset
= 0;
4741 UINT_32 pipeBits
= 0;
4742 UINT_32 pipeXor
= 0;
4743 UINT_32 bankBits
= 0;
4744 UINT_32 bankXor
= 0;
// Thin path.
4746 if (IsThin(pIn
->resourceType
, pIn
->swizzleMode
))
4748 UINT_32 blockOffset
= 0;
4749 UINT_32 log2blkSize
= GetBlockSizeLog2(pIn
->swizzleMode
);
4751 if (IsZOrderSwizzle(pIn
->swizzleMode
))
4753 // Morton generation
4754 if ((log2ElementBytes
== 0) || (log2ElementBytes
== 2))
4756 UINT_32 totalLowBits
= 6 - log2ElementBytes
;
4757 UINT_32 mortBits
= totalLowBits
/ 2;
4758 UINT_32 lowBitsValue
= MortonGen2d(pIn
->y
, pIn
->x
, mortBits
);
4759 // Are 9 bits enough?
4760 UINT_32 highBitsValue
=
4761 MortonGen2d(pIn
->x
>> mortBits
, pIn
->y
>> mortBits
, 9) << totalLowBits
;
4762 blockOffset
= lowBitsValue
| highBitsValue
;
// The low and high parts must not share any bit (OR equals ADD).
4763 ADDR_ASSERT(blockOffset
== lowBitsValue
+ highBitsValue
);
4767 blockOffset
= MortonGen2d(pIn
->y
, pIn
->x
, 13);
4770 // Fill LSBs with sample bits
4771 if (pIn
->numSamples
> 1)
4773 blockOffset
*= pIn
->numSamples
;
4774 blockOffset
|= pIn
->sample
;
4777 // Shift according to BytesPP
4778 blockOffset
<<= log2ElementBytes
;
4782 // Micro block offset
4783 UINT_32 microBlockOffset
= ComputeSurface2DMicroBlockOffset(pIn
);
4784 blockOffset
= microBlockOffset
;
4786 // Micro block dimension
4787 ADDR_ASSERT(log2ElementBytes
< MaxNumOfBpp
);
4788 Dim2d microBlockDim
= Block256_2d
[log2ElementBytes
];
4789 // Morton generation; are 12 bits enough?
4791 MortonGen2d((pIn
->x
/ microBlockDim
.w
), (pIn
->y
/ microBlockDim
.h
), 12) << 8;
4793 // Sample bits start location
4794 UINT_32 sampleStart
= log2blkSize
- Log2(pIn
->numSamples
);
4795 // Join sample bits information to the highest Macro block bits
4796 if (IsNonPrtXor(pIn
->swizzleMode
))
4798 // Non-prt-Xor : xor highest Macro block bits with sample bits
4799 blockOffset
= blockOffset
^ (pIn
->sample
<< sampleStart
);
4803 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4804 // after this op, the blockOffset only contains log2 Macro block size bits
4805 blockOffset
%= (1 << sampleStart
);
4806 blockOffset
|= (pIn
->sample
<< sampleStart
);
4807 ADDR_ASSERT((blockOffset
>> log2blkSize
) == 0);
4811 if (IsXor(pIn
->swizzleMode
))
4813 // Mask off bits above Macro block bits to keep page synonyms working for prt
4814 if (IsPrt(pIn
->swizzleMode
))
4816 blockOffset
&= ((1 << log2blkSize
) - 1);
4819 // Preserve offset inside pipe interleave
4820 interleaveOffset
= blockOffset
& ((1 << m_pipeInterleaveLog2
) - 1);
4821 blockOffset
>>= m_pipeInterleaveLog2
;
// Fold the pipe xor bits out of the offset.
4824 pipeBits
= GetPipeXorBits(log2blkSize
);
4826 pipeXor
= FoldXor2d(blockOffset
, pipeBits
);
4827 blockOffset
>>= pipeBits
;
// Fold the bank xor bits out of the offset.
4830 bankBits
= GetBankXorBits(log2blkSize
);
4832 bankXor
= FoldXor2d(blockOffset
, bankBits
);
4833 blockOffset
>>= bankBits
;
4835 // Put all the parts back together
4836 blockOffset
<<= bankBits
;
4837 blockOffset
|= bankXor
;
4838 blockOffset
<<= pipeBits
;
4839 blockOffset
|= pipeXor
;
4840 blockOffset
<<= m_pipeInterleaveLog2
;
4841 blockOffset
|= interleaveOffset
;
// The mip tail offset must not overlap bits already set in blockOffset (OR equals ADD),
// and a non-zero tail offset requires blockOffset to fit inside one block.
4844 ADDR_ASSERT((blockOffset
| mipTailBytesOffset
) == (blockOffset
+ mipTailBytesOffset
));
4845 ADDR_ASSERT((mipTailBytesOffset
== 0u) || (blockOffset
< (1u << log2blkSize
)));
4847 blockOffset
|= mipTailBytesOffset
;
4849 if (IsNonPrtXor(pIn
->swizzleMode
) && (pIn
->numSamples
<= 1))
4851 // Apply slice xor if not MSAA/PRT
4852 blockOffset
^= (ReverseBitVector(pIn
->slice
, pipeBits
) << m_pipeInterleaveLog2
);
4853 blockOffset
^= (ReverseBitVector(pIn
->slice
>> pipeBits
, bankBits
) <<
4854 (m_pipeInterleaveLog2
+ pipeBits
));
4857 returnCode
= ApplyCustomerPipeBankXor(pIn
->swizzleMode
, pIn
->pipeBankXor
,
4858 bankBits
, pipeBits
, &blockOffset
);
// Keep only the bits that address inside one block.
4860 blockOffset
%= (1 << log2blkSize
);
// Linear index of the macro block holding the element, including the mip's start position.
4862 UINT_32 pitchInMacroBlock
= localOut
.mipChainPitch
/ localOut
.blockWidth
;
4863 UINT_32 paddedHeightInMacroBlock
= localOut
.mipChainHeight
/ localOut
.blockHeight
;
4864 UINT_32 sliceSizeInMacroBlock
= pitchInMacroBlock
* paddedHeightInMacroBlock
;
4865 UINT_64 macroBlockIndex
=
4866 (pIn
->slice
+ mipStartPos
.d
) * sliceSizeInMacroBlock
+
4867 ((pIn
->y
/ localOut
.blockHeight
) + mipStartPos
.h
) * pitchInMacroBlock
+
4868 ((pIn
->x
/ localOut
.blockWidth
) + mipStartPos
.w
);
4870 pOut
->addr
= blockOffset
| (macroBlockIndex
<< log2blkSize
);
// Presumably the thick path (else of the IsThin test): 3D Morton order and 3D xor folding.
4874 UINT_32 log2blkSize
= GetBlockSizeLog2(pIn
->swizzleMode
);
4876 Dim3d microBlockDim
= Block1K_3d
[log2ElementBytes
];
4878 UINT_32 blockOffset
= MortonGen3d((pIn
->x
/ microBlockDim
.w
),
4879 (pIn
->y
/ microBlockDim
.h
),
4880 (pIn
->slice
/ microBlockDim
.d
),
4884 blockOffset
|= ComputeSurface3DMicroBlockOffset(pIn
);
4886 if (IsXor(pIn
->swizzleMode
))
4888 // Mask off bits above Macro block bits to keep page synonyms working for prt
4889 if (IsPrt(pIn
->swizzleMode
))
4891 blockOffset
&= ((1 << log2blkSize
) - 1);
4894 // Preserve offset inside pipe interleave
4895 interleaveOffset
= blockOffset
& ((1 << m_pipeInterleaveLog2
) - 1);
4896 blockOffset
>>= m_pipeInterleaveLog2
;
4899 pipeBits
= GetPipeXorBits(log2blkSize
);
4901 pipeXor
= FoldXor3d(blockOffset
, pipeBits
);
4902 blockOffset
>>= pipeBits
;
4905 bankBits
= GetBankXorBits(log2blkSize
);
4907 bankXor
= FoldXor3d(blockOffset
, bankBits
);
4908 blockOffset
>>= bankBits
;
4910 // Put all the parts back together
4911 blockOffset
<<= bankBits
;
4912 blockOffset
|= bankXor
;
4913 blockOffset
<<= pipeBits
;
4914 blockOffset
|= pipeXor
;
4915 blockOffset
<<= m_pipeInterleaveLog2
;
4916 blockOffset
|= interleaveOffset
;
// Same non-overlap checks as in the thin path.
4919 ADDR_ASSERT((blockOffset
| mipTailBytesOffset
) == (blockOffset
+ mipTailBytesOffset
));
4920 ADDR_ASSERT((mipTailBytesOffset
== 0u) || (blockOffset
< (1u << log2blkSize
)));
4921 blockOffset
|= mipTailBytesOffset
;
4923 returnCode
= ApplyCustomerPipeBankXor(pIn
->swizzleMode
, pIn
->pipeBankXor
,
4924 bankBits
, pipeBits
, &blockOffset
);
4926 blockOffset
%= (1 << log2blkSize
);
// Block coordinates of the element, including the mip's start position.
4928 UINT_32 xb
= pIn
->x
/ localOut
.blockWidth
+ mipStartPos
.w
;
4929 UINT_32 yb
= pIn
->y
/ localOut
.blockHeight
+ mipStartPos
.h
;
4930 UINT_32 zb
= pIn
->slice
/ localOut
.blockSlices
+ + mipStartPos
.d
;
4932 UINT_32 pitchInBlock
= localOut
.mipChainPitch
/ localOut
.blockWidth
;
4933 UINT_32 sliceSizeInBlock
=
4934 (localOut
.mipChainHeight
/ localOut
.blockHeight
) * pitchInBlock
;
4935 UINT_64 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
4937 pOut
->addr
= blockOffset
| (blockIndex
<< log2blkSize
);
// Failure path (presumably taken when `valid` is FALSE).
4942 returnCode
= ADDR_INVALIDPARAMS
;
4949 ************************************************************************************************************************
4950 * Gfx9Lib::ComputeSurfaceLinearPadding
4953 * Internal function to calculate padding for linear swizzle 2D/3D surface
4957 ************************************************************************************************************************
// Computes the padded width (in elements) and padded height of a linear surface and,
// when pMipInfo is non-NULL, the offset/pitch/height/depth of every mip level.
// returnCode starts as ADDR_OK and takes the result of ApplyCustomizedPitchHeight;
// the outputs are written only when that result is ADDR_OK.
// NOTE(review): brace/else/return lines are not present in this listing.
4959 ADDR_E_RETURNCODE
Gfx9Lib::ComputeSurfaceLinearPadding(
4960 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
4961 UINT_32
* pMipmap0PaddedWidth
, ///< [out] padded width in element
4962 UINT_32
* pSlice0PaddedHeight
, ///< [out] padded height for HW
4963 ADDR2_MIP_INFO
* pMipInfo
///< [out] per mip information
4966 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
4968 UINT_32 elementBytes
= pIn
->bpp
>> 3;
4969 UINT_32 pitchAlignInElement
= 0;
// ADDR_SW_LINEAR_GENERAL: one-element pitch alignment; at most one mip level and one
// slice are expected (asserted).
4971 if (pIn
->swizzleMode
== ADDR_SW_LINEAR_GENERAL
)
4973 ADDR_ASSERT(pIn
->numMipLevels
<= 1);
4974 ADDR_ASSERT(pIn
->numSlices
<= 1);
4975 pitchAlignInElement
= 1;
// Otherwise the pitch is aligned to 256 bytes, expressed in elements.
4979 pitchAlignInElement
= (256 / elementBytes
);
4982 UINT_32 mipChainWidth
= PowTwoAlign(pIn
->width
, pitchAlignInElement
);
4983 UINT_32 slice0PaddedHeight
= pIn
->height
;
4985 returnCode
= ApplyCustomizedPitchHeight(pIn
, elementBytes
, pitchAlignInElement
,
4986 &mipChainWidth
, &slice0PaddedHeight
);
4988 if (returnCode
== ADDR_OK
)
4990 UINT_32 mipChainHeight
= 0;
4991 UINT_32 mipHeight
= pIn
->height
;
// Mips are stacked vertically: each mip starts at the height accumulated so far.
4993 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
4995 if (pMipInfo
!= NULL
)
4997 pMipInfo
[i
].offset
= mipChainWidth
* mipChainHeight
* elementBytes
;
4998 pMipInfo
[i
].pitch
= mipChainWidth
;
4999 pMipInfo
[i
].height
= mipHeight
;
5000 pMipInfo
[i
].depth
= 1;
// Advance to the next mip: add this mip's height, then halve it (never below 1).
5003 mipChainHeight
+= mipHeight
;
5004 mipHeight
= RoundHalf(mipHeight
);
5005 mipHeight
= Max(mipHeight
, 1u);
// With more than one mip the reported height is the whole chain; otherwise it is the
// (possibly customized) height of slice 0.
5008 *pMipmap0PaddedWidth
= mipChainWidth
;
5009 *pSlice0PaddedHeight
= (pIn
->numMipLevels
> 1) ? mipChainHeight
: slice0PaddedHeight
;