2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
34 #include "gfx9addrlib.h"
36 #include "gfx9_gb_reg.h"
38 #include "amdgpu_asic_addr.h"
40 #include "util/macros.h"
42 ////////////////////////////////////////////////////////////////////////////////////////////////////
43 ////////////////////////////////////////////////////////////////////////////////////////////////////
49 ************************************************************************************************************************
53 * Creates a Gfx9Lib object.
56 * Returns a Gfx9Lib object pointer.
57 ************************************************************************************************************************
// Creation hook for the GFX9 address library: forwards pClient to
// V2::Gfx9Lib::CreateObj and returns that call's result as an Addr::Lib pointer.
59 Addr::Lib
* Gfx9HwlInit(const Client
* pClient
)
61 return V2::Gfx9Lib::CreateObj(pClient
);
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68 // Static Const Member
69 ////////////////////////////////////////////////////////////////////////////////////////////////////
// Static table of per-swizzle-mode flags, ADDR_SW_MAX_TYPE rows long. The trailing comment on
// each row names the swizzle mode for that index; all-zero rows are reserved indices.
// Each row lists 13 values in the column order given by the header comment on the opening brace
// (presumably the declaration order of SwizzleModeFlags — confirm against the header).
// Visible patterns in the data: every row sets exactly one block-size column (Linear/256B/4KB/64KB)
// and, except for the linear rows, exactly one of Z/Std/Disp/Rot; the *_T rows set both XOR and T,
// while the *_x / *_X rows set XOR only.
71 const SwizzleModeFlags
Gfx9Lib::SwizzleModeTable
[ADDR_SW_MAX_TYPE
] =
72 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
73 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
74 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
75 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
76 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_R
78 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
79 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
80 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
81 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_R
83 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
84 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
85 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
86 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_R
88 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
89 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
90 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
91 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
93 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_Z_T
94 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
95 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
96 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0}, // ADDR_SW_64KB_R_T
98 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_Z_x
99 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_x
100 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_x
101 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_4KB_R_x
103 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
104 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
105 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
106 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_64KB_R_X
108 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
109 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
110 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
111 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
112 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
// Static lookup table of 16 offsets in descending order: powers of two from 2048 down to 8,
// then 6, 5, 4, 3, 2, 1, 0.
// NOTE(review): the name suggests mip-tail offsets expressed in 256-byte units, indexed by
// position within the mip tail — confirm against the code that reads this table.
115 const UINT_32
Gfx9Lib::MipTailOffset256B
[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};
// Static table of five Dim3d entries. Only the first component varies, halving per entry
// (16, 8, 4, 2, 1) while the other two stay at 4, so the element count per entry halves
// from 256 down to 16.
// NOTE(review): presumably the dimensions of a 256-byte 3D standard-swizzle block, indexed by
// log2(bytes per element) — confirm at the use site.
117 const Dim3d
Gfx9Lib::Block256_3dS
[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
// Static table of five Dim3d entries whose element count (product of the three components)
// halves per entry: 256, 128, 64, 32, 16. Unlike Block256_3dS, the reduction is spread
// across all three components rather than applied to the first one only.
// NOTE(review): presumably the dimensions of a 256-byte 3D Z-swizzle block, indexed by
// log2(bytes per element) — confirm at the use site.
119 const Dim3d
Gfx9Lib::Block256_3dZ
[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
122 ************************************************************************************************************************
128 ************************************************************************************************************************
130 Gfx9Lib::Gfx9Lib(const Client
* pClient
)
134 m_class
= AI_ADDRLIB
;
135 memset(&m_settings
, 0, sizeof(m_settings
));
136 memcpy(m_swizzleModeTable
, SwizzleModeTable
, sizeof(SwizzleModeTable
));
137 memset(m_cachedMetaEqKey
, 0, sizeof(m_cachedMetaEqKey
));
138 m_metaEqOverrideIndex
= 0;
142 ************************************************************************************************************************
147 ************************************************************************************************************************
154 ************************************************************************************************************************
155 * Gfx9Lib::HwlComputeHtileInfo
158 * Interface function stub of AddrComputeHtileInfo
162 ************************************************************************************************************************
164 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeHtileInfo(
165 const ADDR2_COMPUTE_HTILE_INFO_INPUT
* pIn
, ///< [in] input structure
166 ADDR2_COMPUTE_HTILE_INFO_OUTPUT
* pOut
///< [out] output structure
169 UINT_32 numPipeTotal
= GetPipeNumForMetaAddressing(pIn
->hTileFlags
.pipeAligned
,
172 UINT_32 numRbTotal
= pIn
->hTileFlags
.rbAligned
? m_se
* m_rbPerSe
: 1;
174 UINT_32 numCompressBlkPerMetaBlk
, numCompressBlkPerMetaBlkLog2
;
176 if ((numPipeTotal
== 1) && (numRbTotal
== 1))
178 numCompressBlkPerMetaBlkLog2
= 10;
182 if (m_settings
.applyAliasFix
)
184 numCompressBlkPerMetaBlkLog2
= m_seLog2
+ m_rbPerSeLog2
+ Max(10u, m_pipeInterleaveLog2
);
188 numCompressBlkPerMetaBlkLog2
= m_seLog2
+ m_rbPerSeLog2
+ 10;
192 numCompressBlkPerMetaBlk
= 1 << numCompressBlkPerMetaBlkLog2
;
194 Dim3d metaBlkDim
= {8, 8, 1};
195 UINT_32 totalAmpBits
= numCompressBlkPerMetaBlkLog2
;
196 UINT_32 widthAmp
= (pIn
->numMipLevels
> 1) ? (totalAmpBits
>> 1) : RoundHalf(totalAmpBits
);
197 UINT_32 heightAmp
= totalAmpBits
- widthAmp
;
198 metaBlkDim
.w
<<= widthAmp
;
199 metaBlkDim
.h
<<= heightAmp
;
202 Dim3d metaBlkDimDbg
= {8, 8, 1};
203 for (UINT_32 index
= 0; index
< numCompressBlkPerMetaBlkLog2
; index
++)
205 if ((metaBlkDimDbg
.h
< metaBlkDimDbg
.w
) ||
206 ((pIn
->numMipLevels
> 1) && (metaBlkDimDbg
.h
== metaBlkDimDbg
.w
)))
208 metaBlkDimDbg
.h
<<= 1;
212 metaBlkDimDbg
.w
<<= 1;
215 ADDR_ASSERT((metaBlkDimDbg
.w
== metaBlkDim
.w
) && (metaBlkDimDbg
.h
== metaBlkDim
.h
));
222 GetMetaMipInfo(pIn
->numMipLevels
, &metaBlkDim
, FALSE
, pOut
->pMipInfo
,
223 pIn
->unalignedWidth
, pIn
->unalignedHeight
, pIn
->numSlices
,
224 &numMetaBlkX
, &numMetaBlkY
, &numMetaBlkZ
);
226 const UINT_32 metaBlkSize
= numCompressBlkPerMetaBlk
<< 2;
227 UINT_32 align
= numPipeTotal
* numRbTotal
* m_pipeInterleaveBytes
;
229 if ((IsXor(pIn
->swizzleMode
) == FALSE
) && (numPipeTotal
> 2))
231 align
*= (numPipeTotal
>> 1);
234 align
= Max(align
, metaBlkSize
);
236 if (m_settings
.metaBaseAlignFix
)
238 align
= Max(align
, GetBlockSize(pIn
->swizzleMode
));
241 if (m_settings
.htileAlignFix
)
243 const INT_32 metaBlkSizeLog2
= numCompressBlkPerMetaBlkLog2
+ 2;
244 const INT_32 htileCachelineSizeLog2
= 11;
245 const INT_32 maxNumOfRbMaskBits
= 1 + Log2(numPipeTotal
) + Log2(numRbTotal
);
247 INT_32 rbMaskPadding
= Max(0, htileCachelineSizeLog2
- (metaBlkSizeLog2
- maxNumOfRbMaskBits
));
249 align
<<= rbMaskPadding
;
252 pOut
->pitch
= numMetaBlkX
* metaBlkDim
.w
;
253 pOut
->height
= numMetaBlkY
* metaBlkDim
.h
;
254 pOut
->sliceSize
= numMetaBlkX
* numMetaBlkY
* metaBlkSize
;
256 pOut
->metaBlkWidth
= metaBlkDim
.w
;
257 pOut
->metaBlkHeight
= metaBlkDim
.h
;
258 pOut
->metaBlkNumPerSlice
= numMetaBlkX
* numMetaBlkY
;
260 pOut
->baseAlign
= align
;
261 pOut
->htileBytes
= PowTwoAlign(pOut
->sliceSize
* numMetaBlkZ
, align
);
267 ************************************************************************************************************************
268 * Gfx9Lib::HwlComputeCmaskInfo
271 * Interface function stub of AddrComputeCmaskInfo
275 ************************************************************************************************************************
277 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeCmaskInfo(
278 const ADDR2_COMPUTE_CMASK_INFO_INPUT
* pIn
, ///< [in] input structure
279 ADDR2_COMPUTE_CMASK_INFO_OUTPUT
* pOut
///< [out] output structure
282 ADDR_ASSERT(pIn
->resourceType
== ADDR_RSRC_TEX_2D
);
284 UINT_32 numPipeTotal
= GetPipeNumForMetaAddressing(pIn
->cMaskFlags
.pipeAligned
,
287 UINT_32 numRbTotal
= pIn
->cMaskFlags
.rbAligned
? m_se
* m_rbPerSe
: 1;
289 UINT_32 numCompressBlkPerMetaBlkLog2
, numCompressBlkPerMetaBlk
;
291 if ((numPipeTotal
== 1) && (numRbTotal
== 1))
293 numCompressBlkPerMetaBlkLog2
= 13;
297 if (m_settings
.applyAliasFix
)
299 numCompressBlkPerMetaBlkLog2
= m_seLog2
+ m_rbPerSeLog2
+ Max(10u, m_pipeInterleaveLog2
);
303 numCompressBlkPerMetaBlkLog2
= m_seLog2
+ m_rbPerSeLog2
+ 10;
306 numCompressBlkPerMetaBlkLog2
= Max(numCompressBlkPerMetaBlkLog2
, 13u);
309 numCompressBlkPerMetaBlk
= 1 << numCompressBlkPerMetaBlkLog2
;
311 Dim2d metaBlkDim
= {8, 8};
312 UINT_32 totalAmpBits
= numCompressBlkPerMetaBlkLog2
;
313 UINT_32 heightAmp
= totalAmpBits
>> 1;
314 UINT_32 widthAmp
= totalAmpBits
- heightAmp
;
315 metaBlkDim
.w
<<= widthAmp
;
316 metaBlkDim
.h
<<= heightAmp
;
319 Dim2d metaBlkDimDbg
= {8, 8};
320 for (UINT_32 index
= 0; index
< numCompressBlkPerMetaBlkLog2
; index
++)
322 if (metaBlkDimDbg
.h
< metaBlkDimDbg
.w
)
324 metaBlkDimDbg
.h
<<= 1;
328 metaBlkDimDbg
.w
<<= 1;
331 ADDR_ASSERT((metaBlkDimDbg
.w
== metaBlkDim
.w
) && (metaBlkDimDbg
.h
== metaBlkDim
.h
));
334 UINT_32 numMetaBlkX
= (pIn
->unalignedWidth
+ metaBlkDim
.w
- 1) / metaBlkDim
.w
;
335 UINT_32 numMetaBlkY
= (pIn
->unalignedHeight
+ metaBlkDim
.h
- 1) / metaBlkDim
.h
;
336 UINT_32 numMetaBlkZ
= Max(pIn
->numSlices
, 1u);
338 UINT_32 sizeAlign
= numPipeTotal
* numRbTotal
* m_pipeInterleaveBytes
;
340 if (m_settings
.metaBaseAlignFix
)
342 sizeAlign
= Max(sizeAlign
, GetBlockSize(pIn
->swizzleMode
));
345 pOut
->pitch
= numMetaBlkX
* metaBlkDim
.w
;
346 pOut
->height
= numMetaBlkY
* metaBlkDim
.h
;
347 pOut
->sliceSize
= (numMetaBlkX
* numMetaBlkY
* numCompressBlkPerMetaBlk
) >> 1;
348 pOut
->cmaskBytes
= PowTwoAlign(pOut
->sliceSize
* numMetaBlkZ
, sizeAlign
);
349 pOut
->baseAlign
= Max(numCompressBlkPerMetaBlk
>> 1, sizeAlign
);
351 pOut
->metaBlkWidth
= metaBlkDim
.w
;
352 pOut
->metaBlkHeight
= metaBlkDim
.h
;
354 pOut
->metaBlkNumPerSlice
= numMetaBlkX
* numMetaBlkY
;
360 ************************************************************************************************************************
361 * Gfx9Lib::GetMetaMipInfo
368 ************************************************************************************************************************
370 VOID
Gfx9Lib::GetMetaMipInfo(
371 UINT_32 numMipLevels
, ///< [in] number of mip levels
372 Dim3d
* pMetaBlkDim
, ///< [in] meta block dimension
373 BOOL_32 dataThick
, ///< [in] data surface is thick
374 ADDR2_META_MIP_INFO
* pInfo
, ///< [out] meta mip info
375 UINT_32 mip0Width
, ///< [in] mip0 width
376 UINT_32 mip0Height
, ///< [in] mip0 height
377 UINT_32 mip0Depth
, ///< [in] mip0 depth
378 UINT_32
* pNumMetaBlkX
, ///< [out] number of metablock X in mipchain
379 UINT_32
* pNumMetaBlkY
, ///< [out] number of metablock Y in mipchain
380 UINT_32
* pNumMetaBlkZ
) ///< [out] number of metablock Z in mipchain
383 UINT_32 numMetaBlkX
= (mip0Width
+ pMetaBlkDim
->w
- 1) / pMetaBlkDim
->w
;
384 UINT_32 numMetaBlkY
= (mip0Height
+ pMetaBlkDim
->h
- 1) / pMetaBlkDim
->h
;
385 UINT_32 numMetaBlkZ
= (mip0Depth
+ pMetaBlkDim
->d
- 1) / pMetaBlkDim
->d
;
386 UINT_32 tailWidth
= pMetaBlkDim
->w
;
387 UINT_32 tailHeight
= pMetaBlkDim
->h
>> 1;
388 UINT_32 tailDepth
= pMetaBlkDim
->d
;
389 BOOL_32 inTail
= FALSE
;
390 AddrMajorMode major
= ADDR_MAJOR_MAX_TYPE
;
392 if (numMipLevels
> 1)
394 if (dataThick
&& (numMetaBlkZ
> numMetaBlkX
) && (numMetaBlkZ
> numMetaBlkY
))
397 major
= ADDR_MAJOR_Z
;
399 else if (numMetaBlkX
>= numMetaBlkY
)
402 major
= ADDR_MAJOR_X
;
407 major
= ADDR_MAJOR_Y
;
410 inTail
= ((mip0Width
<= tailWidth
) &&
411 (mip0Height
<= tailHeight
) &&
412 ((dataThick
== FALSE
) || (mip0Depth
<= tailDepth
)));
420 if (major
== ADDR_MAJOR_Z
)
423 pMipDim
= &numMetaBlkY
;
424 pOrderDim
= &numMetaBlkZ
;
427 else if (major
== ADDR_MAJOR_X
)
430 pMipDim
= &numMetaBlkY
;
431 pOrderDim
= &numMetaBlkX
;
437 pMipDim
= &numMetaBlkX
;
438 pOrderDim
= &numMetaBlkY
;
442 if ((*pMipDim
< 3) && (*pOrderDim
> orderLimit
) && (numMipLevels
> 3))
448 *pMipDim
+= ((*pMipDim
/ 2) + (*pMipDim
& 1));
455 UINT_32 mipWidth
= mip0Width
;
456 UINT_32 mipHeight
= mip0Height
;
457 UINT_32 mipDepth
= mip0Depth
;
458 Dim3d mipCoord
= {0};
460 for (UINT_32 mip
= 0; mip
< numMipLevels
; mip
++)
464 GetMetaMiptailInfo(&pInfo
[mip
], mipCoord
, numMipLevels
- mip
,
470 mipWidth
= PowTwoAlign(mipWidth
, pMetaBlkDim
->w
);
471 mipHeight
= PowTwoAlign(mipHeight
, pMetaBlkDim
->h
);
472 mipDepth
= PowTwoAlign(mipDepth
, pMetaBlkDim
->d
);
474 pInfo
[mip
].inMiptail
= FALSE
;
475 pInfo
[mip
].startX
= mipCoord
.w
;
476 pInfo
[mip
].startY
= mipCoord
.h
;
477 pInfo
[mip
].startZ
= mipCoord
.d
;
478 pInfo
[mip
].width
= mipWidth
;
479 pInfo
[mip
].height
= mipHeight
;
480 pInfo
[mip
].depth
= dataThick
? mipDepth
: 1;
482 if ((mip
>= 3) || (mip
& 1))
487 mipCoord
.w
+= mipWidth
;
490 mipCoord
.h
+= mipHeight
;
493 mipCoord
.d
+= mipDepth
;
504 mipCoord
.h
+= mipHeight
;
507 mipCoord
.w
+= mipWidth
;
510 mipCoord
.h
+= mipHeight
;
517 mipWidth
= Max(mipWidth
>> 1, 1u);
518 mipHeight
= Max(mipHeight
>> 1, 1u);
519 mipDepth
= Max(mipDepth
>> 1, 1u);
521 inTail
= ((mipWidth
<= tailWidth
) &&
522 (mipHeight
<= tailHeight
) &&
523 ((dataThick
== FALSE
) || (mipDepth
<= tailDepth
)));
528 *pNumMetaBlkX
= numMetaBlkX
;
529 *pNumMetaBlkY
= numMetaBlkY
;
530 *pNumMetaBlkZ
= numMetaBlkZ
;
534 ************************************************************************************************************************
535 * Gfx9Lib::HwlComputeDccInfo
538 * Interface function to compute DCC key info
542 ************************************************************************************************************************
544 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeDccInfo(
545 const ADDR2_COMPUTE_DCCINFO_INPUT
* pIn
, ///< [in] input structure
546 ADDR2_COMPUTE_DCCINFO_OUTPUT
* pOut
///< [out] output structure
549 BOOL_32 dataLinear
= IsLinear(pIn
->swizzleMode
);
550 BOOL_32 metaLinear
= pIn
->dccKeyFlags
.linear
;
551 BOOL_32 pipeAligned
= pIn
->dccKeyFlags
.pipeAligned
;
557 else if (metaLinear
== TRUE
)
562 UINT_32 numPipeTotal
= GetPipeNumForMetaAddressing(pipeAligned
, pIn
->swizzleMode
);
566 // Linear metadata support was removed for GFX9; no one can use this feature on GFX9.
567 ADDR_ASSERT_ALWAYS();
569 pOut
->dccRamBaseAlign
= numPipeTotal
* m_pipeInterleaveBytes
;
570 pOut
->dccRamSize
= PowTwoAlign((pIn
->dataSurfaceSize
/ 256), pOut
->dccRamBaseAlign
);
574 BOOL_32 dataThick
= IsThick(pIn
->resourceType
, pIn
->swizzleMode
);
576 UINT_32 minMetaBlkSize
= dataThick
? 65536 : 4096;
578 UINT_32 numFrags
= Max(pIn
->numFrags
, 1u);
579 UINT_32 numSlices
= Max(pIn
->numSlices
, 1u);
581 minMetaBlkSize
/= numFrags
;
583 UINT_32 numCompressBlkPerMetaBlk
= minMetaBlkSize
;
585 UINT_32 numRbTotal
= pIn
->dccKeyFlags
.rbAligned
? m_se
* m_rbPerSe
: 1;
587 if ((numPipeTotal
> 1) || (numRbTotal
> 1))
589 const UINT_32 thinBlkSize
= 1 << (m_settings
.applyAliasFix
? Max(10u, m_pipeInterleaveLog2
) : 10);
591 numCompressBlkPerMetaBlk
=
592 Max(numCompressBlkPerMetaBlk
, m_se
* m_rbPerSe
* (dataThick
? 262144 : thinBlkSize
));
594 if (numCompressBlkPerMetaBlk
> 65536 * pIn
->bpp
)
596 numCompressBlkPerMetaBlk
= 65536 * pIn
->bpp
;
600 Dim3d compressBlkDim
= GetDccCompressBlk(pIn
->resourceType
, pIn
->swizzleMode
, pIn
->bpp
);
601 Dim3d metaBlkDim
= compressBlkDim
;
603 for (UINT_32 index
= 1; index
< numCompressBlkPerMetaBlk
; index
<<= 1)
605 if ((metaBlkDim
.h
< metaBlkDim
.w
) ||
606 ((pIn
->numMipLevels
> 1) && (metaBlkDim
.h
== metaBlkDim
.w
)))
608 if ((dataThick
== FALSE
) || (metaBlkDim
.h
<= metaBlkDim
.d
))
619 if ((dataThick
== FALSE
) || (metaBlkDim
.w
<= metaBlkDim
.d
))
634 GetMetaMipInfo(pIn
->numMipLevels
, &metaBlkDim
, dataThick
, pOut
->pMipInfo
,
635 pIn
->unalignedWidth
, pIn
->unalignedHeight
, numSlices
,
636 &numMetaBlkX
, &numMetaBlkY
, &numMetaBlkZ
);
638 UINT_32 sizeAlign
= numPipeTotal
* numRbTotal
* m_pipeInterleaveBytes
;
640 if (numFrags
> m_maxCompFrag
)
642 sizeAlign
*= (numFrags
/ m_maxCompFrag
);
645 if (m_settings
.metaBaseAlignFix
)
647 sizeAlign
= Max(sizeAlign
, GetBlockSize(pIn
->swizzleMode
));
650 pOut
->dccRamSize
= numMetaBlkX
* numMetaBlkY
* numMetaBlkZ
*
651 numCompressBlkPerMetaBlk
* numFrags
;
652 pOut
->dccRamSize
= PowTwoAlign(pOut
->dccRamSize
, sizeAlign
);
653 pOut
->dccRamBaseAlign
= Max(numCompressBlkPerMetaBlk
, sizeAlign
);
655 pOut
->pitch
= numMetaBlkX
* metaBlkDim
.w
;
656 pOut
->height
= numMetaBlkY
* metaBlkDim
.h
;
657 pOut
->depth
= numMetaBlkZ
* metaBlkDim
.d
;
659 pOut
->compressBlkWidth
= compressBlkDim
.w
;
660 pOut
->compressBlkHeight
= compressBlkDim
.h
;
661 pOut
->compressBlkDepth
= compressBlkDim
.d
;
663 pOut
->metaBlkWidth
= metaBlkDim
.w
;
664 pOut
->metaBlkHeight
= metaBlkDim
.h
;
665 pOut
->metaBlkDepth
= metaBlkDim
.d
;
667 pOut
->metaBlkNumPerSlice
= numMetaBlkX
* numMetaBlkY
;
668 pOut
->fastClearSizePerSlice
=
669 pOut
->metaBlkNumPerSlice
* numCompressBlkPerMetaBlk
* Min(numFrags
, m_maxCompFrag
);
676 ************************************************************************************************************************
677 * Gfx9Lib::HwlComputeMaxBaseAlignments
680 * Gets maximum alignments
683 ************************************************************************************************************************
685 UINT_32
Gfx9Lib::HwlComputeMaxBaseAlignments() const
691 ************************************************************************************************************************
692 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
695 * Gets maximum alignments for metadata
697 * maximum alignments for metadata
698 ************************************************************************************************************************
// Computes a worst-case base alignment for metadata surfaces on this chip and returns it as
// the maximum of three bounds worked out below: HTILE, MSAA DCC and 3D DCC.
// Takes no parameters; reads only chip configuration members (m_se, m_rbPerSe, m_seLog2,
// m_rbPerSeLog2, m_pipeInterleaveBytes, m_pipeInterleaveLog2, m_maxCompFrag, m_settings).
700 UINT_32
Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
702 // Max base alignment for Htile
// Pipe count for pipe-aligned metadata with ADDR_SW_64KB_Z; used below as the maximum pipe count.
703 const UINT_32 maxNumPipeTotal
= GetPipeNumForMetaAddressing(TRUE
, ADDR_SW_64KB_Z
);
// RB count assuming RB-aligned metadata: shader engines times RBs per shader engine.
704 const UINT_32 maxNumRbTotal
= m_se
* m_rbPerSe
;
706 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
707 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simplify the logic.
708 ADDR_ASSERT((m_settings
.applyAliasFix
== FALSE
) || (m_pipeInterleaveLog2
<= 10u));
// With the assertion above holding, the extra bits are always 10.
709 const UINT_32 maxNumCompressBlkPerMetaBlk
= 1u << (m_seLog2
+ m_rbPerSeLog2
+ 10u);
// Start from pipes * RBs * pipe-interleave bytes.
711 UINT_32 maxBaseAlignHtile
= maxNumPipeTotal
* maxNumRbTotal
* m_pipeInterleaveBytes
;
// With more than two pipes, scale the alignment by half the pipe count.
713 if (maxNumPipeTotal
> 2)
715 maxBaseAlignHtile
*= (maxNumPipeTotal
>> 1);
// Never below the meta block size; "<< 2" mirrors metaBlkSize in HwlComputeHtileInfo
// (numCompressBlkPerMetaBlk << 2).
718 maxBaseAlignHtile
= Max(maxNumCompressBlkPerMetaBlk
<< 2, maxBaseAlignHtile
);
// metaBaseAlignFix raises the bound to at least Size64K.
720 if (m_settings
.metaBaseAlignFix
)
722 maxBaseAlignHtile
= Max(maxBaseAlignHtile
, Size64K
);
// htileAlignFix additionally multiplies the bound by the pipe count.
725 if (m_settings
.htileAlignFix
)
727 maxBaseAlignHtile
*= maxNumPipeTotal
;
730 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
732 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
// 65536 is the bound when there is a single pipe and a single RB.
733 UINT_32 maxBaseAlignDcc3D
= 65536;
// Otherwise use 262144 per RB, capped at 65536 * 128.
// NOTE(review): 262144 and the 65536 * bpp cap match the thick-surface path of
// HwlComputeDccInfo; 128 is presumably the largest supported bpp — confirm.
735 if ((maxNumPipeTotal
> 1) || (maxNumRbTotal
> 1))
737 maxBaseAlignDcc3D
= Min(m_se
* m_rbPerSe
* 262144, 65536 * 128u);
740 // Max base alignment for Msaa Dcc
// NOTE(review): the (8 / m_maxCompFrag) factor looks like the worst case of the
// numFrags / m_maxCompFrag scaling in HwlComputeDccInfo with 8 fragments — confirm.
// It also relies on m_maxCompFrag being non-zero.
741 UINT_32 maxBaseAlignDccMsaa
= maxNumPipeTotal
* maxNumRbTotal
* m_pipeInterleaveBytes
* (8 / m_maxCompFrag
);
// Same Size64K floor as for HTILE when metaBaseAlignFix is set.
743 if (m_settings
.metaBaseAlignFix
)
745 maxBaseAlignDccMsaa
= Max(maxBaseAlignDccMsaa
, Size64K
);
// Overall result: the largest of the three bounds.
748 return Max(maxBaseAlignHtile
, Max(maxBaseAlignDccMsaa
, maxBaseAlignDcc3D
));
752 ************************************************************************************************************************
753 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
756 * Interface function stub of AddrComputeCmaskAddrFromCoord
760 ************************************************************************************************************************
762 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeCmaskAddrFromCoord(
763 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
764 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT
* pOut
) ///< [out] output structure
766 ADDR2_COMPUTE_CMASK_INFO_INPUT input
= {0};
767 input
.size
= sizeof(input
);
768 input
.cMaskFlags
= pIn
->cMaskFlags
;
769 input
.colorFlags
= pIn
->colorFlags
;
770 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
771 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
772 input
.numSlices
= Max(pIn
->numSlices
, 1u);
773 input
.swizzleMode
= pIn
->swizzleMode
;
774 input
.resourceType
= pIn
->resourceType
;
776 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output
= {0};
777 output
.size
= sizeof(output
);
779 ADDR_E_RETURNCODE returnCode
= ComputeCmaskInfo(&input
, &output
);
781 if (returnCode
== ADDR_OK
)
783 UINT_32 fmaskBpp
= GetFmaskBpp(pIn
->numSamples
, pIn
->numFrags
);
784 UINT_32 fmaskElementBytesLog2
= Log2(fmaskBpp
>> 3);
785 UINT_32 metaBlkWidthLog2
= Log2(output
.metaBlkWidth
);
786 UINT_32 metaBlkHeightLog2
= Log2(output
.metaBlkHeight
);
788 MetaEqParams metaEqParams
= {0, fmaskElementBytesLog2
, 0, pIn
->cMaskFlags
,
789 Gfx9DataFmask
, pIn
->swizzleMode
, pIn
->resourceType
,
790 metaBlkWidthLog2
, metaBlkHeightLog2
, 0, 3, 3, 0};
792 const CoordEq
* pMetaEq
= GetMetaEquation(metaEqParams
);
794 UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
795 UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
796 UINT_32 zb
= pIn
->slice
;
798 UINT_32 pitchInBlock
= output
.pitch
/ output
.metaBlkWidth
;
799 UINT_32 sliceSizeInBlock
= (output
.height
/ output
.metaBlkHeight
) * pitchInBlock
;
800 UINT_32 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
802 UINT_32 coords
[] = { pIn
->x
, pIn
->y
, pIn
->slice
, 0, blockIndex
};
803 UINT_64 address
= pMetaEq
->solve(coords
);
805 pOut
->addr
= address
>> 1;
806 pOut
->bitPosition
= static_cast<UINT_32
>((address
& 1) << 2);
808 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->cMaskFlags
.pipeAligned
,
811 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
813 pOut
->addr
^= (pipeXor
<< m_pipeInterleaveLog2
);
820 ************************************************************************************************************************
821 * Gfx9Lib::HwlComputeHtileAddrFromCoord
824 * Interface function stub of AddrComputeHtileAddrFromCoord
828 ************************************************************************************************************************
830 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeHtileAddrFromCoord(
831 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
832 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT
* pOut
) ///< [out] output structure
834 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
836 if (pIn
->numMipLevels
> 1)
838 returnCode
= ADDR_NOTIMPLEMENTED
;
842 ADDR2_COMPUTE_HTILE_INFO_INPUT input
= {0};
843 input
.size
= sizeof(input
);
844 input
.hTileFlags
= pIn
->hTileFlags
;
845 input
.depthFlags
= pIn
->depthflags
;
846 input
.swizzleMode
= pIn
->swizzleMode
;
847 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
848 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
849 input
.numSlices
= Max(pIn
->numSlices
, 1u);
850 input
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
852 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output
= {0};
853 output
.size
= sizeof(output
);
855 returnCode
= ComputeHtileInfo(&input
, &output
);
857 if (returnCode
== ADDR_OK
)
859 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
860 UINT_32 metaBlkWidthLog2
= Log2(output
.metaBlkWidth
);
861 UINT_32 metaBlkHeightLog2
= Log2(output
.metaBlkHeight
);
862 UINT_32 numSamplesLog2
= Log2(pIn
->numSamples
);
864 MetaEqParams metaEqParams
= {0, elementBytesLog2
, numSamplesLog2
, pIn
->hTileFlags
,
865 Gfx9DataDepthStencil
, pIn
->swizzleMode
, ADDR_RSRC_TEX_2D
,
866 metaBlkWidthLog2
, metaBlkHeightLog2
, 0, 3, 3, 0};
868 const CoordEq
* pMetaEq
= GetMetaEquation(metaEqParams
);
870 UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
871 UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
872 UINT_32 zb
= pIn
->slice
;
874 UINT_32 pitchInBlock
= output
.pitch
/ output
.metaBlkWidth
;
875 UINT_32 sliceSizeInBlock
= (output
.height
/ output
.metaBlkHeight
) * pitchInBlock
;
876 UINT_32 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
878 UINT_32 coords
[] = { pIn
->x
, pIn
->y
, pIn
->slice
, 0, blockIndex
};
879 UINT_64 address
= pMetaEq
->solve(coords
);
881 pOut
->addr
= address
>> 1;
883 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->hTileFlags
.pipeAligned
,
886 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
888 pOut
->addr
^= (pipeXor
<< m_pipeInterleaveLog2
);
896 ************************************************************************************************************************
897 * Gfx9Lib::HwlComputeHtileCoordFromAddr
900 * Interface function stub of AddrComputeHtileCoordFromAddr
904 ************************************************************************************************************************
906 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeHtileCoordFromAddr(
907 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT
* pIn
, ///< [in] input structure
908 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT
* pOut
) ///< [out] output structure
910 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
912 if (pIn
->numMipLevels
> 1)
914 returnCode
= ADDR_NOTIMPLEMENTED
;
918 ADDR2_COMPUTE_HTILE_INFO_INPUT input
= {0};
919 input
.size
= sizeof(input
);
920 input
.hTileFlags
= pIn
->hTileFlags
;
921 input
.swizzleMode
= pIn
->swizzleMode
;
922 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
923 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
924 input
.numSlices
= Max(pIn
->numSlices
, 1u);
925 input
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
927 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output
= {0};
928 output
.size
= sizeof(output
);
930 returnCode
= ComputeHtileInfo(&input
, &output
);
932 if (returnCode
== ADDR_OK
)
934 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
935 UINT_32 metaBlkWidthLog2
= Log2(output
.metaBlkWidth
);
936 UINT_32 metaBlkHeightLog2
= Log2(output
.metaBlkHeight
);
937 UINT_32 numSamplesLog2
= Log2(pIn
->numSamples
);
939 MetaEqParams metaEqParams
= {0, elementBytesLog2
, numSamplesLog2
, pIn
->hTileFlags
,
940 Gfx9DataDepthStencil
, pIn
->swizzleMode
, ADDR_RSRC_TEX_2D
,
941 metaBlkWidthLog2
, metaBlkHeightLog2
, 0, 3, 3, 0};
943 const CoordEq
* pMetaEq
= GetMetaEquation(metaEqParams
);
945 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->hTileFlags
.pipeAligned
,
948 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
950 UINT_64 nibbleAddress
= (pIn
->addr
^ (pipeXor
<< m_pipeInterleaveLog2
)) << 1;
952 UINT_32 pitchInBlock
= output
.pitch
/ output
.metaBlkWidth
;
953 UINT_32 sliceSizeInBlock
= (output
.height
/ output
.metaBlkHeight
) * pitchInBlock
;
955 UINT_32 coords
[NUM_DIMS
];
956 pMetaEq
->solveAddr(nibbleAddress
, sliceSizeInBlock
, coords
);
958 pOut
->slice
= coords
[DIM_M
] / sliceSizeInBlock
;
959 pOut
->y
= ((coords
[DIM_M
] % sliceSizeInBlock
) / pitchInBlock
) * output
.metaBlkHeight
+ coords
[DIM_Y
];
960 pOut
->x
= (coords
[DIM_M
] % pitchInBlock
) * output
.metaBlkWidth
+ coords
[DIM_X
];
968 ************************************************************************************************************************
969 * Gfx9Lib::HwlComputeDccAddrFromCoord
972 * Interface function stub of AddrComputeDccAddrFromCoord
976 ************************************************************************************************************************
978 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeDccAddrFromCoord(
979 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT
* pIn
,
980 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT
* pOut
)
982 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
984 if ((pIn
->numMipLevels
> 1) || (pIn
->mipId
> 1) || pIn
->dccKeyFlags
.linear
)
986 returnCode
= ADDR_NOTIMPLEMENTED
;
990 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
991 UINT_32 numSamplesLog2
= Log2(pIn
->numFrags
);
992 UINT_32 metaBlkWidthLog2
= Log2(pIn
->metaBlkWidth
);
993 UINT_32 metaBlkHeightLog2
= Log2(pIn
->metaBlkHeight
);
994 UINT_32 metaBlkDepthLog2
= Log2(pIn
->metaBlkDepth
);
995 UINT_32 compBlkWidthLog2
= Log2(pIn
->compressBlkWidth
);
996 UINT_32 compBlkHeightLog2
= Log2(pIn
->compressBlkHeight
);
997 UINT_32 compBlkDepthLog2
= Log2(pIn
->compressBlkDepth
);
999 MetaEqParams metaEqParams
= {pIn
->mipId
, elementBytesLog2
, numSamplesLog2
, pIn
->dccKeyFlags
,
1000 Gfx9DataColor
, pIn
->swizzleMode
, pIn
->resourceType
,
1001 metaBlkWidthLog2
, metaBlkHeightLog2
, metaBlkDepthLog2
,
1002 compBlkWidthLog2
, compBlkHeightLog2
, compBlkDepthLog2
};
1004 const CoordEq
* pMetaEq
= GetMetaEquation(metaEqParams
);
1006 UINT_32 xb
= pIn
->x
/ pIn
->metaBlkWidth
;
1007 UINT_32 yb
= pIn
->y
/ pIn
->metaBlkHeight
;
1008 UINT_32 zb
= pIn
->slice
/ pIn
->metaBlkDepth
;
1010 UINT_32 pitchInBlock
= pIn
->pitch
/ pIn
->metaBlkWidth
;
1011 UINT_32 sliceSizeInBlock
= (pIn
->height
/ pIn
->metaBlkHeight
) * pitchInBlock
;
1012 UINT_32 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
1014 UINT_32 coords
[] = { pIn
->x
, pIn
->y
, pIn
->slice
, pIn
->sample
, blockIndex
};
1015 UINT_64 address
= pMetaEq
->solve(coords
);
1017 pOut
->addr
= address
>> 1;
1019 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->dccKeyFlags
.pipeAligned
,
1022 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
1024 pOut
->addr
^= (pipeXor
<< m_pipeInterleaveLog2
);
1031 ************************************************************************************************************************
1032 * Gfx9Lib::HwlInitGlobalParams
1035 * Initializes global parameters
1038 * TRUE if all settings are valid
1040 ************************************************************************************************************************
1042 BOOL_32
Gfx9Lib::HwlInitGlobalParams(
1043 const ADDR_CREATE_INPUT
* pCreateIn
) ///< [in] create input
1045 BOOL_32 valid
= TRUE
;
1047 if (m_settings
.isArcticIsland
)
1049 GB_ADDR_CONFIG_gfx9 gbAddrConfig
;
1051 gbAddrConfig
.u32All
= pCreateIn
->regValue
.gbAddrConfig
;
1053 // These values are copied from CModel code
1054 switch (gbAddrConfig
.bits
.NUM_PIPES
)
1056 case ADDR_CONFIG_1_PIPE
:
1060 case ADDR_CONFIG_2_PIPE
:
1064 case ADDR_CONFIG_4_PIPE
:
1068 case ADDR_CONFIG_8_PIPE
:
1072 case ADDR_CONFIG_16_PIPE
:
1076 case ADDR_CONFIG_32_PIPE
:
1081 ADDR_ASSERT_ALWAYS();
1085 switch (gbAddrConfig
.bits
.PIPE_INTERLEAVE_SIZE
)
1087 case ADDR_CONFIG_PIPE_INTERLEAVE_256B
:
1088 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_256B
;
1089 m_pipeInterleaveLog2
= 8;
1091 case ADDR_CONFIG_PIPE_INTERLEAVE_512B
:
1092 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_512B
;
1093 m_pipeInterleaveLog2
= 9;
1095 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB
:
1096 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_1KB
;
1097 m_pipeInterleaveLog2
= 10;
1099 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB
:
1100 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_2KB
;
1101 m_pipeInterleaveLog2
= 11;
1104 ADDR_ASSERT_ALWAYS();
1108 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1109 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1110 ADDR_ASSERT(m_pipeInterleaveBytes
== ADDR_PIPEINTERLEAVE_256B
);
1112 switch (gbAddrConfig
.bits
.NUM_BANKS
)
1114 case ADDR_CONFIG_1_BANK
:
1118 case ADDR_CONFIG_2_BANK
:
1122 case ADDR_CONFIG_4_BANK
:
1126 case ADDR_CONFIG_8_BANK
:
1130 case ADDR_CONFIG_16_BANK
:
1135 ADDR_ASSERT_ALWAYS();
1139 switch (gbAddrConfig
.bits
.NUM_SHADER_ENGINES
)
1141 case ADDR_CONFIG_1_SHADER_ENGINE
:
1145 case ADDR_CONFIG_2_SHADER_ENGINE
:
1149 case ADDR_CONFIG_4_SHADER_ENGINE
:
1153 case ADDR_CONFIG_8_SHADER_ENGINE
:
1158 ADDR_ASSERT_ALWAYS();
1162 switch (gbAddrConfig
.bits
.NUM_RB_PER_SE
)
1164 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE
:
1168 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE
:
1172 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE
:
1177 ADDR_ASSERT_ALWAYS();
1181 switch (gbAddrConfig
.bits
.MAX_COMPRESSED_FRAGS
)
1183 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS
:
1185 m_maxCompFragLog2
= 0;
1187 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS
:
1189 m_maxCompFragLog2
= 1;
1191 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS
:
1193 m_maxCompFragLog2
= 2;
1195 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS
:
1197 m_maxCompFragLog2
= 3;
1200 ADDR_ASSERT_ALWAYS();
1204 if ((m_rbPerSeLog2
== 1) &&
1205 (((m_pipesLog2
== 1) && ((m_seLog2
== 2) || (m_seLog2
== 3))) ||
1206 ((m_pipesLog2
== 2) && ((m_seLog2
== 1) || (m_seLog2
== 2)))))
1208 ADDR_ASSERT(m_settings
.isVega10
== FALSE
);
1209 ADDR_ASSERT(m_settings
.isRaven
== FALSE
);
1211 ADDR_ASSERT(m_settings
.isVega20
== FALSE
);
1213 if (m_settings
.isVega12
)
1215 m_settings
.htileCacheRbConflict
= 1;
1219 // For simplicity we never allow VAR swizzle mode for GFX9, the actural value is 18 on GFX9
1220 m_blockVarSizeLog2
= 0;
1225 ADDR_NOT_IMPLEMENTED();
1230 InitEquationTable();
1237 ************************************************************************************************************************
1238 * Gfx9Lib::HwlConvertChipFamily
1241 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1244 ************************************************************************************************************************
1246 ChipFamily
Gfx9Lib::HwlConvertChipFamily(
1247 UINT_32 uChipFamily
, ///< [in] chip family defined in atiih.h
1248 UINT_32 uChipRevision
) ///< [in] chip revision defined in "asic_family"_id.h
1250 ChipFamily family
= ADDR_CHIP_FAMILY_AI
;
1252 switch (uChipFamily
)
1255 m_settings
.isArcticIsland
= 1;
1256 m_settings
.isVega10
= ASICREV_IS_VEGA10_P(uChipRevision
);
1257 m_settings
.isVega12
= ASICREV_IS_VEGA12_P(uChipRevision
);
1258 m_settings
.isVega20
= ASICREV_IS_VEGA20_P(uChipRevision
);
1259 m_settings
.isDce12
= 1;
1261 if (m_settings
.isVega10
== 0)
1263 m_settings
.htileAlignFix
= 1;
1264 m_settings
.applyAliasFix
= 1;
1267 m_settings
.metaBaseAlignFix
= 1;
1269 m_settings
.depthPipeXorDisable
= 1;
1272 m_settings
.isArcticIsland
= 1;
1274 if (ASICREV_IS_RAVEN(uChipRevision
))
1276 m_settings
.isRaven
= 1;
1278 m_settings
.depthPipeXorDisable
= 1;
1281 if (ASICREV_IS_RAVEN2(uChipRevision
))
1283 m_settings
.isRaven
= 1;
1286 if (m_settings
.isRaven
== 0)
1288 m_settings
.htileAlignFix
= 1;
1289 m_settings
.applyAliasFix
= 1;
1292 if (ASICREV_IS_RENOIR(uChipRevision
))
1294 m_settings
.isRaven
= 1;
1297 m_settings
.isDcn1
= m_settings
.isRaven
;
1299 m_settings
.metaBaseAlignFix
= 1;
1303 ADDR_ASSERT(!"This should be a Fusion");
1311 ************************************************************************************************************************
1312 * Gfx9Lib::InitRbEquation
1318 ************************************************************************************************************************
1320 VOID
Gfx9Lib::GetRbEquation(
1321 CoordEq
* pRbEq
, ///< [out] rb equation
1322 UINT_32 numRbPerSeLog2
, ///< [in] number of rb per shader engine
1323 UINT_32 numSeLog2
) ///< [in] number of shader engine
1326 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1327 UINT_32 rbRegion
= (numRbPerSeLog2
== 0) ? 5 : 4;
1328 Coordinate
cx(DIM_X
, rbRegion
);
1329 Coordinate
cy(DIM_Y
, rbRegion
);
1332 UINT_32 numRbTotalLog2
= numRbPerSeLog2
+ numSeLog2
;
1334 // Clear the rb equation
1336 pRbEq
->resize(numRbTotalLog2
);
1338 if ((numSeLog2
> 0) && (numRbPerSeLog2
== 1))
1340 // Special case when more than 1 SE, and 2 RB per SE
1341 (*pRbEq
)[0].add(cx
);
1342 (*pRbEq
)[0].add(cy
);
1346 if (m_settings
.applyAliasFix
== false)
1348 (*pRbEq
)[0].add(cy
);
1351 (*pRbEq
)[0].add(cy
);
1355 UINT_32 numBits
= 2 * (numRbTotalLog2
- start
);
1357 for (UINT_32 i
= 0; i
< numBits
; i
++)
1360 start
+ (((start
+ i
) >= numRbTotalLog2
) ? (2 * (numRbTotalLog2
- start
) - i
- 1) : i
);
1364 (*pRbEq
)[idx
].add(cx
);
1369 (*pRbEq
)[idx
].add(cy
);
1376 ************************************************************************************************************************
1377 * Gfx9Lib::GetDataEquation
1380 * Get data equation for fmask and Z
1383 ************************************************************************************************************************
1385 VOID
Gfx9Lib::GetDataEquation(
1386 CoordEq
* pDataEq
, ///< [out] data surface equation
1387 Gfx9DataType dataSurfaceType
, ///< [in] data surface type
1388 AddrSwizzleMode swizzleMode
, ///< [in] data surface swizzle mode
1389 AddrResourceType resourceType
, ///< [in] data surface resource type
1390 UINT_32 elementBytesLog2
, ///< [in] data surface element bytes
1391 UINT_32 numSamplesLog2
) ///< [in] data surface sample count
1394 Coordinate
cx(DIM_X
, 0);
1395 Coordinate
cy(DIM_Y
, 0);
1396 Coordinate
cz(DIM_Z
, 0);
1397 Coordinate
cs(DIM_S
, 0);
1399 // Clear the equation
1401 pDataEq
->resize(27);
1403 if (dataSurfaceType
== Gfx9DataColor
)
1405 if (IsLinear(swizzleMode
))
1407 Coordinate
cm(DIM_M
, 0);
1409 pDataEq
->resize(49);
1411 for (UINT_32 i
= 0; i
< 49; i
++)
1413 (*pDataEq
)[i
].add(cm
);
1417 else if (IsThick(resourceType
, swizzleMode
))
1419 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1421 if (IsStandardSwizzle(resourceType
, swizzleMode
))
1423 // Standard 3d swizzle
1424 // Fill in bottom x bits
1425 for (i
= elementBytesLog2
; i
< 4; i
++)
1427 (*pDataEq
)[i
].add(cx
);
1430 // Fill in 2 bits of y and then z
1431 for (i
= 4; i
< 6; i
++)
1433 (*pDataEq
)[i
].add(cy
);
1436 for (i
= 6; i
< 8; i
++)
1438 (*pDataEq
)[i
].add(cz
);
1441 if (elementBytesLog2
< 2)
1443 // fill in z & y bit
1444 (*pDataEq
)[8].add(cz
);
1445 (*pDataEq
)[9].add(cy
);
1449 else if (elementBytesLog2
== 2)
1451 // fill in y and x bit
1452 (*pDataEq
)[8].add(cy
);
1453 (*pDataEq
)[9].add(cx
);
1460 (*pDataEq
)[8].add(cx
);
1462 (*pDataEq
)[9].add(cx
);
1469 UINT_32 m2dEnd
= (elementBytesLog2
==0) ? 3 : ((elementBytesLog2
< 4) ? 4 : 5);
1470 UINT_32 numZs
= (elementBytesLog2
== 0 || elementBytesLog2
== 4) ?
1471 2 : ((elementBytesLog2
== 1) ? 3 : 1);
1472 pDataEq
->mort2d(cx
, cy
, elementBytesLog2
, m2dEnd
);
1473 for (i
= m2dEnd
+ 1; i
<= m2dEnd
+ numZs
; i
++)
1475 (*pDataEq
)[i
].add(cz
);
1478 if ((elementBytesLog2
== 0) || (elementBytesLog2
== 3))
1481 (*pDataEq
)[6].add(cx
);
1482 (*pDataEq
)[7].add(cz
);
1486 else if (elementBytesLog2
== 2)
1489 (*pDataEq
)[6].add(cy
);
1490 (*pDataEq
)[7].add(cz
);
1495 (*pDataEq
)[8].add(cy
);
1496 (*pDataEq
)[9].add(cx
);
1500 // Fill in bit 10 and up
1501 pDataEq
->mort3d( cz
, cy
, cx
, 10 );
1503 else if (IsThin(resourceType
, swizzleMode
))
1505 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swizzleMode
);
1507 UINT_32 microYBits
= (8 - elementBytesLog2
) / 2;
1508 UINT_32 tileSplitStart
= blockSizeLog2
- numSamplesLog2
;
1510 // Fill in bottom x bits
1511 for (i
= elementBytesLog2
; i
< 4; i
++)
1513 (*pDataEq
)[i
].add(cx
);
1516 // Fill in bottom y bits
1517 for (i
= 4; i
< 4 + microYBits
; i
++)
1519 (*pDataEq
)[i
].add(cy
);
1522 // Fill in last of the micro_x bits
1523 for (i
= 4 + microYBits
; i
< 8; i
++)
1525 (*pDataEq
)[i
].add(cx
);
1528 // Fill in x/y bits below sample split
1529 pDataEq
->mort2d(cy
, cx
, 8, tileSplitStart
- 1);
1530 // Fill in sample bits
1531 for (i
= 0; i
< numSamplesLog2
; i
++)
1534 (*pDataEq
)[tileSplitStart
+ i
].add(cs
);
1536 // Fill in x/y bits above sample split
1537 if ((numSamplesLog2
& 1) ^ (blockSizeLog2
& 1))
1539 pDataEq
->mort2d(cx
, cy
, blockSizeLog2
);
1543 pDataEq
->mort2d(cy
, cx
, blockSizeLog2
);
1548 ADDR_ASSERT_ALWAYS();
1554 UINT_32 sampleStart
= elementBytesLog2
;
1555 UINT_32 pixelStart
= elementBytesLog2
+ numSamplesLog2
;
1556 UINT_32 ymajStart
= 6 + numSamplesLog2
;
1558 for (UINT_32 s
= 0; s
< numSamplesLog2
; s
++)
1561 (*pDataEq
)[sampleStart
+ s
].add(cs
);
1564 // Put in the x-major order pixel bits
1565 pDataEq
->mort2d(cx
, cy
, pixelStart
, ymajStart
- 1);
1566 // Put in the y-major order pixel bits
1567 pDataEq
->mort2d(cy
, cx
, ymajStart
);
1572 ************************************************************************************************************************
1573 * Gfx9Lib::GetPipeEquation
1579 ************************************************************************************************************************
1581 VOID
Gfx9Lib::GetPipeEquation(
1582 CoordEq
* pPipeEq
, ///< [out] pipe equation
1583 CoordEq
* pDataEq
, ///< [in] data equation
1584 UINT_32 pipeInterleaveLog2
, ///< [in] pipe interleave
1585 UINT_32 numPipeLog2
, ///< [in] number of pipes
1586 UINT_32 numSamplesLog2
, ///< [in] data surface sample count
1587 Gfx9DataType dataSurfaceType
, ///< [in] data surface type
1588 AddrSwizzleMode swizzleMode
, ///< [in] data surface swizzle mode
1589 AddrResourceType resourceType
///< [in] data surface resource type
1592 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swizzleMode
);
1595 pDataEq
->copy(dataEq
);
1597 if (dataSurfaceType
== Gfx9DataColor
)
1599 INT_32 shift
= static_cast<INT_32
>(numSamplesLog2
);
1600 dataEq
.shift(-shift
, blockSizeLog2
- numSamplesLog2
);
1603 dataEq
.copy(*pPipeEq
, pipeInterleaveLog2
, numPipeLog2
);
1605 // This section should only apply to z/stencil, maybe fmask
1606 // If the pipe bit is below the comp block size,
1607 // then keep moving up the address until we find a bit that is above
1608 UINT_32 pipeStart
= 0;
1610 if (dataSurfaceType
!= Gfx9DataColor
)
1612 Coordinate
tileMin(DIM_X
, 3);
1614 while (dataEq
[pipeInterleaveLog2
+ pipeStart
][0] < tileMin
)
1619 // if pipe is 0, then the first pipe bit is above the comp block size,
1620 // so we don't need to do anything
1621 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1622 // we will get the same pipe equation
1625 for (UINT_32 i
= 0; i
< numPipeLog2
; i
++)
1627 // Copy the jth bit above pipe interleave to the current pipe equation bit
1628 dataEq
[pipeInterleaveLog2
+ pipeStart
+ i
].copyto((*pPipeEq
)[i
]);
1633 if (IsPrt(swizzleMode
))
1635 // Clear out bits above the block size if prt's are enabled
1636 dataEq
.resize(blockSizeLog2
);
1640 if (IsXor(swizzleMode
))
1644 if (IsThick(resourceType
, swizzleMode
))
1648 dataEq
.copy(xorMask2
, pipeInterleaveLog2
+ numPipeLog2
, 2 * numPipeLog2
);
1650 xorMask
.resize(numPipeLog2
);
1652 for (UINT_32 pipeIdx
= 0; pipeIdx
< numPipeLog2
; pipeIdx
++)
1654 xorMask
[pipeIdx
].add(xorMask2
[2 * pipeIdx
]);
1655 xorMask
[pipeIdx
].add(xorMask2
[2 * pipeIdx
+ 1]);
1660 // Xor in the bits above the pipe+gpu bits
1661 dataEq
.copy(xorMask
, pipeInterleaveLog2
+ pipeStart
+ numPipeLog2
, numPipeLog2
);
1663 if ((numSamplesLog2
== 0) && (IsPrt(swizzleMode
) == FALSE
))
1667 // if 1xaa and not prt, then xor in the z bits
1669 xorMask2
.resize(numPipeLog2
);
1670 for (UINT_32 pipeIdx
= 0; pipeIdx
< numPipeLog2
; pipeIdx
++)
1672 co
.set(DIM_Z
, numPipeLog2
- 1 - pipeIdx
);
1673 xorMask2
[pipeIdx
].add(co
);
1676 pPipeEq
->xorin(xorMask2
);
1681 pPipeEq
->xorin(xorMask
);
1685 ************************************************************************************************************************
1686 * Gfx9Lib::GetMetaEquation
1689 * Get meta equation for cmask/htile/DCC
1691 * Pointer to a calculated meta equation
1692 ************************************************************************************************************************
1694 const CoordEq
* Gfx9Lib::GetMetaEquation(
1695 const MetaEqParams
& metaEqParams
)
1697 UINT_32 cachedMetaEqIndex
;
1699 for (cachedMetaEqIndex
= 0; cachedMetaEqIndex
< MaxCachedMetaEq
; cachedMetaEqIndex
++)
1701 if (memcmp(&metaEqParams
,
1702 &m_cachedMetaEqKey
[cachedMetaEqIndex
],
1703 static_cast<UINT_32
>(sizeof(metaEqParams
))) == 0)
1709 CoordEq
* pMetaEq
= NULL
;
1711 if (cachedMetaEqIndex
< MaxCachedMetaEq
)
1713 pMetaEq
= &m_cachedMetaEq
[cachedMetaEqIndex
];
1717 m_cachedMetaEqKey
[m_metaEqOverrideIndex
] = metaEqParams
;
1719 pMetaEq
= &m_cachedMetaEq
[m_metaEqOverrideIndex
++];
1721 m_metaEqOverrideIndex
%= MaxCachedMetaEq
;
1723 GenMetaEquation(pMetaEq
,
1724 metaEqParams
.maxMip
,
1725 metaEqParams
.elementBytesLog2
,
1726 metaEqParams
.numSamplesLog2
,
1727 metaEqParams
.metaFlag
,
1728 metaEqParams
.dataSurfaceType
,
1729 metaEqParams
.swizzleMode
,
1730 metaEqParams
.resourceType
,
1731 metaEqParams
.metaBlkWidthLog2
,
1732 metaEqParams
.metaBlkHeightLog2
,
1733 metaEqParams
.metaBlkDepthLog2
,
1734 metaEqParams
.compBlkWidthLog2
,
1735 metaEqParams
.compBlkHeightLog2
,
1736 metaEqParams
.compBlkDepthLog2
);
1743 ************************************************************************************************************************
1744 * Gfx9Lib::GenMetaEquation
1747 * Get meta equation for cmask/htile/DCC
1750 ************************************************************************************************************************
1752 VOID
Gfx9Lib::GenMetaEquation(
1753 CoordEq
* pMetaEq
, ///< [out] meta equation
1754 UINT_32 maxMip
, ///< [in] max mip Id
1755 UINT_32 elementBytesLog2
, ///< [in] data surface element bytes
1756 UINT_32 numSamplesLog2
, ///< [in] data surface sample count
1757 ADDR2_META_FLAGS metaFlag
, ///< [in] meta falg
1758 Gfx9DataType dataSurfaceType
, ///< [in] data surface type
1759 AddrSwizzleMode swizzleMode
, ///< [in] data surface swizzle mode
1760 AddrResourceType resourceType
, ///< [in] data surface resource type
1761 UINT_32 metaBlkWidthLog2
, ///< [in] meta block width
1762 UINT_32 metaBlkHeightLog2
, ///< [in] meta block height
1763 UINT_32 metaBlkDepthLog2
, ///< [in] meta block depth
1764 UINT_32 compBlkWidthLog2
, ///< [in] compress block width
1765 UINT_32 compBlkHeightLog2
, ///< [in] compress block height
1766 UINT_32 compBlkDepthLog2
) ///< [in] compress block depth
1769 UINT_32 numPipeTotalLog2
= GetPipeLog2ForMetaAddressing(metaFlag
.pipeAligned
, swizzleMode
);
1770 UINT_32 pipeInterleaveLog2
= m_pipeInterleaveLog2
;
1772 // Get the correct data address and rb equation
1774 GetDataEquation(&dataEq
, dataSurfaceType
, swizzleMode
, resourceType
,
1775 elementBytesLog2
, numSamplesLog2
);
1777 // Get pipe and rb equations
1778 CoordEq pipeEquation
;
1779 GetPipeEquation(&pipeEquation
, &dataEq
, pipeInterleaveLog2
, numPipeTotalLog2
,
1780 numSamplesLog2
, dataSurfaceType
, swizzleMode
, resourceType
);
1781 numPipeTotalLog2
= pipeEquation
.getsize();
1783 if (metaFlag
.linear
)
1785 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1786 ADDR_ASSERT_ALWAYS();
1788 ADDR_ASSERT(dataSurfaceType
== Gfx9DataColor
);
1790 dataEq
.copy(*pMetaEq
);
1792 if (IsLinear(swizzleMode
))
1794 if (metaFlag
.pipeAligned
)
1796 // Remove the pipe bits
1797 INT_32 shift
= static_cast<INT_32
>(numPipeTotalLog2
);
1798 pMetaEq
->shift(-shift
, pipeInterleaveLog2
);
1800 // Divide by comp block size, which for linear (which is always color) is 256 B
1803 if (metaFlag
.pipeAligned
)
1805 // Put pipe bits back in
1806 pMetaEq
->shift(numPipeTotalLog2
, pipeInterleaveLog2
);
1808 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
1810 pipeEquation
[i
].copyto((*pMetaEq
)[pipeInterleaveLog2
+ i
]);
1819 UINT_32 maxCompFragLog2
= static_cast<INT_32
>(m_maxCompFragLog2
);
1820 UINT_32 compFragLog2
=
1821 ((dataSurfaceType
== Gfx9DataColor
) && (numSamplesLog2
> maxCompFragLog2
)) ?
1822 maxCompFragLog2
: numSamplesLog2
;
1824 UINT_32 uncompFragLog2
= numSamplesLog2
- compFragLog2
;
1826 // Make sure the metaaddr is cleared
1828 pMetaEq
->resize(27);
1830 if (IsThick(resourceType
, swizzleMode
))
1832 Coordinate
cx(DIM_X
, 0);
1833 Coordinate
cy(DIM_Y
, 0);
1834 Coordinate
cz(DIM_Z
, 0);
1838 pMetaEq
->mort3d(cy
, cx
, cz
);
1842 pMetaEq
->mort3d(cx
, cy
, cz
);
1847 Coordinate
cx(DIM_X
, 0);
1848 Coordinate
cy(DIM_Y
, 0);
1853 pMetaEq
->mort2d(cy
, cx
, compFragLog2
);
1857 pMetaEq
->mort2d(cx
, cy
, compFragLog2
);
1860 //------------------------------------------------------------------------------------------------------------------------
1861 // Put the compressible fragments at the lsb
1862 // the uncompressible frags will be at the msb of the micro address
1863 //------------------------------------------------------------------------------------------------------------------------
1864 for (UINT_32 s
= 0; s
< compFragLog2
; s
++)
1867 (*pMetaEq
)[s
].add(cs
);
1871 // Keep a copy of the pipe equations
1872 CoordEq origPipeEquation
;
1873 pipeEquation
.copy(origPipeEquation
);
1876 // filter out everything under the compressed block size
1877 co
.set(DIM_X
, compBlkWidthLog2
);
1878 pMetaEq
->Filter('<', co
, 0, DIM_X
);
1879 co
.set(DIM_Y
, compBlkHeightLog2
);
1880 pMetaEq
->Filter('<', co
, 0, DIM_Y
);
1881 co
.set(DIM_Z
, compBlkDepthLog2
);
1882 pMetaEq
->Filter('<', co
, 0, DIM_Z
);
1884 // For non-color, filter out sample bits
1885 if (dataSurfaceType
!= Gfx9DataColor
)
1888 pMetaEq
->Filter('<', co
, 0, DIM_S
);
1891 // filter out everything above the metablock size
1892 co
.set(DIM_X
, metaBlkWidthLog2
- 1);
1893 pMetaEq
->Filter('>', co
, 0, DIM_X
);
1894 co
.set(DIM_Y
, metaBlkHeightLog2
- 1);
1895 pMetaEq
->Filter('>', co
, 0, DIM_Y
);
1896 co
.set(DIM_Z
, metaBlkDepthLog2
- 1);
1897 pMetaEq
->Filter('>', co
, 0, DIM_Z
);
1899 // filter out everything above the metablock size for the channel bits
1900 co
.set(DIM_X
, metaBlkWidthLog2
- 1);
1901 pipeEquation
.Filter('>', co
, 0, DIM_X
);
1902 co
.set(DIM_Y
, metaBlkHeightLog2
- 1);
1903 pipeEquation
.Filter('>', co
, 0, DIM_Y
);
1904 co
.set(DIM_Z
, metaBlkDepthLog2
- 1);
1905 pipeEquation
.Filter('>', co
, 0, DIM_Z
);
1907 // Make sure we still have the same number of channel bits
1908 if (pipeEquation
.getsize() != numPipeTotalLog2
)
1910 ADDR_ASSERT_ALWAYS();
1913 // Loop through all channel and rb bits,
1914 // and make sure these components exist in the metadata address
1915 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
1917 for (UINT_32 j
= pipeEquation
[i
].getsize(); j
> 0; j
--)
1919 if (pMetaEq
->Exists(pipeEquation
[i
][j
- 1]) == FALSE
)
1921 ADDR_ASSERT_ALWAYS();
1926 const UINT_32 numSeLog2
= metaFlag
.rbAligned
? m_seLog2
: 0;
1927 const UINT_32 numRbPeSeLog2
= metaFlag
.rbAligned
? m_rbPerSeLog2
: 0;
1928 const UINT_32 numRbTotalLog2
= numRbPeSeLog2
+ numSeLog2
;
1929 CoordEq origRbEquation
;
1931 GetRbEquation(&origRbEquation
, numRbPeSeLog2
, numSeLog2
);
1933 CoordEq rbEquation
= origRbEquation
;
1935 for (UINT_32 i
= 0; i
< numRbTotalLog2
; i
++)
1937 for (UINT_32 j
= rbEquation
[i
].getsize(); j
> 0; j
--)
1939 if (pMetaEq
->Exists(rbEquation
[i
][j
- 1]) == FALSE
)
1941 ADDR_ASSERT_ALWAYS();
1946 if (m_settings
.applyAliasFix
)
1951 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1952 for (UINT_32 i
= 0; i
< numRbTotalLog2
; i
++)
1954 for (UINT_32 j
= 0; j
< numPipeTotalLog2
; j
++)
1956 BOOL_32 isRbEquationInPipeEquation
= FALSE
;
1958 if (m_settings
.applyAliasFix
)
1960 CoordTerm filteredPipeEq
;
1961 filteredPipeEq
= pipeEquation
[j
];
1963 filteredPipeEq
.Filter('>', co
, 0, DIM_Z
);
1965 isRbEquationInPipeEquation
= (rbEquation
[i
] == filteredPipeEq
);
1969 isRbEquationInPipeEquation
= (rbEquation
[i
] == pipeEquation
[j
]);
1972 if (isRbEquationInPipeEquation
)
1974 rbEquation
[i
].Clear();
1979 bool rbAppendedWithPipeBits
[1 << (MaxSeLog2
+ MaxRbPerSeLog2
)] = {};
1981 // Loop through each bit of the channel, get the smallest coordinate,
1982 // and remove it from the metaaddr, and rb_equation
1983 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
1985 pipeEquation
[i
].getsmallest(co
);
1987 UINT_32 old_size
= pMetaEq
->getsize();
1988 pMetaEq
->Filter('=', co
);
1989 UINT_32 new_size
= pMetaEq
->getsize();
1990 if (new_size
!= old_size
-1)
1992 ADDR_ASSERT_ALWAYS();
1994 pipeEquation
.remove(co
);
1995 for (UINT_32 j
= 0; j
< numRbTotalLog2
; j
++)
1997 if (rbEquation
[j
].remove(co
))
1999 // if we actually removed something from this bit, then add the remaining
2000 // channel bits, as these can be removed for this bit
2001 for (UINT_32 k
= 0; k
< pipeEquation
[i
].getsize(); k
++)
2003 if (pipeEquation
[i
][k
] != co
)
2005 rbEquation
[j
].add(pipeEquation
[i
][k
]);
2006 rbAppendedWithPipeBits
[j
] = true;
2013 // Loop through the rb bits and see what remain;
2014 // filter out the smallest coordinate if it remains
2015 UINT_32 rbBitsLeft
= 0;
2016 for (UINT_32 i
= 0; i
< numRbTotalLog2
; i
++)
2018 BOOL_32 isRbEqAppended
= FALSE
;
2020 if (m_settings
.applyAliasFix
)
2022 isRbEqAppended
= (rbEquation
[i
].getsize() > (rbAppendedWithPipeBits
[i
] ? 1 : 0));
2026 isRbEqAppended
= (rbEquation
[i
].getsize() > 0);
2032 rbEquation
[i
].getsmallest(co
);
2033 UINT_32 old_size
= pMetaEq
->getsize();
2034 pMetaEq
->Filter('=', co
);
2035 UINT_32 new_size
= pMetaEq
->getsize();
2036 if (new_size
!= old_size
- 1)
2040 for (UINT_32 j
= i
+ 1; j
< numRbTotalLog2
; j
++)
2042 if (rbEquation
[j
].remove(co
))
2044 // if we actually removed something from this bit, then add the remaining
2045 // rb bits, as these can be removed for this bit
2046 for (UINT_32 k
= 0; k
< rbEquation
[i
].getsize(); k
++)
2048 if (rbEquation
[i
][k
] != co
)
2050 rbEquation
[j
].add(rbEquation
[i
][k
]);
2051 rbAppendedWithPipeBits
[j
] |= rbAppendedWithPipeBits
[i
];
2059 // capture the size of the metaaddr
2060 UINT_32 metaSize
= pMetaEq
->getsize();
2061 // resize to 49 bits...make this a nibble address
2062 pMetaEq
->resize(49);
2063 // Concatenate the macro address above the current address
2064 for (UINT_32 i
= metaSize
, j
= 0; i
< 49; i
++, j
++)
2067 (*pMetaEq
)[i
].add(co
);
2070 // Multiply by meta element size (in nibbles)
2071 if (dataSurfaceType
== Gfx9DataColor
)
2075 else if (dataSurfaceType
== Gfx9DataDepthStencil
)
2080 //------------------------------------------------------------------------------------------
2081 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2082 // Shift up from pipe interleave number of channel
2083 // and rb bits left, and uncompressed fragments
2084 //------------------------------------------------------------------------------------------
2086 pMetaEq
->shift(numPipeTotalLog2
+ rbBitsLeft
+ uncompFragLog2
, pipeInterleaveLog2
+ 1);
2088 // Put in the channel bits
2089 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
2091 origPipeEquation
[i
].copyto((*pMetaEq
)[pipeInterleaveLog2
+1 + i
]);
2094 // Put in remaining rb bits
2095 for (UINT_32 i
= 0, j
= 0; j
< rbBitsLeft
; i
= (i
+ 1) % numRbTotalLog2
)
2097 BOOL_32 isRbEqAppended
= FALSE
;
2099 if (m_settings
.applyAliasFix
)
2101 isRbEqAppended
= (rbEquation
[i
].getsize() > (rbAppendedWithPipeBits
[i
] ? 1 : 0));
2105 isRbEqAppended
= (rbEquation
[i
].getsize() > 0);
2110 origRbEquation
[i
].copyto((*pMetaEq
)[pipeInterleaveLog2
+ 1 + numPipeTotalLog2
+ j
]);
2111 // Mark any rb bit we add in to the rb mask
2116 //------------------------------------------------------------------------------------------
2117 // Put in the uncompressed fragment bits
2118 //------------------------------------------------------------------------------------------
2119 for (UINT_32 i
= 0; i
< uncompFragLog2
; i
++)
2121 co
.set(DIM_S
, compFragLog2
+ i
);
2122 (*pMetaEq
)[pipeInterleaveLog2
+ 1 + numPipeTotalLog2
+ rbBitsLeft
+ i
].add(co
);
2128 ************************************************************************************************************************
2129 * Gfx9Lib::IsEquationSupported
2132 * Check if equation is supported for given swizzle mode and resource type.
2136 ************************************************************************************************************************
2138 BOOL_32
Gfx9Lib::IsEquationSupported(
2139 AddrResourceType rsrcType
,
2140 AddrSwizzleMode swMode
,
2141 UINT_32 elementBytesLog2
) const
2143 BOOL_32 supported
= (elementBytesLog2
< MaxElementBytesLog2
) &&
2144 (IsValidSwMode(swMode
) == TRUE
) &&
2145 (IsLinear(swMode
) == FALSE
) &&
2146 (((IsTex2d(rsrcType
) == TRUE
) &&
2147 ((elementBytesLog2
< 4) ||
2148 ((IsRotateSwizzle(swMode
) == FALSE
) &&
2149 (IsZOrderSwizzle(swMode
) == FALSE
)))) ||
2150 ((IsTex3d(rsrcType
) == TRUE
) &&
2151 (IsRotateSwizzle(swMode
) == FALSE
) &&
2152 (IsBlock256b(swMode
) == FALSE
)));
2158 ************************************************************************************************************************
2159 * Gfx9Lib::InitEquationTable
2162 * Initialize Equation table.
2166 ************************************************************************************************************************
2168 VOID
Gfx9Lib::InitEquationTable()
2170 memset(m_equationTable
, 0, sizeof(m_equationTable
));
2172 // Loop all possible resource type (2D/3D)
2173 for (UINT_32 rsrcTypeIdx
= 0; rsrcTypeIdx
< MaxRsrcType
; rsrcTypeIdx
++)
2175 AddrResourceType rsrcType
= static_cast<AddrResourceType
>(rsrcTypeIdx
+ ADDR_RSRC_TEX_2D
);
2177 // Loop all possible swizzle mode
2178 for (UINT_32 swModeIdx
= 0; swModeIdx
< MaxSwModeType
; swModeIdx
++)
2180 AddrSwizzleMode swMode
= static_cast<AddrSwizzleMode
>(swModeIdx
);
2182 // Loop all possible bpp
2183 for (UINT_32 bppIdx
= 0; bppIdx
< MaxElementBytesLog2
; bppIdx
++)
2185 UINT_32 equationIndex
= ADDR_INVALID_EQUATION_INDEX
;
2187 // Check if the input is supported
2188 if (IsEquationSupported(rsrcType
, swMode
, bppIdx
))
2190 ADDR_EQUATION equation
;
2191 ADDR_E_RETURNCODE retCode
;
2193 memset(&equation
, 0, sizeof(ADDR_EQUATION
));
2195 // Generate the equation
2196 if (IsBlock256b(swMode
) && IsTex2d(rsrcType
))
2198 retCode
= ComputeBlock256Equation(rsrcType
, swMode
, bppIdx
, &equation
);
2200 else if (IsThin(rsrcType
, swMode
))
2202 retCode
= ComputeThinEquation(rsrcType
, swMode
, bppIdx
, &equation
);
2206 retCode
= ComputeThickEquation(rsrcType
, swMode
, bppIdx
, &equation
);
2209 // Only fill the equation into the table if the return code is ADDR_OK,
2210 // otherwise if the return code is not ADDR_OK, it indicates this is not
2211 // a valid input, we do nothing but just fill invalid equation index
2212 // into the lookup table.
2213 if (retCode
== ADDR_OK
)
2215 equationIndex
= m_numEquations
;
2216 ADDR_ASSERT(equationIndex
< EquationTableSize
);
2218 m_equationTable
[equationIndex
] = equation
;
2224 ADDR_ASSERT_ALWAYS();
2228 // Fill the index into the lookup table, if the combination is not supported
2229 // fill the invalid equation index
2230 m_equationLookupTable
[rsrcTypeIdx
][swModeIdx
][bppIdx
] = equationIndex
;
2237 ************************************************************************************************************************
2238 * Gfx9Lib::HwlGetEquationIndex
2241 * Interface function stub of GetEquationIndex
2245 ************************************************************************************************************************
2247 UINT_32
Gfx9Lib::HwlGetEquationIndex(
2248 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
,
2249 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
2252 AddrResourceType rsrcType
= pIn
->resourceType
;
2253 AddrSwizzleMode swMode
= pIn
->swizzleMode
;
2254 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
2255 UINT_32 index
= ADDR_INVALID_EQUATION_INDEX
;
2257 if (IsEquationSupported(rsrcType
, swMode
, elementBytesLog2
))
2259 UINT_32 rsrcTypeIdx
= static_cast<UINT_32
>(rsrcType
) - 1;
2260 UINT_32 swModeIdx
= static_cast<UINT_32
>(swMode
);
2262 index
= m_equationLookupTable
[rsrcTypeIdx
][swModeIdx
][elementBytesLog2
];
2265 if (pOut
->pMipInfo
!= NULL
)
2267 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
2269 pOut
->pMipInfo
[i
].equationIndex
= index
;
2277 ************************************************************************************************************************
2278 * Gfx9Lib::HwlComputeBlock256Equation
2281 * Interface function stub of ComputeBlock256Equation
2285 ************************************************************************************************************************
2287 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeBlock256Equation(
2288 AddrResourceType rsrcType
,
2289 AddrSwizzleMode swMode
,
2290 UINT_32 elementBytesLog2
,
2291 ADDR_EQUATION
* pEquation
) const
2293 ADDR_E_RETURNCODE ret
= ADDR_OK
;
2295 pEquation
->numBits
= 8;
2298 for (; i
< elementBytesLog2
; i
++)
2300 InitChannel(1, 0 , i
, &pEquation
->addr
[i
]);
2303 ADDR_CHANNEL_SETTING
* pixelBit
= &pEquation
->addr
[elementBytesLog2
];
2305 const UINT_32 maxBitsUsed
= 4;
2306 ADDR_CHANNEL_SETTING x
[maxBitsUsed
] = {};
2307 ADDR_CHANNEL_SETTING y
[maxBitsUsed
] = {};
2309 for (i
= 0; i
< maxBitsUsed
; i
++)
2311 InitChannel(1, 0, elementBytesLog2
+ i
, &x
[i
]);
2312 InitChannel(1, 1, i
, &y
[i
]);
2315 if (IsStandardSwizzle(rsrcType
, swMode
))
2317 switch (elementBytesLog2
)
2360 ADDR_ASSERT_ALWAYS();
2361 ret
= ADDR_INVALIDPARAMS
;
2365 else if (IsDisplaySwizzle(rsrcType
, swMode
))
2367 switch (elementBytesLog2
)
2410 ADDR_ASSERT_ALWAYS();
2411 ret
= ADDR_INVALIDPARAMS
;
2415 else if (IsRotateSwizzle(swMode
))
2417 switch (elementBytesLog2
)
2454 ADDR_ASSERT_ALWAYS();
2456 ret
= ADDR_INVALIDPARAMS
;
2462 ADDR_ASSERT_ALWAYS();
2463 ret
= ADDR_INVALIDPARAMS
;
2469 ASSERTED Dim2d microBlockDim
= Block256_2d
[elementBytesLog2
];
2470 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation
->addr
, 8, 0)) ==
2471 (microBlockDim
.w
* (1 << elementBytesLog2
)));
2472 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation
->addr
, 8, 1)) == microBlockDim
.h
);
2479 ************************************************************************************************************************
2480 * Gfx9Lib::HwlComputeThinEquation
2483 * Interface function stub of ComputeThinEquation
2487 ************************************************************************************************************************
2489 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeThinEquation(
2490 AddrResourceType rsrcType
,
2491 AddrSwizzleMode swMode
,
2492 UINT_32 elementBytesLog2
,
2493 ADDR_EQUATION
* pEquation
) const
2495 ADDR_E_RETURNCODE ret
= ADDR_OK
;
2497 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swMode
);
2499 UINT_32 maxXorBits
= blockSizeLog2
;
2500 if (IsNonPrtXor(swMode
))
2502 // For non-prt-xor, maybe need to initialize some more bits for xor
2503 // The highest xor bit used in equation will be max the following 3 items:
2504 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2505 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2508 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+ 2 * GetPipeXorBits(blockSizeLog2
));
2509 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+
2510 GetPipeXorBits(blockSizeLog2
) +
2511 2 * GetBankXorBits(blockSizeLog2
));
2514 const UINT_32 maxBitsUsed
= 14;
2515 ADDR_ASSERT((2 * maxBitsUsed
) >= maxXorBits
);
2516 ADDR_CHANNEL_SETTING x
[maxBitsUsed
] = {};
2517 ADDR_CHANNEL_SETTING y
[maxBitsUsed
] = {};
2519 const UINT_32 extraXorBits
= 16;
2520 ADDR_ASSERT(extraXorBits
>= maxXorBits
- blockSizeLog2
);
2521 ADDR_CHANNEL_SETTING xorExtra
[extraXorBits
] = {};
2523 for (UINT_32 i
= 0; i
< maxBitsUsed
; i
++)
2525 InitChannel(1, 0, elementBytesLog2
+ i
, &x
[i
]);
2526 InitChannel(1, 1, i
, &y
[i
]);
2529 ADDR_CHANNEL_SETTING
* pixelBit
= pEquation
->addr
;
2531 for (UINT_32 i
= 0; i
< elementBytesLog2
; i
++)
2533 InitChannel(1, 0 , i
, &pixelBit
[i
]);
2538 UINT_32 lowBits
= 0;
2540 if (IsZOrderSwizzle(swMode
))
2542 if (elementBytesLog2
<= 3)
2544 for (UINT_32 i
= elementBytesLog2
; i
< 6; i
++)
2546 pixelBit
[i
] = (((i
- elementBytesLog2
) & 1) == 0) ? x
[xIdx
++] : y
[yIdx
++];
2553 ret
= ADDR_INVALIDPARAMS
;
2558 ret
= HwlComputeBlock256Equation(rsrcType
, swMode
, elementBytesLog2
, pEquation
);
2562 Dim2d microBlockDim
= Block256_2d
[elementBytesLog2
];
2563 xIdx
= Log2(microBlockDim
.w
);
2564 yIdx
= Log2(microBlockDim
.h
);
2571 for (UINT_32 i
= lowBits
; i
< blockSizeLog2
; i
++)
2573 pixelBit
[i
] = ((i
& 1) == 0) ? y
[yIdx
++] : x
[xIdx
++];
2576 for (UINT_32 i
= blockSizeLog2
; i
< maxXorBits
; i
++)
2578 xorExtra
[i
- blockSizeLog2
] = ((i
& 1) == 0) ? y
[yIdx
++] : x
[xIdx
++];
2584 UINT_32 pipeStart
= m_pipeInterleaveLog2
;
2585 UINT_32 pipeXorBits
= GetPipeXorBits(blockSizeLog2
);
2587 UINT_32 bankStart
= pipeStart
+ pipeXorBits
;
2588 UINT_32 bankXorBits
= GetBankXorBits(blockSizeLog2
);
2590 for (UINT_32 i
= 0; i
< pipeXorBits
; i
++)
2592 UINT_32 xor1BitPos
= pipeStart
+ 2 * pipeXorBits
- 1 - i
;
2593 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2594 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2596 InitChannel(&pEquation
->xor1
[pipeStart
+ i
], pXor1Src
);
2599 for (UINT_32 i
= 0; i
< bankXorBits
; i
++)
2601 UINT_32 xor1BitPos
= bankStart
+ 2 * bankXorBits
- 1 - i
;
2602 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2603 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2605 InitChannel(&pEquation
->xor1
[bankStart
+ i
], pXor1Src
);
2608 if (IsPrt(swMode
) == FALSE
)
2610 for (UINT_32 i
= 0; i
< pipeXorBits
; i
++)
2612 InitChannel(1, 2, pipeXorBits
- i
- 1, &pEquation
->xor2
[pipeStart
+ i
]);
2615 for (UINT_32 i
= 0; i
< bankXorBits
; i
++)
2617 InitChannel(1, 2, bankXorBits
- i
- 1 + pipeXorBits
, &pEquation
->xor2
[bankStart
+ i
]);
2622 pEquation
->numBits
= blockSizeLog2
;
2629 ************************************************************************************************************************
2630 * Gfx9Lib::HwlComputeThickEquation
2633 * Interface function stub of ComputeThickEquation
2637 ************************************************************************************************************************
// Builds the address equation for a thick (3D) swizzle mode into *pEquation and returns an
// ADDR_E_RETURNCODE (ADDR_OK unless one of the visible error paths sets ADDR_INVALIDPARAMS).
// Visible steps: assert the resource is 3D, size the xor range (maxXorBits), emit the byte-in-element
// bits, prepare x/y/z channel tables, choose the low bits by swizzle family, fill the remaining bits up
// to the block size, then set xor1/xor2 for the pipe and bank bits and record numBits.
// NOTE(review): this listing has lost its brace-only lines and some statements (including every
// per-size case body of the two switches); confirm control flow against the complete file.
2639 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeThickEquation(
2640 AddrResourceType rsrcType
,
2641 AddrSwizzleMode swMode
,
2642 UINT_32 elementBytesLog2
,
2643 ADDR_EQUATION
* pEquation
) const
2645 ADDR_E_RETURNCODE ret
= ADDR_OK
;
2647 ADDR_ASSERT(IsTex3d(rsrcType
));
2649 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swMode
);
// maxXorBits starts at the block size and is raised below for non-PRT xor modes.
2651 UINT_32 maxXorBits
= blockSizeLog2
;
2652 if (IsNonPrtXor(swMode
))
2654 // For non-prt-xor, maybe need to initialize some more bits for xor
2655 // The highest xor bit used in equation will be max the following 3:
2656 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2657 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2660 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+ 3 * GetPipeXorBits(blockSizeLog2
));
2661 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+
2662 GetPipeXorBits(blockSizeLog2
) +
2663 3 * GetBankXorBits(blockSizeLog2
));
// The lowest elementBytesLog2 address bits are channel 0, indices 0..elementBytesLog2-1.
2666 for (UINT_32 i
= 0; i
< elementBytesLog2
; i
++)
2668 InitChannel(1, 0 , i
, &pEquation
->addr
[i
]);
// pixelBit is first based just above the element-byte bits; it is re-based to addr[0] further down.
2671 ADDR_CHANNEL_SETTING
* pixelBit
= &pEquation
->addr
[elementBytesLog2
];
2673 const UINT_32 maxBitsUsed
= 12;
2674 ADDR_ASSERT((3 * maxBitsUsed
) >= maxXorBits
);
2675 ADDR_CHANNEL_SETTING x
[maxBitsUsed
] = {};
2676 ADDR_CHANNEL_SETTING y
[maxBitsUsed
] = {};
2677 ADDR_CHANNEL_SETTING z
[maxBitsUsed
] = {};
// xorExtra holds source bits at positions >= blockSizeLog2 that are only referenced by the xor terms.
2679 const UINT_32 extraXorBits
= 24;
2680 ADDR_ASSERT(extraXorBits
>= maxXorBits
- blockSizeLog2
);
2681 ADDR_CHANNEL_SETTING xorExtra
[extraXorBits
] = {};
// x[i] is channel 0 at index (elementBytesLog2 + i); y[i] and z[i] are channels 1 and 2 at index i.
2683 for (UINT_32 i
= 0; i
< maxBitsUsed
; i
++)
2685 InitChannel(1, 0, elementBytesLog2
+ i
, &x
[i
]);
2686 InitChannel(1, 1, i
, &y
[i
]);
2687 InitChannel(1, 2, i
, &z
[i
]);
// Low-bit pattern is chosen per swizzle family and element size; only the error paths are visible here.
2690 if (IsZOrderSwizzle(swMode
))
2692 switch (elementBytesLog2
)
2745 ADDR_ASSERT_ALWAYS();
2746 ret
= ADDR_INVALIDPARAMS
;
2750 else if (IsStandardSwizzle(rsrcType
, swMode
))
2752 switch (elementBytesLog2
)
2805 ADDR_ASSERT_ALWAYS();
2806 ret
= ADDR_INVALIDPARAMS
;
2812 ADDR_ASSERT_ALWAYS();
2813 ret
= ADDR_INVALIDPARAMS
;
// Continue each channel after the bits already consumed by the Block1K_3d micro block.
2818 Dim3d microBlockDim
= Block1K_3d
[elementBytesLog2
];
2819 UINT_32 xIdx
= Log2(microBlockDim
.w
);
2820 UINT_32 yIdx
= Log2(microBlockDim
.h
);
2821 UINT_32 zIdx
= Log2(microBlockDim
.d
);
2823 pixelBit
= pEquation
->addr
;
// The asserts check that exactly bits 0..9 were filled by the code above.
2825 const UINT_32 lowBits
= 10;
2826 ADDR_ASSERT(pEquation
->addr
[lowBits
- 1].valid
== 1);
2827 ADDR_ASSERT(pEquation
->addr
[lowBits
].valid
== 0);
// Bits lowBits..blockSizeLog2-1 take the next x, z or y bit; z is used when i % 3 == 1
// (the condition guarding the x branch is not visible in this listing).
2829 for (UINT_32 i
= lowBits
; i
< blockSizeLog2
; i
++)
2833 pixelBit
[i
] = x
[xIdx
++];
2835 else if ((i
% 3) == 1)
2837 pixelBit
[i
] = z
[zIdx
++];
2841 pixelBit
[i
] = y
[yIdx
++];
// Same selection for positions blockSizeLog2..maxXorBits-1, stored in xorExtra[].
2845 for (UINT_32 i
= blockSizeLog2
; i
< maxXorBits
; i
++)
2849 xorExtra
[i
- blockSizeLog2
] = x
[xIdx
++];
2851 else if ((i
% 3) == 1)
2853 xorExtra
[i
- blockSizeLog2
] = z
[zIdx
++];
2857 xorExtra
[i
- blockSizeLog2
] = y
[yIdx
++];
// Pipe bits: each pipe bit i gets xor1 and xor2 sources from two adjacent, descending bit positions,
// read from addr[] when inside the block and from xorExtra[] otherwise.
2864 UINT_32 pipeStart
= m_pipeInterleaveLog2
;
2865 UINT_32 pipeXorBits
= GetPipeXorBits(blockSizeLog2
);
2866 for (UINT_32 i
= 0; i
< pipeXorBits
; i
++)
2868 UINT_32 xor1BitPos
= pipeStart
+ (3 * pipeXorBits
) - 1 - (2 * i
);
2869 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2870 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2872 InitChannel(&pEquation
->xor1
[pipeStart
+ i
], pXor1Src
);
2874 UINT_32 xor2BitPos
= pipeStart
+ (3 * pipeXorBits
) - 2 - (2 * i
);
2875 ADDR_CHANNEL_SETTING
* pXor2Src
= (xor2BitPos
< blockSizeLog2
) ?
2876 &pEquation
->addr
[xor2BitPos
] : &xorExtra
[xor2BitPos
- blockSizeLog2
];
2878 InitChannel(&pEquation
->xor2
[pipeStart
+ i
], pXor2Src
);
// Bank bits: same scheme, with positions counted from bankStart.
2881 UINT_32 bankStart
= pipeStart
+ pipeXorBits
;
2882 UINT_32 bankXorBits
= GetBankXorBits(blockSizeLog2
);
2883 for (UINT_32 i
= 0; i
< bankXorBits
; i
++)
2885 UINT_32 xor1BitPos
= bankStart
+ (3 * bankXorBits
) - 1 - (2 * i
);
2886 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2887 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2889 InitChannel(&pEquation
->xor1
[bankStart
+ i
], pXor1Src
);
2891 UINT_32 xor2BitPos
= bankStart
+ (3 * bankXorBits
) - 2 - (2 * i
);
2892 ADDR_CHANNEL_SETTING
* pXor2Src
= (xor2BitPos
< blockSizeLog2
) ?
2893 &pEquation
->addr
[xor2BitPos
] : &xorExtra
[xor2BitPos
- blockSizeLog2
];
2895 InitChannel(&pEquation
->xor2
[bankStart
+ i
], pXor2Src
);
// The equation covers exactly one block.
2899 pEquation
->numBits
= blockSizeLog2
;
2906 ************************************************************************************************************************
2907 * Gfx9Lib::IsValidDisplaySwizzleMode
2910 * Check if a swizzle mode is supported by display engine
2913 * TRUE is swizzle mode is supported by display engine
2914 ************************************************************************************************************************
2916 BOOL_32
Gfx9Lib::IsValidDisplaySwizzleMode(
2917 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
) const
2919 BOOL_32 support
= FALSE
;
2921 if (m_settings
.isDce12
)
2923 switch (pIn
->swizzleMode
)
2925 case ADDR_SW_256B_D
:
2926 case ADDR_SW_256B_R
:
2927 support
= (pIn
->bpp
== 32);
2930 case ADDR_SW_LINEAR
:
2933 case ADDR_SW_64KB_D
:
2934 case ADDR_SW_64KB_R
:
2935 case ADDR_SW_4KB_D_X
:
2936 case ADDR_SW_4KB_R_X
:
2937 case ADDR_SW_64KB_D_X
:
2938 case ADDR_SW_64KB_R_X
:
2939 support
= (pIn
->bpp
<= 64);
2946 else if (m_settings
.isDcn1
)
2948 switch (pIn
->swizzleMode
)
2951 case ADDR_SW_64KB_D
:
2952 case ADDR_SW_64KB_D_T
:
2953 case ADDR_SW_4KB_D_X
:
2954 case ADDR_SW_64KB_D_X
:
2955 support
= (pIn
->bpp
== 64);
2958 case ADDR_SW_LINEAR
:
2960 case ADDR_SW_64KB_S
:
2961 case ADDR_SW_64KB_S_T
:
2962 case ADDR_SW_4KB_S_X
:
2963 case ADDR_SW_64KB_S_X
:
2964 support
= (pIn
->bpp
<= 64);
2973 ADDR_NOT_IMPLEMENTED();
2980 ************************************************************************************************************************
2981 * Gfx9Lib::HwlComputePipeBankXor
2984 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2988 ************************************************************************************************************************
2990 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputePipeBankXor(
2991 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT
* pIn
,
2992 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT
* pOut
) const
2994 if (IsXor(pIn
->swizzleMode
))
2996 UINT_32 macroBlockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
2997 UINT_32 pipeBits
= GetPipeXorBits(macroBlockBits
);
2998 UINT_32 bankBits
= GetBankXorBits(macroBlockBits
);
3000 UINT_32 pipeXor
= 0;
3001 UINT_32 bankXor
= 0;
3003 const UINT_32 bankMask
= (1 << bankBits
) - 1;
3004 const UINT_32 index
= pIn
->surfIndex
& bankMask
;
3006 const UINT_32 bpp
= pIn
->flags
.fmask
?
3007 GetFmaskBpp(pIn
->numSamples
, pIn
->numFrags
) : GetElemLib()->GetBitsPerPixel(pIn
->format
);
3010 static const UINT_32 BankXorSmallBpp
[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3011 static const UINT_32 BankXorLargeBpp
[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3013 bankXor
= (bpp
<= 32) ? BankXorSmallBpp
[index
] : BankXorLargeBpp
[index
];
3015 else if (bankBits
> 0)
3017 UINT_32 bankIncrease
= (1 << (bankBits
- 1)) - 1;
3018 bankIncrease
= (bankIncrease
== 0) ? 1 : bankIncrease
;
3019 bankXor
= (index
* bankIncrease
) & bankMask
;
3022 pOut
->pipeBankXor
= (bankXor
<< pipeBits
) | pipeXor
;
3026 pOut
->pipeBankXor
= 0;
3033 ************************************************************************************************************************
3034 * Gfx9Lib::HwlComputeSlicePipeBankXor
3037 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3041 ************************************************************************************************************************
3043 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSlicePipeBankXor(
3044 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT
* pIn
,
3045 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT
* pOut
) const
3047 UINT_32 macroBlockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
3048 UINT_32 pipeBits
= GetPipeXorBits(macroBlockBits
);
3049 UINT_32 bankBits
= GetBankXorBits(macroBlockBits
);
3051 UINT_32 pipeXor
= ReverseBitVector(pIn
->slice
, pipeBits
);
3052 UINT_32 bankXor
= ReverseBitVector(pIn
->slice
>> pipeBits
, bankBits
);
3054 pOut
->pipeBankXor
= pIn
->basePipeBankXor
^ (pipeXor
| (bankXor
<< pipeBits
));
3060 ************************************************************************************************************************
3061 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3064 * Compute sub resource offset to support swizzle pattern
3068 ************************************************************************************************************************
3070 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3071 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT
* pIn
,
3072 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT
* pOut
) const
3074 ADDR_ASSERT(IsThin(pIn
->resourceType
, pIn
->swizzleMode
));
3076 UINT_32 macroBlockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
3077 UINT_32 pipeBits
= GetPipeXorBits(macroBlockBits
);
3078 UINT_32 bankBits
= GetBankXorBits(macroBlockBits
);
3079 UINT_32 pipeXor
= ReverseBitVector(pIn
->slice
, pipeBits
);
3080 UINT_32 bankXor
= ReverseBitVector(pIn
->slice
>> pipeBits
, bankBits
);
3081 UINT_32 pipeBankXor
= ((pipeXor
| (bankXor
<< pipeBits
)) ^ (pIn
->pipeBankXor
)) << m_pipeInterleaveLog2
;
3083 pOut
->offset
= pIn
->slice
* pIn
->sliceSize
+
3084 pIn
->macroBlockOffset
+
3085 (pIn
->mipTailOffset
^ pipeBankXor
) -
3086 static_cast<UINT_64
>(pipeBankXor
);
3091 ************************************************************************************************************************
3092 * Gfx9Lib::ValidateNonSwModeParams
3095 * Validate compute surface info params except swizzle mode
3098 * TRUE if parameters are valid, FALSE otherwise
3099 ************************************************************************************************************************
3101 BOOL_32
Gfx9Lib::ValidateNonSwModeParams(
3102 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
) const
3104 BOOL_32 valid
= TRUE
;
3106 if ((pIn
->bpp
== 0) || (pIn
->bpp
> 128) || (pIn
->width
== 0) || (pIn
->numFrags
> 8) || (pIn
->numSamples
> 16))
3108 ADDR_ASSERT_ALWAYS();
3112 if (pIn
->resourceType
>= ADDR_RSRC_MAX_TYPE
)
3114 ADDR_ASSERT_ALWAYS();
3118 const BOOL_32 mipmap
= (pIn
->numMipLevels
> 1);
3119 const BOOL_32 msaa
= (pIn
->numFrags
> 1);
3120 const BOOL_32 isBc
= ElemLib::IsBlockCompressed(pIn
->format
);
3122 const AddrResourceType rsrcType
= pIn
->resourceType
;
3123 const BOOL_32 tex3d
= IsTex3d(rsrcType
);
3124 const BOOL_32 tex2d
= IsTex2d(rsrcType
);
3125 const BOOL_32 tex1d
= IsTex1d(rsrcType
);
3127 const ADDR2_SURFACE_FLAGS flags
= pIn
->flags
;
3128 const BOOL_32 zbuffer
= flags
.depth
|| flags
.stencil
;
3129 const BOOL_32 display
= flags
.display
|| flags
.rotated
;
3130 const BOOL_32 stereo
= flags
.qbStereo
;
3131 const BOOL_32 fmask
= flags
.fmask
;
3133 // Resource type check
3136 if (msaa
|| zbuffer
|| display
|| stereo
|| isBc
|| fmask
)
3138 ADDR_ASSERT_ALWAYS();
3144 if ((msaa
&& mipmap
) || (stereo
&& msaa
) || (stereo
&& mipmap
))
3146 ADDR_ASSERT_ALWAYS();
3152 if (msaa
|| zbuffer
|| display
|| stereo
|| fmask
)
3154 ADDR_ASSERT_ALWAYS();
3160 ADDR_ASSERT_ALWAYS();
3168 ************************************************************************************************************************
3169 * Gfx9Lib::ValidateSwModeParams
3172 * Validate compute surface info related to swizzle mode
3175 * TRUE if parameters are valid, FALSE otherwise
3176 ************************************************************************************************************************
3178 BOOL_32
Gfx9Lib::ValidateSwModeParams(
3179 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
) const
3181 BOOL_32 valid
= TRUE
;
3183 if ((pIn
->swizzleMode
>= ADDR_SW_MAX_TYPE
) || (IsValidSwMode(pIn
->swizzleMode
) == FALSE
))
3185 ADDR_ASSERT_ALWAYS();
3189 const BOOL_32 mipmap
= (pIn
->numMipLevels
> 1);
3190 const BOOL_32 msaa
= (pIn
->numFrags
> 1);
3191 const BOOL_32 isBc
= ElemLib::IsBlockCompressed(pIn
->format
);
3192 const BOOL_32 is422
= ElemLib::IsMacroPixelPacked(pIn
->format
);
3194 const AddrResourceType rsrcType
= pIn
->resourceType
;
3195 const BOOL_32 tex3d
= IsTex3d(rsrcType
);
3196 const BOOL_32 tex2d
= IsTex2d(rsrcType
);
3197 const BOOL_32 tex1d
= IsTex1d(rsrcType
);
3199 const AddrSwizzleMode swizzle
= pIn
->swizzleMode
;
3200 const BOOL_32 linear
= IsLinear(swizzle
);
3201 const BOOL_32 blk256B
= IsBlock256b(swizzle
);
3202 const BOOL_32 isNonPrtXor
= IsNonPrtXor(swizzle
);
3204 const ADDR2_SURFACE_FLAGS flags
= pIn
->flags
;
3205 const BOOL_32 zbuffer
= flags
.depth
|| flags
.stencil
;
3206 const BOOL_32 color
= flags
.color
;
3207 const BOOL_32 texture
= flags
.texture
;
3208 const BOOL_32 display
= flags
.display
|| flags
.rotated
;
3209 const BOOL_32 prt
= flags
.prt
;
3210 const BOOL_32 fmask
= flags
.fmask
;
3212 const BOOL_32 thin3d
= tex3d
&& flags
.view3dAs2dArray
;
3213 const BOOL_32 zMaxMip
= tex3d
&& mipmap
&&
3214 (pIn
->numSlices
>= pIn
->width
) && (pIn
->numSlices
>= pIn
->height
);
3217 if (msaa
&& (GetBlockSize(swizzle
) < (m_pipeInterleaveBytes
* pIn
->numFrags
)))
3219 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3220 ADDR_ASSERT_ALWAYS();
3224 if (display
&& (IsValidDisplaySwizzleMode(pIn
) == FALSE
))
3226 ADDR_ASSERT_ALWAYS();
3230 if ((pIn
->bpp
== 96) && (linear
== FALSE
))
3232 ADDR_ASSERT_ALWAYS();
3236 if (prt
&& isNonPrtXor
)
3238 ADDR_ASSERT_ALWAYS();
3242 // Resource type check
3245 if (linear
== FALSE
)
3247 ADDR_ASSERT_ALWAYS();
3252 // Swizzle type check
3255 if (((tex1d
== FALSE
) && prt
) || zbuffer
|| msaa
|| (pIn
->bpp
== 0) ||
3256 ((pIn
->bpp
% 8) != 0) || (isBc
&& texture
) || fmask
)
3258 ADDR_ASSERT_ALWAYS();
3262 else if (IsZOrderSwizzle(swizzle
))
3264 if ((color
&& msaa
) || thin3d
|| isBc
|| is422
|| (tex2d
&& (pIn
->bpp
> 64)) || (msaa
&& (pIn
->bpp
> 32)))
3266 ADDR_ASSERT_ALWAYS();
3270 else if (IsStandardSwizzle(swizzle
))
3272 if (zbuffer
|| thin3d
|| (tex3d
&& (pIn
->bpp
== 128) && color
) || fmask
)
3274 ADDR_ASSERT_ALWAYS();
3278 else if (IsDisplaySwizzle(swizzle
))
3280 if (zbuffer
|| (prt
&& tex3d
) || fmask
|| zMaxMip
)
3282 ADDR_ASSERT_ALWAYS();
3286 else if (IsRotateSwizzle(swizzle
))
3288 if (zbuffer
|| (pIn
->bpp
> 64) || tex3d
|| isBc
|| fmask
)
3290 ADDR_ASSERT_ALWAYS();
3296 ADDR_ASSERT_ALWAYS();
3303 if (prt
|| zbuffer
|| tex3d
|| mipmap
|| msaa
)
3305 ADDR_ASSERT_ALWAYS();
3314 ************************************************************************************************************************
3315 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3318 * Compute surface info sanity check
3321 * ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3322 ************************************************************************************************************************
3324 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3325 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
) const
3327 return ValidateNonSwModeParams(pIn
) && ValidateSwModeParams(pIn
) ? ADDR_OK
: ADDR_INVALIDPARAMS
;
3331 ************************************************************************************************************************
3332 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3335 * Internal function to get suggested surface information for cliet to use
3339 ************************************************************************************************************************
3341 ADDR_E_RETURNCODE
Gfx9Lib::HwlGetPreferredSurfaceSetting(
3342 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT
* pIn
,
3343 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT
* pOut
) const
3345 ADDR_E_RETURNCODE returnCode
= ADDR_INVALIDPARAMS
;
3346 ElemLib
* pElemLib
= GetElemLib();
3348 UINT_32 bpp
= pIn
->bpp
;
3349 UINT_32 width
= Max(pIn
->width
, 1u);
3350 UINT_32 height
= Max(pIn
->height
, 1u);
3351 UINT_32 numSamples
= Max(pIn
->numSamples
, 1u);
3352 UINT_32 numFrags
= (pIn
->numFrags
== 0) ? numSamples
: pIn
->numFrags
;
3354 if (pIn
->flags
.fmask
)
3356 bpp
= GetFmaskBpp(numSamples
, numFrags
);
3359 pOut
->resourceType
= ADDR_RSRC_TEX_2D
;
3363 // Set format to INVALID will skip this conversion
3364 if (pIn
->format
!= ADDR_FMT_INVALID
)
3366 UINT_32 expandX
, expandY
;
3368 // Don't care for this case
3369 ElemMode elemMode
= ADDR_UNCOMPRESSED
;
3371 // Get compression/expansion factors and element mode which indicates compression/expansion
3372 bpp
= pElemLib
->GetBitsPerPixel(pIn
->format
,
3377 UINT_32 basePitch
= 0;
3378 GetElemLib()->AdjustSurfaceInfo(elemMode
,
3387 // The output may get changed for volume(3D) texture resource in future
3388 pOut
->resourceType
= pIn
->resourceType
;
3391 const UINT_32 numSlices
= Max(pIn
->numSlices
, 1u);
3392 const UINT_32 numMipLevels
= Max(pIn
->numMipLevels
, 1u);
3393 const BOOL_32 msaa
= (numFrags
> 1) || (numSamples
> 1);
3394 const BOOL_32 displayRsrc
= pIn
->flags
.display
|| pIn
->flags
.rotated
;
3396 // Pre sanity check on non swizzle mode parameters
3397 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn
= {};
3398 localIn
.flags
= pIn
->flags
;
3399 localIn
.resourceType
= pOut
->resourceType
;
3400 localIn
.format
= pIn
->format
;
3402 localIn
.width
= width
;
3403 localIn
.height
= height
;
3404 localIn
.numSlices
= numSlices
;
3405 localIn
.numMipLevels
= numMipLevels
;
3406 localIn
.numSamples
= numSamples
;
3407 localIn
.numFrags
= numFrags
;
3409 if (ValidateNonSwModeParams(&localIn
))
3411 // Forbid swizzle mode(s) by client setting
3412 ADDR2_SWMODE_SET allowedSwModeSet
= {};
3413 allowedSwModeSet
.value
|= pIn
->forbiddenBlock
.linear
? 0 : Gfx9LinearSwModeMask
;
3414 allowedSwModeSet
.value
|= pIn
->forbiddenBlock
.micro
? 0 : Gfx9Blk256BSwModeMask
;
3415 allowedSwModeSet
.value
|=
3416 pIn
->forbiddenBlock
.macroThin4KB
? 0 :
3417 ((pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ? Gfx9Rsrc3dThin4KBSwModeMask
: Gfx9Blk4KBSwModeMask
);
3418 allowedSwModeSet
.value
|=
3419 pIn
->forbiddenBlock
.macroThick4KB
? 0 :
3420 ((pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ? Gfx9Rsrc3dThick4KBSwModeMask
: 0);
3421 allowedSwModeSet
.value
|=
3422 pIn
->forbiddenBlock
.macroThin64KB
? 0 :
3423 ((pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ? Gfx9Rsrc3dThin64KBSwModeMask
: Gfx9Blk64KBSwModeMask
);
3424 allowedSwModeSet
.value
|=
3425 pIn
->forbiddenBlock
.macroThick64KB
? 0 :
3426 ((pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ? Gfx9Rsrc3dThick64KBSwModeMask
: 0);
3428 if (pIn
->preferredSwSet
.value
!= 0)
3430 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_Z
? ~0 : ~Gfx9ZSwModeMask
;
3431 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_S
? ~0 : ~Gfx9StandardSwModeMask
;
3432 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_D
? ~0 : ~Gfx9DisplaySwModeMask
;
3433 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_R
? ~0 : ~Gfx9RotateSwModeMask
;
3438 allowedSwModeSet
.value
&= ~Gfx9XorSwModeMask
;
3441 if (pIn
->maxAlign
> 0)
3443 if (pIn
->maxAlign
< Size64K
)
3445 allowedSwModeSet
.value
&= ~Gfx9Blk64KBSwModeMask
;
3448 if (pIn
->maxAlign
< Size4K
)
3450 allowedSwModeSet
.value
&= ~Gfx9Blk4KBSwModeMask
;
3453 if (pIn
->maxAlign
< Size256
)
3455 allowedSwModeSet
.value
&= ~Gfx9Blk256BSwModeMask
;
3459 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3460 switch (pOut
->resourceType
)
3462 case ADDR_RSRC_TEX_1D
:
3463 allowedSwModeSet
.value
&= Gfx9Rsrc1dSwModeMask
;
3466 case ADDR_RSRC_TEX_2D
:
3467 allowedSwModeSet
.value
&= pIn
->flags
.prt
? Gfx9Rsrc2dPrtSwModeMask
: Gfx9Rsrc2dSwModeMask
;
3471 allowedSwModeSet
.value
&= ~(Gfx9RotateSwModeMask
| Gfx9ZSwModeMask
);
3475 case ADDR_RSRC_TEX_3D
:
3476 allowedSwModeSet
.value
&= pIn
->flags
.prt
? Gfx9Rsrc3dPrtSwModeMask
: Gfx9Rsrc3dSwModeMask
;
3478 if ((numMipLevels
> 1) && (numSlices
>= width
) && (numSlices
>= height
))
3480 // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3481 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3482 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3483 allowedSwModeSet
.value
&= ~Gfx9DisplaySwModeMask
;
3486 if ((bpp
== 128) && pIn
->flags
.color
)
3488 allowedSwModeSet
.value
&= ~Gfx9StandardSwModeMask
;
3491 if (pIn
->flags
.view3dAs2dArray
)
3493 allowedSwModeSet
.value
&= Gfx9Rsrc3dThinSwModeMask
| Gfx9LinearSwModeMask
;
3498 ADDR_ASSERT_ALWAYS();
3499 allowedSwModeSet
.value
= 0;
3503 if (pIn
->format
== ADDR_FMT_32_32_32
)
3505 allowedSwModeSet
.value
&= Gfx9LinearSwModeMask
;
3508 if (ElemLib::IsBlockCompressed(pIn
->format
))
3510 if (pIn
->flags
.texture
)
3512 allowedSwModeSet
.value
&= Gfx9StandardSwModeMask
| Gfx9DisplaySwModeMask
;
3516 allowedSwModeSet
.value
&= Gfx9StandardSwModeMask
| Gfx9DisplaySwModeMask
| Gfx9LinearSwModeMask
;
3520 if (ElemLib::IsMacroPixelPacked(pIn
->format
) ||
3521 (msaa
&& ((bpp
> 32) || pIn
->flags
.color
|| pIn
->flags
.unordered
)))
3523 allowedSwModeSet
.value
&= ~Gfx9ZSwModeMask
;
3526 if (pIn
->flags
.fmask
|| pIn
->flags
.depth
|| pIn
->flags
.stencil
)
3528 allowedSwModeSet
.value
&= Gfx9ZSwModeMask
;
3530 if (pIn
->flags
.noMetadata
== FALSE
)
3532 if (pIn
->flags
.depth
&&
3533 pIn
->flags
.texture
&&
3534 (((bpp
== 16) && (numFrags
>= 4)) || ((bpp
== 32) && (numFrags
>= 2))))
3536 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3537 // equation from wrong address within memory range a tile covered and use the
3538 // garbage data for compressed Z reading which finally leads to corruption.
3539 allowedSwModeSet
.value
&= ~Gfx9XorSwModeMask
;
3542 if (m_settings
.htileCacheRbConflict
&&
3543 (pIn
->flags
.depth
|| pIn
->flags
.stencil
) &&
3545 (pIn
->flags
.metaRbUnaligned
== FALSE
) &&
3546 (pIn
->flags
.metaPipeUnaligned
== FALSE
))
3548 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3549 allowedSwModeSet
.value
&= ~Gfx9XSwModeMask
;
3556 allowedSwModeSet
.value
&= Gfx9MsaaSwModeMask
;
3559 if ((numFrags
> 1) &&
3560 (Size4K
< (m_pipeInterleaveBytes
* numFrags
)))
3562 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3563 allowedSwModeSet
.value
&= Gfx9Blk64KBSwModeMask
;
3566 if (numMipLevels
> 1)
3568 allowedSwModeSet
.value
&= ~Gfx9Blk256BSwModeMask
;
3573 if (m_settings
.isDce12
)
3575 allowedSwModeSet
.value
&= (bpp
== 32) ? Dce12Bpp32SwModeMask
: Dce12NonBpp32SwModeMask
;
3577 else if (m_settings
.isDcn1
)
3579 allowedSwModeSet
.value
&= (bpp
== 64) ? Dcn1Bpp64SwModeMask
: Dcn1NonBpp64SwModeMask
;
3583 ADDR_NOT_IMPLEMENTED();
3587 if (allowedSwModeSet
.value
!= 0)
3590 // Post sanity check, at least AddrLib should accept the output generated by its own
3591 UINT_32 validateSwModeSet
= allowedSwModeSet
.value
;
3593 for (UINT_32 i
= 0; validateSwModeSet
!= 0; i
++)
3595 if (validateSwModeSet
& 1)
3597 localIn
.swizzleMode
= static_cast<AddrSwizzleMode
>(i
);
3598 ADDR_ASSERT(ValidateSwModeParams(&localIn
));
3601 validateSwModeSet
>>= 1;
3605 pOut
->validSwModeSet
= allowedSwModeSet
;
3606 pOut
->canXor
= (allowedSwModeSet
.value
& Gfx9XorSwModeMask
) ? TRUE
: FALSE
;
3607 pOut
->validBlockSet
= GetAllowedBlockSet(allowedSwModeSet
, pOut
->resourceType
);
3608 pOut
->validSwTypeSet
= GetAllowedSwSet(allowedSwModeSet
);
3610 pOut
->clientPreferredSwSet
= pIn
->preferredSwSet
;
3612 if (pOut
->clientPreferredSwSet
.value
== 0)
3614 pOut
->clientPreferredSwSet
.value
= AddrSwSetAll
;
3617 // Apply optional restrictions
3618 if (pIn
->flags
.needEquation
)
3620 FilterInvalidEqSwizzleMode(allowedSwModeSet
, pIn
->resourceType
, Log2(bpp
>> 3));
3623 if (allowedSwModeSet
.value
== Gfx9LinearSwModeMask
)
3625 pOut
->swizzleMode
= ADDR_SW_LINEAR
;
3629 // Always ignore linear swizzle mode if there is other choice.
3630 allowedSwModeSet
.swLinear
= 0;
3632 ADDR2_BLOCK_SET allowedBlockSet
= GetAllowedBlockSet(allowedSwModeSet
, pOut
->resourceType
);
3634 // Determine block size if there is 2 or more block type candidates
3635 if (IsPow2(allowedBlockSet
.value
) == FALSE
)
3637 AddrSwizzleMode swMode
[AddrBlockMaxTiledType
] = { ADDR_SW_LINEAR
};
3639 swMode
[AddrBlockMicro
] = ADDR_SW_256B_D
;
3640 swMode
[AddrBlockThin4KB
] = ADDR_SW_4KB_D
;
3641 swMode
[AddrBlockThin64KB
] = ADDR_SW_64KB_D
;
3643 if (pOut
->resourceType
== ADDR_RSRC_TEX_3D
)
3645 swMode
[AddrBlockThick4KB
] = ADDR_SW_4KB_S
;
3646 swMode
[AddrBlockThick64KB
] = ADDR_SW_64KB_S
;
3649 Dim3d blkDim
[AddrBlockMaxTiledType
] = {{0}, {0}, {0}, {0}, {0}, {0}};
3650 Dim3d padDim
[AddrBlockMaxTiledType
] = {{0}, {0}, {0}, {0}, {0}, {0}};
3651 UINT_64 padSize
[AddrBlockMaxTiledType
] = {0};
3653 const UINT_32 ratioLow
= pIn
->flags
.minimizeAlign
? 1 : (pIn
->flags
.opt4space
? 3 : 2);
3654 const UINT_32 ratioHi
= pIn
->flags
.minimizeAlign
? 1 : (pIn
->flags
.opt4space
? 2 : 1);
3655 const UINT_64 sizeAlignInElement
= Max(NextPow2(pIn
->minSizeAlign
) / (bpp
>> 3), 1u);
3656 UINT_32 minSizeBlk
= AddrBlockMicro
;
3657 UINT_64 minSize
= 0;
3659 for (UINT_32 i
= AddrBlockMicro
; i
< AddrBlockMaxTiledType
; i
++)
3661 if (allowedBlockSet
.value
& (1 << i
))
3663 ComputeBlockDimensionForSurf(&blkDim
[i
].w
,
3673 blkDim
[i
].w
= PowTwoAlign(blkDim
[i
].w
, 32);
3676 padSize
[i
] = ComputePadSize(&blkDim
[i
], width
, height
, numSlices
, &padDim
[i
]);
3677 padSize
[i
] = PowTwoAlign(padSize
[i
] * numFrags
, sizeAlignInElement
);
3679 if ((minSize
== 0) ||
3680 ((padSize
[i
] * ratioHi
) <= (minSize
* ratioLow
)))
3682 minSize
= padSize
[i
];
3688 if ((allowedBlockSet
.micro
== TRUE
) &&
3689 (width
<= blkDim
[AddrBlockMicro
].w
) &&
3690 (height
<= blkDim
[AddrBlockMicro
].h
) &&
3691 (NextPow2(pIn
->minSizeAlign
) <= Size256
))
3693 minSizeBlk
= AddrBlockMicro
;
3696 if (minSizeBlk
== AddrBlockMicro
)
3698 ADDR_ASSERT(pOut
->resourceType
!= ADDR_RSRC_TEX_3D
);
3699 allowedSwModeSet
.value
&= Gfx9Blk256BSwModeMask
;
3701 else if (minSizeBlk
== AddrBlockThick4KB
)
3703 ADDR_ASSERT(pOut
->resourceType
== ADDR_RSRC_TEX_3D
);
3704 allowedSwModeSet
.value
&= Gfx9Rsrc3dThick4KBSwModeMask
;
3706 else if (minSizeBlk
== AddrBlockThin4KB
)
3708 allowedSwModeSet
.value
&= (pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ?
3709 Gfx9Rsrc3dThin4KBSwModeMask
: Gfx9Blk4KBSwModeMask
;
3711 else if (minSizeBlk
== AddrBlockThick64KB
)
3713 ADDR_ASSERT(pOut
->resourceType
== ADDR_RSRC_TEX_3D
);
3714 allowedSwModeSet
.value
&= Gfx9Rsrc3dThick64KBSwModeMask
;
3718 ADDR_ASSERT(minSizeBlk
== AddrBlockThin64KB
);
3719 allowedSwModeSet
.value
&= (pOut
->resourceType
== ADDR_RSRC_TEX_3D
) ?
3720 Gfx9Rsrc3dThin64KBSwModeMask
: Gfx9Blk64KBSwModeMask
;
3724 // Block type should be determined.
3725 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet
, pOut
->resourceType
).value
));
3727 ADDR2_SWTYPE_SET allowedSwSet
= GetAllowedSwSet(allowedSwModeSet
);
3729 // Determine swizzle type if there is 2 or more swizzle type candidates
3730 if (IsPow2(allowedSwSet
.value
) == FALSE
)
3732 if (ElemLib::IsBlockCompressed(pIn
->format
))
3734 if (allowedSwSet
.sw_D
)
3736 allowedSwModeSet
.value
&= Gfx9DisplaySwModeMask
;
3740 ADDR_ASSERT(allowedSwSet
.sw_S
);
3741 allowedSwModeSet
.value
&= Gfx9StandardSwModeMask
;
3744 else if (ElemLib::IsMacroPixelPacked(pIn
->format
))
3746 if (allowedSwSet
.sw_S
)
3748 allowedSwModeSet
.value
&= Gfx9StandardSwModeMask
;
3750 else if (allowedSwSet
.sw_D
)
3752 allowedSwModeSet
.value
&= Gfx9DisplaySwModeMask
;
3756 ADDR_ASSERT(allowedSwSet
.sw_R
);
3757 allowedSwModeSet
.value
&= Gfx9RotateSwModeMask
;
3760 else if (pOut
->resourceType
== ADDR_RSRC_TEX_3D
)
3762 if (pIn
->flags
.color
&& allowedSwSet
.sw_D
)
3764 allowedSwModeSet
.value
&= Gfx9DisplaySwModeMask
;
3766 else if (allowedSwSet
.sw_Z
)
3768 allowedSwModeSet
.value
&= Gfx9ZSwModeMask
;
3772 ADDR_ASSERT(allowedSwSet
.sw_S
);
3773 allowedSwModeSet
.value
&= Gfx9StandardSwModeMask
;
3778 if (pIn
->flags
.rotated
&& allowedSwSet
.sw_R
)
3780 allowedSwModeSet
.value
&= Gfx9RotateSwModeMask
;
3782 else if (allowedSwSet
.sw_D
)
3784 allowedSwModeSet
.value
&= Gfx9DisplaySwModeMask
;
3786 else if (allowedSwSet
.sw_S
)
3788 allowedSwModeSet
.value
&= Gfx9StandardSwModeMask
;
3792 ADDR_ASSERT(allowedSwSet
.sw_Z
);
3793 allowedSwModeSet
.value
&= Gfx9ZSwModeMask
;
3798 // Swizzle type should be determined.
3799 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet
).value
));
3801 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
3802 // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3803 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3804 pOut
->swizzleMode
= static_cast<AddrSwizzleMode
>(Log2NonPow2(allowedSwModeSet
.value
));
3807 returnCode
= ADDR_OK
;
3811 // Invalid combination...
3812 ADDR_ASSERT_ALWAYS();
3817 // Invalid combination...
3818 ADDR_ASSERT_ALWAYS();
3825 ************************************************************************************************************************
3826 * Gfx9Lib::ComputeStereoInfo
3829 * Compute height alignment and right eye pipeBankXor for stereo surface
3834 ************************************************************************************************************************
3836 ADDR_E_RETURNCODE
Gfx9Lib::ComputeStereoInfo(
3837 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
,
3838 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
,
3839 UINT_32
* pHeightAlign
3842 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
3844 UINT_32 eqIndex
= HwlGetEquationIndex(pIn
, pOut
);
3846 if (eqIndex
< m_numEquations
)
3848 if (IsXor(pIn
->swizzleMode
))
3850 const UINT_32 blkSizeLog2
= GetBlockSizeLog2(pIn
->swizzleMode
);
3851 const UINT_32 numPipeBits
= GetPipeXorBits(blkSizeLog2
);
3852 const UINT_32 numBankBits
= GetBankXorBits(blkSizeLog2
);
3853 const UINT_32 bppLog2
= Log2(pIn
->bpp
>> 3);
3854 const UINT_32 maxYCoordBlock256
= Log2(Block256_2d
[bppLog2
].h
) - 1;
3855 const ADDR_EQUATION
*pEqToCheck
= &m_equationTable
[eqIndex
];
3857 ADDR_ASSERT(maxYCoordBlock256
==
3858 GetMaxValidChannelIndex(&pEqToCheck
->addr
[0], Log2Size256
, 1));
3860 const UINT_32 maxYCoordInBaseEquation
=
3861 (blkSizeLog2
- Log2Size256
) / 2 + maxYCoordBlock256
;
3863 ADDR_ASSERT(maxYCoordInBaseEquation
==
3864 GetMaxValidChannelIndex(&pEqToCheck
->addr
[0], blkSizeLog2
, 1));
3866 const UINT_32 maxYCoordInPipeXor
= (numPipeBits
== 0) ? 0 : maxYCoordBlock256
+ numPipeBits
;
3868 ADDR_ASSERT(maxYCoordInPipeXor
==
3869 GetMaxValidChannelIndex(&pEqToCheck
->xor1
[m_pipeInterleaveLog2
], numPipeBits
, 1));
3871 const UINT_32 maxYCoordInBankXor
= (numBankBits
== 0) ?
3872 0 : maxYCoordBlock256
+ (numPipeBits
+ 1) / 2 + numBankBits
;
3874 ADDR_ASSERT(maxYCoordInBankXor
==
3875 GetMaxValidChannelIndex(&pEqToCheck
->xor1
[m_pipeInterleaveLog2
+ numPipeBits
], numBankBits
, 1));
3877 const UINT_32 maxYCoordInPipeBankXor
= Max(maxYCoordInPipeXor
, maxYCoordInBankXor
);
3879 if (maxYCoordInPipeBankXor
> maxYCoordInBaseEquation
)
3881 *pHeightAlign
= 1u << maxYCoordInPipeBankXor
;
3883 if (pOut
->pStereoInfo
!= NULL
)
3885 pOut
->pStereoInfo
->rightSwizzle
= 0;
3887 if ((PowTwoAlign(pIn
->height
, *pHeightAlign
) % (*pHeightAlign
* 2)) != 0)
3889 if (maxYCoordInPipeXor
== maxYCoordInPipeBankXor
)
3891 pOut
->pStereoInfo
->rightSwizzle
|= (1u << 1);
3894 if (maxYCoordInBankXor
== maxYCoordInPipeBankXor
)
3896 pOut
->pStereoInfo
->rightSwizzle
|=
3897 1u << ((numPipeBits
% 2) ? numPipeBits
: numPipeBits
+ 1);
3900 ADDR_ASSERT(pOut
->pStereoInfo
->rightSwizzle
==
3901 GetCoordActiveMask(&pEqToCheck
->xor1
[m_pipeInterleaveLog2
],
3902 numPipeBits
+ numBankBits
, 1, maxYCoordInPipeBankXor
));
3910 ADDR_ASSERT_ALWAYS();
3911 returnCode
= ADDR_ERROR
;
3918 ************************************************************************************************************************
3919 * Gfx9Lib::HwlComputeSurfaceInfoTiled
3922 * Internal function to calculate alignment for tiled surface
3926 ************************************************************************************************************************
3928 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSurfaceInfoTiled(
3929 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
3930 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
3933 ADDR_E_RETURNCODE returnCode
= ComputeBlockDimensionForSurf(&pOut
->blockWidth
,
3941 if (returnCode
== ADDR_OK
)
3943 UINT_32 pitchAlignInElement
= pOut
->blockWidth
;
3945 if ((IsTex2d(pIn
->resourceType
) == TRUE
) &&
3946 (pIn
->flags
.display
|| pIn
->flags
.rotated
) &&
3947 (pIn
->numMipLevels
<= 1) &&
3948 (pIn
->numSamples
<= 1) &&
3949 (pIn
->numFrags
<= 1))
3951 // Display engine needs pitch align to be at least 32 pixels.
3952 pitchAlignInElement
= PowTwoAlign(pitchAlignInElement
, 32);
3955 pOut
->pitch
= PowTwoAlign(pIn
->width
, pitchAlignInElement
);
3957 if ((pIn
->numMipLevels
<= 1) && (pIn
->pitchInElement
> 0))
3959 if ((pIn
->pitchInElement
% pitchAlignInElement
) != 0)
3961 returnCode
= ADDR_INVALIDPARAMS
;
3963 else if (pIn
->pitchInElement
< pOut
->pitch
)
3965 returnCode
= ADDR_INVALIDPARAMS
;
3969 pOut
->pitch
= pIn
->pitchInElement
;
3973 UINT_32 heightAlign
= 0;
3975 if (pIn
->flags
.qbStereo
)
3977 returnCode
= ComputeStereoInfo(pIn
, pOut
, &heightAlign
);
3980 if (returnCode
== ADDR_OK
)
3982 pOut
->height
= PowTwoAlign(pIn
->height
, pOut
->blockHeight
);
3984 if (heightAlign
> 1)
3986 pOut
->height
= PowTwoAlign(pOut
->height
, heightAlign
);
3989 pOut
->numSlices
= PowTwoAlign(pIn
->numSlices
, pOut
->blockSlices
);
3991 pOut
->epitchIsHeight
= FALSE
;
3992 pOut
->mipChainInTail
= FALSE
;
3993 pOut
->firstMipIdInTail
= pIn
->numMipLevels
;
3995 pOut
->mipChainPitch
= pOut
->pitch
;
3996 pOut
->mipChainHeight
= pOut
->height
;
3997 pOut
->mipChainSlice
= pOut
->numSlices
;
3999 if (pIn
->numMipLevels
> 1)
4001 pOut
->firstMipIdInTail
= GetMipChainInfo(pIn
->resourceType
,
4013 const UINT_32 endingMipId
= Min(pOut
->firstMipIdInTail
, pIn
->numMipLevels
- 1);
4015 if (endingMipId
== 0)
4017 const Dim3d tailMaxDim
= GetMipTailDim(pIn
->resourceType
,
4023 pOut
->epitchIsHeight
= TRUE
;
4024 pOut
->pitch
= tailMaxDim
.w
;
4025 pOut
->height
= tailMaxDim
.h
;
4026 pOut
->numSlices
= IsThick(pIn
->resourceType
, pIn
->swizzleMode
) ?
4027 tailMaxDim
.d
: pIn
->numSlices
;
4028 pOut
->mipChainInTail
= TRUE
;
4032 UINT_32 mip0WidthInBlk
= pOut
->pitch
/ pOut
->blockWidth
;
4033 UINT_32 mip0HeightInBlk
= pOut
->height
/ pOut
->blockHeight
;
4035 AddrMajorMode majorMode
= GetMajorMode(pIn
->resourceType
,
4039 pOut
->numSlices
/ pOut
->blockSlices
);
4040 if (majorMode
== ADDR_MAJOR_Y
)
4042 UINT_32 mip1WidthInBlk
= RoundHalf(mip0WidthInBlk
);
4044 if ((mip1WidthInBlk
== 1) && (endingMipId
> 2))
4049 pOut
->mipChainPitch
+= (mip1WidthInBlk
* pOut
->blockWidth
);
4051 pOut
->epitchIsHeight
= FALSE
;
4055 UINT_32 mip1HeightInBlk
= RoundHalf(mip0HeightInBlk
);
4057 if ((mip1HeightInBlk
== 1) && (endingMipId
> 2))
4062 pOut
->mipChainHeight
+= (mip1HeightInBlk
* pOut
->blockHeight
);
4064 pOut
->epitchIsHeight
= TRUE
;
4068 if (pOut
->pMipInfo
!= NULL
)
4070 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
4072 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
4074 Dim3d mipStartPos
= {0};
4075 UINT_32 mipTailOffsetInBytes
= 0;
4077 mipStartPos
= GetMipStartPos(pIn
->resourceType
,
4087 &mipTailOffsetInBytes
);
4089 UINT_32 pitchInBlock
=
4090 pOut
->mipChainPitch
/ pOut
->blockWidth
;
4091 UINT_32 sliceInBlock
=
4092 (pOut
->mipChainHeight
/ pOut
->blockHeight
) * pitchInBlock
;
4093 UINT_64 blockIndex
=
4094 mipStartPos
.d
* sliceInBlock
+ mipStartPos
.h
* pitchInBlock
+ mipStartPos
.w
;
4095 UINT_64 macroBlockOffset
=
4096 blockIndex
<< GetBlockSizeLog2(pIn
->swizzleMode
);
4098 pOut
->pMipInfo
[i
].macroBlockOffset
= macroBlockOffset
;
4099 pOut
->pMipInfo
[i
].mipTailOffset
= mipTailOffsetInBytes
;
4103 else if (pOut
->pMipInfo
!= NULL
)
4105 pOut
->pMipInfo
[0].pitch
= pOut
->pitch
;
4106 pOut
->pMipInfo
[0].height
= pOut
->height
;
4107 pOut
->pMipInfo
[0].depth
= IsTex3d(pIn
->resourceType
)? pOut
->numSlices
: 1;
4108 pOut
->pMipInfo
[0].offset
= 0;
4111 pOut
->sliceSize
= static_cast<UINT_64
>(pOut
->mipChainPitch
) * pOut
->mipChainHeight
*
4112 (pIn
->bpp
>> 3) * pIn
->numFrags
;
4113 pOut
->surfSize
= pOut
->sliceSize
* pOut
->mipChainSlice
;
4114 pOut
->baseAlign
= ComputeSurfaceBaseAlignTiled(pIn
->swizzleMode
);
4116 if ((IsBlock256b(pIn
->swizzleMode
) == FALSE
) &&
4117 (pIn
->flags
.color
|| pIn
->flags
.depth
|| pIn
->flags
.stencil
|| pIn
->flags
.fmask
) &&
4118 (pIn
->flags
.texture
== TRUE
) &&
4119 (pIn
->flags
.noMetadata
== FALSE
) &&
4120 (pIn
->flags
.metaPipeUnaligned
== FALSE
))
4122 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4123 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4124 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4125 // them, which may cause invalid metadata to be fetched.
4126 pOut
->baseAlign
= Max(pOut
->baseAlign
, m_pipeInterleaveBytes
* m_pipes
* m_se
);
4131 pOut
->baseAlign
= Max(pOut
->baseAlign
, PrtAlignment
);
4140 ************************************************************************************************************************
4141 * Gfx9Lib::HwlComputeSurfaceInfoLinear
4144 * Internal function to calculate alignment for linear surface
4148 ************************************************************************************************************************
4150 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSurfaceInfoLinear(
4151 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
4152 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
4155 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
4157 UINT_32 actualHeight
= 0;
4158 UINT_32 elementBytes
= pIn
->bpp
>> 3;
4159 const UINT_32 alignment
= pIn
->flags
.prt
? PrtAlignment
: 256;
4161 if (IsTex1d(pIn
->resourceType
))
4163 if (pIn
->height
> 1)
4165 returnCode
= ADDR_INVALIDPARAMS
;
4169 const UINT_32 pitchAlignInElement
= alignment
/ elementBytes
;
4171 pitch
= PowTwoAlign(pIn
->width
, pitchAlignInElement
);
4172 actualHeight
= pIn
->numMipLevels
;
4174 if (pIn
->flags
.prt
== FALSE
)
4176 returnCode
= ApplyCustomizedPitchHeight(pIn
, elementBytes
, pitchAlignInElement
,
4177 &pitch
, &actualHeight
);
4180 if (returnCode
== ADDR_OK
)
4182 if (pOut
->pMipInfo
!= NULL
)
4184 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
4186 pOut
->pMipInfo
[i
].offset
= pitch
* elementBytes
* i
;
4187 pOut
->pMipInfo
[i
].pitch
= pitch
;
4188 pOut
->pMipInfo
[i
].height
= 1;
4189 pOut
->pMipInfo
[i
].depth
= 1;
4197 returnCode
= ComputeSurfaceLinearPadding(pIn
, &pitch
, &actualHeight
, pOut
->pMipInfo
);
4200 if ((pitch
== 0) || (actualHeight
== 0))
4202 returnCode
= ADDR_INVALIDPARAMS
;
4205 if (returnCode
== ADDR_OK
)
4207 pOut
->pitch
= pitch
;
4208 pOut
->height
= pIn
->height
;
4209 pOut
->numSlices
= pIn
->numSlices
;
4210 pOut
->mipChainPitch
= pitch
;
4211 pOut
->mipChainHeight
= actualHeight
;
4212 pOut
->mipChainSlice
= pOut
->numSlices
;
4213 pOut
->epitchIsHeight
= (pIn
->numMipLevels
> 1) ? TRUE
: FALSE
;
4214 pOut
->sliceSize
= static_cast<UINT_64
>(pOut
->pitch
) * actualHeight
* elementBytes
;
4215 pOut
->surfSize
= pOut
->sliceSize
* pOut
->numSlices
;
4216 pOut
->baseAlign
= (pIn
->swizzleMode
== ADDR_SW_LINEAR_GENERAL
) ? (pIn
->bpp
/ 8) : alignment
;
4217 pOut
->blockWidth
= (pIn
->swizzleMode
== ADDR_SW_LINEAR_GENERAL
) ? 1 : (256 / elementBytes
);
4218 pOut
->blockHeight
= 1;
4219 pOut
->blockSlices
= 1;
4222 // Post calculation validate
4223 ADDR_ASSERT(pOut
->sliceSize
> 0);
4229 ************************************************************************************************************************
4230 * Gfx9Lib::GetMipChainInfo
4233 * Internal function to get out information about mip chain
4236 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4237 ************************************************************************************************************************
4239 UINT_32
Gfx9Lib::GetMipChainInfo(
4240 AddrResourceType resourceType
,
4241 AddrSwizzleMode swizzleMode
,
4247 UINT_32 blockHeight
,
4249 UINT_32 numMipLevel
,
4250 ADDR2_MIP_INFO
* pMipInfo
) const
4252 const Dim3d tailMaxDim
=
4253 GetMipTailDim(resourceType
, swizzleMode
, blockWidth
, blockHeight
, blockDepth
);
4255 UINT_32 mipPitch
= mip0Width
;
4256 UINT_32 mipHeight
= mip0Height
;
4257 UINT_32 mipDepth
= IsTex3d(resourceType
) ? mip0Depth
: 1;
4259 UINT_32 firstMipIdInTail
= numMipLevel
;
4260 BOOL_32 inTail
= FALSE
;
4261 BOOL_32 finalDim
= FALSE
;
4262 BOOL_32 is3dThick
= IsThick(resourceType
, swizzleMode
);
4263 BOOL_32 is3dThin
= IsTex3d(resourceType
) && (is3dThick
== FALSE
);
4265 for (UINT_32 mipId
= 0; mipId
< numMipLevel
; mipId
++)
4269 if (finalDim
== FALSE
)
4275 mipSize
= mipPitch
* mipHeight
* mipDepth
* (bpp
>> 3);
4279 mipSize
= mipPitch
* mipHeight
* (bpp
>> 3);
4284 UINT_32 index
= Log2(bpp
>> 3);
4288 mipPitch
= Block256_3dZ
[index
].w
;
4289 mipHeight
= Block256_3dZ
[index
].h
;
4290 mipDepth
= Block256_3dZ
[index
].d
;
4294 mipPitch
= Block256_2d
[index
].w
;
4295 mipHeight
= Block256_2d
[index
].h
;
4304 inTail
= IsInMipTail(resourceType
, swizzleMode
, tailMaxDim
,
4305 mipPitch
, mipHeight
, mipDepth
);
4309 firstMipIdInTail
= mipId
;
4310 mipPitch
= tailMaxDim
.w
;
4311 mipHeight
= tailMaxDim
.h
;
4315 mipDepth
= tailMaxDim
.d
;
4320 mipPitch
= PowTwoAlign(mipPitch
, blockWidth
);
4321 mipHeight
= PowTwoAlign(mipHeight
, blockHeight
);
4325 mipDepth
= PowTwoAlign(mipDepth
, blockDepth
);
4330 if (pMipInfo
!= NULL
)
4332 pMipInfo
[mipId
].pitch
= mipPitch
;
4333 pMipInfo
[mipId
].height
= mipHeight
;
4334 pMipInfo
[mipId
].depth
= mipDepth
;
4335 pMipInfo
[mipId
].offset
= offset
;
4338 offset
+= (mipPitch
* mipHeight
* mipDepth
* (bpp
>> 3));
4344 mipDepth
= Max(mipDepth
>> 1, 1u);
4349 mipPitch
= Max(mipPitch
>> 1, 1u);
4350 mipHeight
= Max(mipHeight
>> 1, 1u);
4352 if (is3dThick
|| is3dThin
)
4354 mipDepth
= Max(mipDepth
>> 1, 1u);
4359 return firstMipIdInTail
;
4363 ************************************************************************************************************************
4364 * Gfx9Lib::GetMetaMiptailInfo
4367 * Get mip tail coordinate information.
4371 ************************************************************************************************************************
4373 VOID
Gfx9Lib::GetMetaMiptailInfo(
4374 ADDR2_META_MIP_INFO
* pInfo
, ///< [out] output structure to store per mip coord
4375 Dim3d mipCoord
, ///< [in] mip tail base coord
4376 UINT_32 numMipInTail
, ///< [in] number of mips in tail
4377 Dim3d
* pMetaBlkDim
///< [in] meta block width/height/depth
4380 BOOL_32 isThick
= (pMetaBlkDim
->d
> 1);
4381 UINT_32 mipWidth
= pMetaBlkDim
->w
;
4382 UINT_32 mipHeight
= pMetaBlkDim
->h
>> 1;
4383 UINT_32 mipDepth
= pMetaBlkDim
->d
;
4388 minInc
= (pMetaBlkDim
->h
>= 512) ? 128 : ((pMetaBlkDim
->h
== 256) ? 64 : 32);
4390 else if (pMetaBlkDim
->h
>= 1024)
4394 else if (pMetaBlkDim
->h
== 512)
4403 UINT_32 blk32MipId
= 0xFFFFFFFF;
4405 for (UINT_32 mip
= 0; mip
< numMipInTail
; mip
++)
4407 pInfo
[mip
].inMiptail
= TRUE
;
4408 pInfo
[mip
].startX
= mipCoord
.w
;
4409 pInfo
[mip
].startY
= mipCoord
.h
;
4410 pInfo
[mip
].startZ
= mipCoord
.d
;
4411 pInfo
[mip
].width
= mipWidth
;
4412 pInfo
[mip
].height
= mipHeight
;
4413 pInfo
[mip
].depth
= mipDepth
;
4417 if (blk32MipId
== 0xFFFFFFFF)
4422 mipCoord
.w
= pInfo
[blk32MipId
].startX
;
4423 mipCoord
.h
= pInfo
[blk32MipId
].startY
;
4424 mipCoord
.d
= pInfo
[blk32MipId
].startZ
;
4426 switch (mip
- blk32MipId
)
4429 mipCoord
.w
+= 32; // 16x16
4432 mipCoord
.h
+= 32; // 8x8
4435 mipCoord
.h
+= 32; // 4x4
4439 mipCoord
.h
+= 32; // 2x2
4443 mipCoord
.h
+= 32; // 1x1
4446 // The following are for BC/ASTC formats
4448 mipCoord
.h
+= 48; // 1/2 x 1/2
4451 mipCoord
.h
+= 48; // 1/4 x 1/4
4455 mipCoord
.h
+= 48; // 1/8 x 1/8
4459 mipCoord
.h
+= 48; // 1/16 x 1/16
4463 ADDR_ASSERT_ALWAYS();
4467 mipWidth
= ((mip
- blk32MipId
) == 0) ? 16 : 8;
4468 mipHeight
= mipWidth
;
4472 mipDepth
= mipWidth
;
4477 if (mipWidth
<= minInc
)
4479 // if we're below the minimal increment...
4482 // For 3d, just go in z direction
4483 mipCoord
.d
+= mipDepth
;
4487 // For 2d, first go across, then down
4488 if ((mipWidth
* 2) == minInc
)
4490 // if we're 2 mips below, that's when we go back in x, and down in y
4491 mipCoord
.w
-= minInc
;
4492 mipCoord
.h
+= minInc
;
4496 // otherwise, just go across in x
4497 mipCoord
.w
+= minInc
;
4503 // On even mip, go down, otherwise, go across
4506 mipCoord
.w
+= mipWidth
;
4510 mipCoord
.h
+= mipHeight
;
4513 // Divide the width by 2
4515 // After the first mip in tail, the mip is always a square
4516 mipHeight
= mipWidth
;
4517 // ...or for 3d, a cube
4520 mipDepth
= mipWidth
;
4527 ************************************************************************************************************************
4528 * Gfx9Lib::GetMipStartPos
4531 * Internal function to get out information about mip logical start position
4534 * logical start position in macro block width/heith/depth of one mip level within one slice
4535 ************************************************************************************************************************
4537 Dim3d
Gfx9Lib::GetMipStartPos(
4538 AddrResourceType resourceType
,
4539 AddrSwizzleMode swizzleMode
,
4544 UINT_32 blockHeight
,
4547 UINT_32 log2ElementBytes
,
4548 UINT_32
* pMipTailBytesOffset
) const
4550 Dim3d mipStartPos
= {0};
4551 const Dim3d tailMaxDim
= GetMipTailDim(resourceType
, swizzleMode
, blockWidth
, blockHeight
, blockDepth
);
4553 // Report mip in tail if Mip0 is already in mip tail
4554 BOOL_32 inMipTail
= IsInMipTail(resourceType
, swizzleMode
, tailMaxDim
, width
, height
, depth
);
4555 UINT_32 log2BlkSize
= GetBlockSizeLog2(swizzleMode
);
4556 UINT_32 mipIndexInTail
= mipId
;
4558 if (inMipTail
== FALSE
)
4560 // Mip 0 dimension, unit in block
4561 UINT_32 mipWidthInBlk
= width
/ blockWidth
;
4562 UINT_32 mipHeightInBlk
= height
/ blockHeight
;
4563 UINT_32 mipDepthInBlk
= depth
/ blockDepth
;
4564 AddrMajorMode majorMode
= GetMajorMode(resourceType
,
4570 UINT_32 endingMip
= mipId
+ 1;
4572 for (UINT_32 i
= 1; i
<= mipId
; i
++)
4574 if ((i
== 1) || (i
== 3))
4576 if (majorMode
== ADDR_MAJOR_Y
)
4578 mipStartPos
.w
+= mipWidthInBlk
;
4582 mipStartPos
.h
+= mipHeightInBlk
;
4587 if (majorMode
== ADDR_MAJOR_X
)
4589 mipStartPos
.w
+= mipWidthInBlk
;
4591 else if (majorMode
== ADDR_MAJOR_Y
)
4593 mipStartPos
.h
+= mipHeightInBlk
;
4597 mipStartPos
.d
+= mipDepthInBlk
;
4601 BOOL_32 inTail
= FALSE
;
4603 if (IsThick(resourceType
, swizzleMode
))
4605 UINT_32 dim
= log2BlkSize
% 3;
4610 (mipWidthInBlk
<= 2) && (mipHeightInBlk
== 1) && (mipDepthInBlk
<= 2);
4615 (mipWidthInBlk
== 1) && (mipHeightInBlk
<= 2) && (mipDepthInBlk
<= 2);
4620 (mipWidthInBlk
<= 2) && (mipHeightInBlk
<= 2) && (mipDepthInBlk
== 1);
4625 if (log2BlkSize
& 1)
4627 inTail
= (mipWidthInBlk
<= 2) && (mipHeightInBlk
== 1);
4631 inTail
= (mipWidthInBlk
== 1) && (mipHeightInBlk
<= 2);
4641 mipWidthInBlk
= RoundHalf(mipWidthInBlk
);
4642 mipHeightInBlk
= RoundHalf(mipHeightInBlk
);
4643 mipDepthInBlk
= RoundHalf(mipDepthInBlk
);
4646 if (mipId
>= endingMip
)
4649 mipIndexInTail
= mipId
- endingMip
;
4655 UINT_32 index
= mipIndexInTail
+ MaxMacroBits
- log2BlkSize
;
4656 ADDR_ASSERT(index
< sizeof(MipTailOffset256B
) / sizeof(UINT_32
));
4657 *pMipTailBytesOffset
= MipTailOffset256B
[index
] << 8;
4664 ************************************************************************************************************************
4665 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4668 * Internal function to calculate address from coord for tiled swizzle surface
4672 ************************************************************************************************************************
4674 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4675 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
4676 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
* pOut
///< [out] output structure
4679 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn
= {0};
4680 localIn
.swizzleMode
= pIn
->swizzleMode
;
4681 localIn
.flags
= pIn
->flags
;
4682 localIn
.resourceType
= pIn
->resourceType
;
4683 localIn
.bpp
= pIn
->bpp
;
4684 localIn
.width
= Max(pIn
->unalignedWidth
, 1u);
4685 localIn
.height
= Max(pIn
->unalignedHeight
, 1u);
4686 localIn
.numSlices
= Max(pIn
->numSlices
, 1u);
4687 localIn
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
4688 localIn
.numSamples
= Max(pIn
->numSamples
, 1u);
4689 localIn
.numFrags
= Max(pIn
->numFrags
, 1u);
4690 if (localIn
.numMipLevels
<= 1)
4692 localIn
.pitchInElement
= pIn
->pitchInElement
;
4695 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut
= {0};
4696 ADDR_E_RETURNCODE returnCode
= ComputeSurfaceInfoTiled(&localIn
, &localOut
);
4698 BOOL_32 valid
= (returnCode
== ADDR_OK
) &&
4699 (IsThin(pIn
->resourceType
, pIn
->swizzleMode
) ||
4700 IsThick(pIn
->resourceType
, pIn
->swizzleMode
)) &&
4701 ((pIn
->pipeBankXor
== 0) || (IsXor(pIn
->swizzleMode
)));
4705 UINT_32 log2ElementBytes
= Log2(pIn
->bpp
>> 3);
4706 Dim3d mipStartPos
= {0};
4707 UINT_32 mipTailBytesOffset
= 0;
4709 if (pIn
->numMipLevels
> 1)
4711 // Mip-map chain cannot be MSAA surface
4712 ADDR_ASSERT((pIn
->numSamples
<= 1) && (pIn
->numFrags
<= 1));
4714 mipStartPos
= GetMipStartPos(pIn
->resourceType
,
4719 localOut
.blockWidth
,
4720 localOut
.blockHeight
,
4721 localOut
.blockSlices
,
4724 &mipTailBytesOffset
);
4727 UINT_32 interleaveOffset
= 0;
4728 UINT_32 pipeBits
= 0;
4729 UINT_32 pipeXor
= 0;
4730 UINT_32 bankBits
= 0;
4731 UINT_32 bankXor
= 0;
4733 if (IsThin(pIn
->resourceType
, pIn
->swizzleMode
))
4735 UINT_32 blockOffset
= 0;
4736 UINT_32 log2BlkSize
= GetBlockSizeLog2(pIn
->swizzleMode
);
4738 if (IsZOrderSwizzle(pIn
->swizzleMode
))
4740 // Morton generation
4741 if ((log2ElementBytes
== 0) || (log2ElementBytes
== 2))
4743 UINT_32 totalLowBits
= 6 - log2ElementBytes
;
4744 UINT_32 mortBits
= totalLowBits
/ 2;
4745 UINT_32 lowBitsValue
= MortonGen2d(pIn
->y
, pIn
->x
, mortBits
);
4746 // Are 9 bits enough?
4747 UINT_32 highBitsValue
=
4748 MortonGen2d(pIn
->x
>> mortBits
, pIn
->y
>> mortBits
, 9) << totalLowBits
;
4749 blockOffset
= lowBitsValue
| highBitsValue
;
4750 ADDR_ASSERT(blockOffset
== lowBitsValue
+ highBitsValue
);
4754 blockOffset
= MortonGen2d(pIn
->y
, pIn
->x
, 13);
4757 // Fill LSBs with sample bits
4758 if (pIn
->numSamples
> 1)
4760 blockOffset
*= pIn
->numSamples
;
4761 blockOffset
|= pIn
->sample
;
4764 // Shift according to BytesPP
4765 blockOffset
<<= log2ElementBytes
;
4769 // Micro block offset
4770 UINT_32 microBlockOffset
= ComputeSurface2DMicroBlockOffset(pIn
);
4771 blockOffset
= microBlockOffset
;
4773 // Micro block dimension
4774 ADDR_ASSERT(log2ElementBytes
< MaxNumOfBpp
);
4775 Dim2d microBlockDim
= Block256_2d
[log2ElementBytes
];
4776 // Morton generation, does 12 bit enough?
4778 MortonGen2d((pIn
->x
/ microBlockDim
.w
), (pIn
->y
/ microBlockDim
.h
), 12) << 8;
4780 // Sample bits start location
4781 UINT_32 sampleStart
= log2BlkSize
- Log2(pIn
->numSamples
);
4782 // Join sample bits information to the highest Macro block bits
4783 if (IsNonPrtXor(pIn
->swizzleMode
))
4785 // Non-prt-Xor : xor highest Macro block bits with sample bits
4786 blockOffset
= blockOffset
^ (pIn
->sample
<< sampleStart
);
4790 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4791 // after this op, the blockOffset only contains log2 Macro block size bits
4792 blockOffset
%= (1 << sampleStart
);
4793 blockOffset
|= (pIn
->sample
<< sampleStart
);
4794 ADDR_ASSERT((blockOffset
>> log2BlkSize
) == 0);
4798 if (IsXor(pIn
->swizzleMode
))
4800 // Mask off bits above Macro block bits to keep page synonyms working for prt
4801 if (IsPrt(pIn
->swizzleMode
))
4803 blockOffset
&= ((1 << log2BlkSize
) - 1);
4806 // Preserve offset inside pipe interleave
4807 interleaveOffset
= blockOffset
& ((1 << m_pipeInterleaveLog2
) - 1);
4808 blockOffset
>>= m_pipeInterleaveLog2
;
4811 pipeBits
= GetPipeXorBits(log2BlkSize
);
4813 pipeXor
= FoldXor2d(blockOffset
, pipeBits
);
4814 blockOffset
>>= pipeBits
;
4817 bankBits
= GetBankXorBits(log2BlkSize
);
4819 bankXor
= FoldXor2d(blockOffset
, bankBits
);
4820 blockOffset
>>= bankBits
;
4822 // Put all the part back together
4823 blockOffset
<<= bankBits
;
4824 blockOffset
|= bankXor
;
4825 blockOffset
<<= pipeBits
;
4826 blockOffset
|= pipeXor
;
4827 blockOffset
<<= m_pipeInterleaveLog2
;
4828 blockOffset
|= interleaveOffset
;
4831 ADDR_ASSERT((blockOffset
| mipTailBytesOffset
) == (blockOffset
+ mipTailBytesOffset
));
4832 ADDR_ASSERT((mipTailBytesOffset
== 0u) || (blockOffset
< (1u << log2BlkSize
)));
4834 blockOffset
|= mipTailBytesOffset
;
4836 if (IsNonPrtXor(pIn
->swizzleMode
) && (pIn
->numSamples
<= 1))
4838 // Apply slice xor if not MSAA/PRT
4839 blockOffset
^= (ReverseBitVector(pIn
->slice
, pipeBits
) << m_pipeInterleaveLog2
);
4840 blockOffset
^= (ReverseBitVector(pIn
->slice
>> pipeBits
, bankBits
) <<
4841 (m_pipeInterleaveLog2
+ pipeBits
));
4844 returnCode
= ApplyCustomerPipeBankXor(pIn
->swizzleMode
, pIn
->pipeBankXor
,
4845 bankBits
, pipeBits
, &blockOffset
);
4847 blockOffset
%= (1 << log2BlkSize
);
4849 UINT_32 pitchInMacroBlock
= localOut
.mipChainPitch
/ localOut
.blockWidth
;
4850 UINT_32 paddedHeightInMacroBlock
= localOut
.mipChainHeight
/ localOut
.blockHeight
;
4851 UINT_32 sliceSizeInMacroBlock
= pitchInMacroBlock
* paddedHeightInMacroBlock
;
4852 UINT_64 macroBlockIndex
=
4853 (pIn
->slice
+ mipStartPos
.d
) * sliceSizeInMacroBlock
+
4854 ((pIn
->y
/ localOut
.blockHeight
) + mipStartPos
.h
) * pitchInMacroBlock
+
4855 ((pIn
->x
/ localOut
.blockWidth
) + mipStartPos
.w
);
4857 pOut
->addr
= blockOffset
| (macroBlockIndex
<< log2BlkSize
);
4861 UINT_32 log2BlkSize
= GetBlockSizeLog2(pIn
->swizzleMode
);
4863 Dim3d microBlockDim
= Block1K_3d
[log2ElementBytes
];
4865 UINT_32 blockOffset
= MortonGen3d((pIn
->x
/ microBlockDim
.w
),
4866 (pIn
->y
/ microBlockDim
.h
),
4867 (pIn
->slice
/ microBlockDim
.d
),
4871 blockOffset
|= ComputeSurface3DMicroBlockOffset(pIn
);
4873 if (IsXor(pIn
->swizzleMode
))
4875 // Mask off bits above Macro block bits to keep page synonyms working for prt
4876 if (IsPrt(pIn
->swizzleMode
))
4878 blockOffset
&= ((1 << log2BlkSize
) - 1);
4881 // Preserve offset inside pipe interleave
4882 interleaveOffset
= blockOffset
& ((1 << m_pipeInterleaveLog2
) - 1);
4883 blockOffset
>>= m_pipeInterleaveLog2
;
4886 pipeBits
= GetPipeXorBits(log2BlkSize
);
4888 pipeXor
= FoldXor3d(blockOffset
, pipeBits
);
4889 blockOffset
>>= pipeBits
;
4892 bankBits
= GetBankXorBits(log2BlkSize
);
4894 bankXor
= FoldXor3d(blockOffset
, bankBits
);
4895 blockOffset
>>= bankBits
;
4897 // Put all the part back together
4898 blockOffset
<<= bankBits
;
4899 blockOffset
|= bankXor
;
4900 blockOffset
<<= pipeBits
;
4901 blockOffset
|= pipeXor
;
4902 blockOffset
<<= m_pipeInterleaveLog2
;
4903 blockOffset
|= interleaveOffset
;
4906 ADDR_ASSERT((blockOffset
| mipTailBytesOffset
) == (blockOffset
+ mipTailBytesOffset
));
4907 ADDR_ASSERT((mipTailBytesOffset
== 0u) || (blockOffset
< (1u << log2BlkSize
)));
4908 blockOffset
|= mipTailBytesOffset
;
4910 returnCode
= ApplyCustomerPipeBankXor(pIn
->swizzleMode
, pIn
->pipeBankXor
,
4911 bankBits
, pipeBits
, &blockOffset
);
4913 blockOffset
%= (1 << log2BlkSize
);
4915 UINT_32 xb
= pIn
->x
/ localOut
.blockWidth
+ mipStartPos
.w
;
4916 UINT_32 yb
= pIn
->y
/ localOut
.blockHeight
+ mipStartPos
.h
;
4917 UINT_32 zb
= pIn
->slice
/ localOut
.blockSlices
+ + mipStartPos
.d
;
4919 UINT_32 pitchInBlock
= localOut
.mipChainPitch
/ localOut
.blockWidth
;
4920 UINT_32 sliceSizeInBlock
=
4921 (localOut
.mipChainHeight
/ localOut
.blockHeight
) * pitchInBlock
;
4922 UINT_64 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
4924 pOut
->addr
= blockOffset
| (blockIndex
<< log2BlkSize
);
4929 returnCode
= ADDR_INVALIDPARAMS
;
4936 ************************************************************************************************************************
4937 * Gfx9Lib::ComputeSurfaceInfoLinear
4940 * Internal function to calculate padding for linear swizzle 2D/3D surface
4944 ************************************************************************************************************************
4946 ADDR_E_RETURNCODE
Gfx9Lib::ComputeSurfaceLinearPadding(
4947 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input srtucture
4948 UINT_32
* pMipmap0PaddedWidth
, ///< [out] padded width in element
4949 UINT_32
* pSlice0PaddedHeight
, ///< [out] padded height for HW
4950 ADDR2_MIP_INFO
* pMipInfo
///< [out] per mip information
4953 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
4955 UINT_32 elementBytes
= pIn
->bpp
>> 3;
4956 UINT_32 pitchAlignInElement
= 0;
4958 if (pIn
->swizzleMode
== ADDR_SW_LINEAR_GENERAL
)
4960 ADDR_ASSERT(pIn
->numMipLevels
<= 1);
4961 ADDR_ASSERT(pIn
->numSlices
<= 1);
4962 pitchAlignInElement
= 1;
4966 pitchAlignInElement
= (256 / elementBytes
);
4969 UINT_32 mipChainWidth
= PowTwoAlign(pIn
->width
, pitchAlignInElement
);
4970 UINT_32 slice0PaddedHeight
= pIn
->height
;
4972 returnCode
= ApplyCustomizedPitchHeight(pIn
, elementBytes
, pitchAlignInElement
,
4973 &mipChainWidth
, &slice0PaddedHeight
);
4975 if (returnCode
== ADDR_OK
)
4977 UINT_32 mipChainHeight
= 0;
4978 UINT_32 mipHeight
= pIn
->height
;
4979 UINT_32 mipDepth
= (pIn
->resourceType
== ADDR_RSRC_TEX_3D
) ? pIn
->numSlices
: 1;
4981 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
4983 if (pMipInfo
!= NULL
)
4985 pMipInfo
[i
].offset
= mipChainWidth
* mipChainHeight
* elementBytes
;
4986 pMipInfo
[i
].pitch
= mipChainWidth
;
4987 pMipInfo
[i
].height
= mipHeight
;
4988 pMipInfo
[i
].depth
= mipDepth
;
4991 mipChainHeight
+= mipHeight
;
4992 mipHeight
= RoundHalf(mipHeight
);
4993 mipHeight
= Max(mipHeight
, 1u);
4996 *pMipmap0PaddedWidth
= mipChainWidth
;
4997 *pSlice0PaddedHeight
= (pIn
->numMipLevels
> 1) ? mipChainHeight
: slice0PaddedHeight
;
5004 ************************************************************************************************************************
5005 * Gfx9Lib::ComputeThinBlockDimension
5008 * Internal function to get thin block width/height/depth in element from surface input params.
5012 ************************************************************************************************************************
5014 VOID
Gfx9Lib::ComputeThinBlockDimension(
5020 AddrResourceType resourceType
,
5021 AddrSwizzleMode swizzleMode
) const
5023 ADDR_ASSERT(IsThin(resourceType
, swizzleMode
));
5025 const UINT_32 log2BlkSize
= GetBlockSizeLog2(swizzleMode
);
5026 const UINT_32 eleBytes
= bpp
>> 3;
5027 const UINT_32 microBlockSizeTableIndex
= Log2(eleBytes
);
5028 const UINT_32 log2blkSizeIn256B
= log2BlkSize
- 8;
5029 const UINT_32 widthAmp
= log2blkSizeIn256B
/ 2;
5030 const UINT_32 heightAmp
= log2blkSizeIn256B
- widthAmp
;
5032 ADDR_ASSERT(microBlockSizeTableIndex
< sizeof(Block256_2d
) / sizeof(Block256_2d
[0]));
5034 *pWidth
= (Block256_2d
[microBlockSizeTableIndex
].w
<< widthAmp
);
5035 *pHeight
= (Block256_2d
[microBlockSizeTableIndex
].h
<< heightAmp
);
5040 const UINT_32 log2sample
= Log2(numSamples
);
5041 const UINT_32 q
= log2sample
>> 1;
5042 const UINT_32 r
= log2sample
& 1;
5044 if (log2BlkSize
& 1)
5047 *pHeight
>>= (q
+ r
);
5051 *pWidth
>>= (q
+ r
);