2 * Copyright © 2007-2018 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
34 #include "gfx9addrlib.h"
36 #include "gfx9_gb_reg.h"
38 #include "amdgpu_asic_addr.h"
40 #include "util/macros.h"
42 ////////////////////////////////////////////////////////////////////////////////////////////////////
43 ////////////////////////////////////////////////////////////////////////////////////////////////////
49 ************************************************************************************************************************
53 * Creates a Gfx9Lib object.
56 * Returns a Gfx9Lib object pointer.
57 ************************************************************************************************************************
59 Addr::Lib
* Gfx9HwlInit(const Client
* pClient
)
61 return V2::Gfx9Lib::CreateObj(pClient
);
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68 // Static Const Member
69 ////////////////////////////////////////////////////////////////////////////////////////////////////
71 const SwizzleModeFlags
Gfx9Lib::SwizzleModeTable
[ADDR_SW_MAX_TYPE
] =
72 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt
73 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
74 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
75 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D
76 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R
78 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
79 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
80 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D
81 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R
83 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
84 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
85 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D
86 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_R
88 {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z
89 {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S
90 {0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D
91 {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R
93 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T
94 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T
95 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T
96 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T
98 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_x
99 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_x
100 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_x
101 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_x
103 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X
104 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X
105 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X
106 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X
108 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X
109 {0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X
110 {0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X
111 {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X
112 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
115 const UINT_32
Gfx9Lib::MipTailOffset256B
[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
116 8, 6, 5, 4, 3, 2, 1, 0};
118 const Dim3d
Gfx9Lib::Block256_3dS
[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
120 const Dim3d
Gfx9Lib::Block256_3dZ
[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
123 ************************************************************************************************************************
129 ************************************************************************************************************************
131 Gfx9Lib::Gfx9Lib(const Client
* pClient
)
136 m_class
= AI_ADDRLIB
;
137 memset(&m_settings
, 0, sizeof(m_settings
));
138 memcpy(m_swizzleModeTable
, SwizzleModeTable
, sizeof(SwizzleModeTable
));
139 m_metaEqOverrideIndex
= 0;
140 memset(m_cachedMetaEqKey
, 0, sizeof(m_cachedMetaEqKey
));
144 ************************************************************************************************************************
149 ************************************************************************************************************************
156 ************************************************************************************************************************
157 * Gfx9Lib::HwlComputeHtileInfo
160 * Interface function stub of AddrComputeHtileInfo
164 ************************************************************************************************************************
166 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeHtileInfo(
167 const ADDR2_COMPUTE_HTILE_INFO_INPUT
* pIn
, ///< [in] input structure
168 ADDR2_COMPUTE_HTILE_INFO_OUTPUT
* pOut
///< [out] output structure
171 UINT_32 numPipeTotal
= GetPipeNumForMetaAddressing(pIn
->hTileFlags
.pipeAligned
,
174 UINT_32 numRbTotal
= pIn
->hTileFlags
.rbAligned
? m_se
* m_rbPerSe
: 1;
176 UINT_32 numCompressBlkPerMetaBlk
, numCompressBlkPerMetaBlkLog2
;
178 if ((numPipeTotal
== 1) && (numRbTotal
== 1))
180 numCompressBlkPerMetaBlkLog2
= 10;
184 if (m_settings
.applyAliasFix
)
186 numCompressBlkPerMetaBlkLog2
= m_seLog2
+ m_rbPerSeLog2
+ Max(10u, m_pipeInterleaveLog2
);
190 numCompressBlkPerMetaBlkLog2
= m_seLog2
+ m_rbPerSeLog2
+ 10;
194 numCompressBlkPerMetaBlk
= 1 << numCompressBlkPerMetaBlkLog2
;
196 Dim3d metaBlkDim
= {8, 8, 1};
197 UINT_32 totalAmpBits
= numCompressBlkPerMetaBlkLog2
;
198 UINT_32 widthAmp
= (pIn
->numMipLevels
> 1) ? (totalAmpBits
>> 1) : RoundHalf(totalAmpBits
);
199 UINT_32 heightAmp
= totalAmpBits
- widthAmp
;
200 metaBlkDim
.w
<<= widthAmp
;
201 metaBlkDim
.h
<<= heightAmp
;
204 Dim3d metaBlkDimDbg
= {8, 8, 1};
205 for (UINT_32 index
= 0; index
< numCompressBlkPerMetaBlkLog2
; index
++)
207 if ((metaBlkDimDbg
.h
< metaBlkDimDbg
.w
) ||
208 ((pIn
->numMipLevels
> 1) && (metaBlkDimDbg
.h
== metaBlkDimDbg
.w
)))
210 metaBlkDimDbg
.h
<<= 1;
214 metaBlkDimDbg
.w
<<= 1;
217 ADDR_ASSERT((metaBlkDimDbg
.w
== metaBlkDim
.w
) && (metaBlkDimDbg
.h
== metaBlkDim
.h
));
224 GetMetaMipInfo(pIn
->numMipLevels
, &metaBlkDim
, FALSE
, pOut
->pMipInfo
,
225 pIn
->unalignedWidth
, pIn
->unalignedHeight
, pIn
->numSlices
,
226 &numMetaBlkX
, &numMetaBlkY
, &numMetaBlkZ
);
228 const UINT_32 metaBlkSize
= numCompressBlkPerMetaBlk
<< 2;
229 UINT_32 align
= numPipeTotal
* numRbTotal
* m_pipeInterleaveBytes
;
231 if ((IsXor(pIn
->swizzleMode
) == FALSE
) && (numPipeTotal
> 2))
233 align
*= (numPipeTotal
>> 1);
236 align
= Max(align
, metaBlkSize
);
238 if (m_settings
.metaBaseAlignFix
)
240 align
= Max(align
, GetBlockSize(pIn
->swizzleMode
));
243 if (m_settings
.htileAlignFix
)
245 const INT_32 metaBlkSizeLog2
= numCompressBlkPerMetaBlkLog2
+ 2;
246 const INT_32 htileCachelineSizeLog2
= 11;
247 const INT_32 maxNumOfRbMaskBits
= 1 + Log2(numPipeTotal
) + Log2(numRbTotal
);
249 INT_32 rbMaskPadding
= Max(0, htileCachelineSizeLog2
- (metaBlkSizeLog2
- maxNumOfRbMaskBits
));
251 align
<<= rbMaskPadding
;
254 pOut
->pitch
= numMetaBlkX
* metaBlkDim
.w
;
255 pOut
->height
= numMetaBlkY
* metaBlkDim
.h
;
256 pOut
->sliceSize
= numMetaBlkX
* numMetaBlkY
* metaBlkSize
;
258 pOut
->metaBlkWidth
= metaBlkDim
.w
;
259 pOut
->metaBlkHeight
= metaBlkDim
.h
;
260 pOut
->metaBlkNumPerSlice
= numMetaBlkX
* numMetaBlkY
;
262 pOut
->baseAlign
= align
;
263 pOut
->htileBytes
= PowTwoAlign(pOut
->sliceSize
* numMetaBlkZ
, align
);
269 ************************************************************************************************************************
270 * Gfx9Lib::HwlComputeCmaskInfo
273 * Interface function stub of AddrComputeCmaskInfo
277 ************************************************************************************************************************
279 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeCmaskInfo(
280 const ADDR2_COMPUTE_CMASK_INFO_INPUT
* pIn
, ///< [in] input structure
281 ADDR2_COMPUTE_CMASK_INFO_OUTPUT
* pOut
///< [out] output structure
284 // TODO: Clarify with AddrLib team
285 // ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
287 UINT_32 numPipeTotal
= GetPipeNumForMetaAddressing(pIn
->cMaskFlags
.pipeAligned
,
290 UINT_32 numRbTotal
= pIn
->cMaskFlags
.rbAligned
? m_se
* m_rbPerSe
: 1;
292 UINT_32 numCompressBlkPerMetaBlkLog2
, numCompressBlkPerMetaBlk
;
294 if ((numPipeTotal
== 1) && (numRbTotal
== 1))
296 numCompressBlkPerMetaBlkLog2
= 13;
300 if (m_settings
.applyAliasFix
)
302 numCompressBlkPerMetaBlkLog2
= m_seLog2
+ m_rbPerSeLog2
+ Max(10u, m_pipeInterleaveLog2
);
306 numCompressBlkPerMetaBlkLog2
= m_seLog2
+ m_rbPerSeLog2
+ 10;
309 numCompressBlkPerMetaBlkLog2
= Max(numCompressBlkPerMetaBlkLog2
, 13u);
312 numCompressBlkPerMetaBlk
= 1 << numCompressBlkPerMetaBlkLog2
;
314 Dim2d metaBlkDim
= {8, 8};
315 UINT_32 totalAmpBits
= numCompressBlkPerMetaBlkLog2
;
316 UINT_32 heightAmp
= totalAmpBits
>> 1;
317 UINT_32 widthAmp
= totalAmpBits
- heightAmp
;
318 metaBlkDim
.w
<<= widthAmp
;
319 metaBlkDim
.h
<<= heightAmp
;
322 Dim2d metaBlkDimDbg
= {8, 8};
323 for (UINT_32 index
= 0; index
< numCompressBlkPerMetaBlkLog2
; index
++)
325 if (metaBlkDimDbg
.h
< metaBlkDimDbg
.w
)
327 metaBlkDimDbg
.h
<<= 1;
331 metaBlkDimDbg
.w
<<= 1;
334 ADDR_ASSERT((metaBlkDimDbg
.w
== metaBlkDim
.w
) && (metaBlkDimDbg
.h
== metaBlkDim
.h
));
337 UINT_32 numMetaBlkX
= (pIn
->unalignedWidth
+ metaBlkDim
.w
- 1) / metaBlkDim
.w
;
338 UINT_32 numMetaBlkY
= (pIn
->unalignedHeight
+ metaBlkDim
.h
- 1) / metaBlkDim
.h
;
339 UINT_32 numMetaBlkZ
= Max(pIn
->numSlices
, 1u);
341 UINT_32 sizeAlign
= numPipeTotal
* numRbTotal
* m_pipeInterleaveBytes
;
343 if (m_settings
.metaBaseAlignFix
)
345 sizeAlign
= Max(sizeAlign
, GetBlockSize(pIn
->swizzleMode
));
348 pOut
->pitch
= numMetaBlkX
* metaBlkDim
.w
;
349 pOut
->height
= numMetaBlkY
* metaBlkDim
.h
;
350 pOut
->sliceSize
= (numMetaBlkX
* numMetaBlkY
* numCompressBlkPerMetaBlk
) >> 1;
351 pOut
->cmaskBytes
= PowTwoAlign(pOut
->sliceSize
* numMetaBlkZ
, sizeAlign
);
352 pOut
->baseAlign
= Max(numCompressBlkPerMetaBlk
>> 1, sizeAlign
);
354 pOut
->metaBlkWidth
= metaBlkDim
.w
;
355 pOut
->metaBlkHeight
= metaBlkDim
.h
;
357 pOut
->metaBlkNumPerSlice
= numMetaBlkX
* numMetaBlkY
;
363 ************************************************************************************************************************
364 * Gfx9Lib::GetMetaMipInfo
371 ************************************************************************************************************************
373 VOID
Gfx9Lib::GetMetaMipInfo(
374 UINT_32 numMipLevels
, ///< [in] number of mip levels
375 Dim3d
* pMetaBlkDim
, ///< [in] meta block dimension
376 BOOL_32 dataThick
, ///< [in] data surface is thick
377 ADDR2_META_MIP_INFO
* pInfo
, ///< [out] meta mip info
378 UINT_32 mip0Width
, ///< [in] mip0 width
379 UINT_32 mip0Height
, ///< [in] mip0 height
380 UINT_32 mip0Depth
, ///< [in] mip0 depth
381 UINT_32
* pNumMetaBlkX
, ///< [out] number of metablock X in mipchain
382 UINT_32
* pNumMetaBlkY
, ///< [out] number of metablock Y in mipchain
383 UINT_32
* pNumMetaBlkZ
) ///< [out] number of metablock Z in mipchain
386 UINT_32 numMetaBlkX
= (mip0Width
+ pMetaBlkDim
->w
- 1) / pMetaBlkDim
->w
;
387 UINT_32 numMetaBlkY
= (mip0Height
+ pMetaBlkDim
->h
- 1) / pMetaBlkDim
->h
;
388 UINT_32 numMetaBlkZ
= (mip0Depth
+ pMetaBlkDim
->d
- 1) / pMetaBlkDim
->d
;
389 UINT_32 tailWidth
= pMetaBlkDim
->w
;
390 UINT_32 tailHeight
= pMetaBlkDim
->h
>> 1;
391 UINT_32 tailDepth
= pMetaBlkDim
->d
;
392 BOOL_32 inTail
= FALSE
;
393 AddrMajorMode major
= ADDR_MAJOR_MAX_TYPE
;
395 if (numMipLevels
> 1)
397 if (dataThick
&& (numMetaBlkZ
> numMetaBlkX
) && (numMetaBlkZ
> numMetaBlkY
))
400 major
= ADDR_MAJOR_Z
;
402 else if (numMetaBlkX
>= numMetaBlkY
)
405 major
= ADDR_MAJOR_X
;
410 major
= ADDR_MAJOR_Y
;
413 inTail
= ((mip0Width
<= tailWidth
) &&
414 (mip0Height
<= tailHeight
) &&
415 ((dataThick
== FALSE
) || (mip0Depth
<= tailDepth
)));
423 if (major
== ADDR_MAJOR_Z
)
426 pMipDim
= &numMetaBlkY
;
427 pOrderDim
= &numMetaBlkZ
;
430 else if (major
== ADDR_MAJOR_X
)
433 pMipDim
= &numMetaBlkY
;
434 pOrderDim
= &numMetaBlkX
;
440 pMipDim
= &numMetaBlkX
;
441 pOrderDim
= &numMetaBlkY
;
445 if ((*pMipDim
< 3) && (*pOrderDim
> orderLimit
) && (numMipLevels
> 3))
451 *pMipDim
+= ((*pMipDim
/ 2) + (*pMipDim
& 1));
458 UINT_32 mipWidth
= mip0Width
;
459 UINT_32 mipHeight
= mip0Height
;
460 UINT_32 mipDepth
= mip0Depth
;
461 Dim3d mipCoord
= {0};
463 for (UINT_32 mip
= 0; mip
< numMipLevels
; mip
++)
467 GetMetaMiptailInfo(&pInfo
[mip
], mipCoord
, numMipLevels
- mip
,
473 mipWidth
= PowTwoAlign(mipWidth
, pMetaBlkDim
->w
);
474 mipHeight
= PowTwoAlign(mipHeight
, pMetaBlkDim
->h
);
475 mipDepth
= PowTwoAlign(mipDepth
, pMetaBlkDim
->d
);
477 pInfo
[mip
].inMiptail
= FALSE
;
478 pInfo
[mip
].startX
= mipCoord
.w
;
479 pInfo
[mip
].startY
= mipCoord
.h
;
480 pInfo
[mip
].startZ
= mipCoord
.d
;
481 pInfo
[mip
].width
= mipWidth
;
482 pInfo
[mip
].height
= mipHeight
;
483 pInfo
[mip
].depth
= dataThick
? mipDepth
: 1;
485 if ((mip
>= 3) || (mip
& 1))
490 mipCoord
.w
+= mipWidth
;
493 mipCoord
.h
+= mipHeight
;
496 mipCoord
.d
+= mipDepth
;
507 mipCoord
.h
+= mipHeight
;
510 mipCoord
.w
+= mipWidth
;
513 mipCoord
.h
+= mipHeight
;
520 mipWidth
= Max(mipWidth
>> 1, 1u);
521 mipHeight
= Max(mipHeight
>> 1, 1u);
522 mipDepth
= Max(mipDepth
>> 1, 1u);
524 inTail
= ((mipWidth
<= tailWidth
) &&
525 (mipHeight
<= tailHeight
) &&
526 ((dataThick
== FALSE
) || (mipDepth
<= tailDepth
)));
531 *pNumMetaBlkX
= numMetaBlkX
;
532 *pNumMetaBlkY
= numMetaBlkY
;
533 *pNumMetaBlkZ
= numMetaBlkZ
;
537 ************************************************************************************************************************
538 * Gfx9Lib::HwlComputeDccInfo
541 * Interface function to compute DCC key info
545 ************************************************************************************************************************
547 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeDccInfo(
548 const ADDR2_COMPUTE_DCCINFO_INPUT
* pIn
, ///< [in] input structure
549 ADDR2_COMPUTE_DCCINFO_OUTPUT
* pOut
///< [out] output structure
552 BOOL_32 dataLinear
= IsLinear(pIn
->swizzleMode
);
553 BOOL_32 metaLinear
= pIn
->dccKeyFlags
.linear
;
554 BOOL_32 pipeAligned
= pIn
->dccKeyFlags
.pipeAligned
;
560 else if (metaLinear
== TRUE
)
565 UINT_32 numPipeTotal
= GetPipeNumForMetaAddressing(pipeAligned
, pIn
->swizzleMode
);
569 // Linear metadata support was removed for GFX9! No one can use this feature on GFX9.
570 ADDR_ASSERT_ALWAYS();
572 pOut
->dccRamBaseAlign
= numPipeTotal
* m_pipeInterleaveBytes
;
573 pOut
->dccRamSize
= PowTwoAlign((pIn
->dataSurfaceSize
/ 256), pOut
->dccRamBaseAlign
);
577 BOOL_32 dataThick
= IsThick(pIn
->resourceType
, pIn
->swizzleMode
);
579 UINT_32 minMetaBlkSize
= dataThick
? 65536 : 4096;
581 UINT_32 numFrags
= Max(pIn
->numFrags
, 1u);
582 UINT_32 numSlices
= Max(pIn
->numSlices
, 1u);
584 minMetaBlkSize
/= numFrags
;
586 UINT_32 numCompressBlkPerMetaBlk
= minMetaBlkSize
;
588 UINT_32 numRbTotal
= pIn
->dccKeyFlags
.rbAligned
? m_se
* m_rbPerSe
: 1;
590 if ((numPipeTotal
> 1) || (numRbTotal
> 1))
592 const UINT_32 thinBlkSize
= 1 << (m_settings
.applyAliasFix
? Max(10u, m_pipeInterleaveLog2
) : 10);
594 numCompressBlkPerMetaBlk
=
595 Max(numCompressBlkPerMetaBlk
, m_se
* m_rbPerSe
* (dataThick
? 262144 : thinBlkSize
));
597 if (numCompressBlkPerMetaBlk
> 65536 * pIn
->bpp
)
599 numCompressBlkPerMetaBlk
= 65536 * pIn
->bpp
;
603 Dim3d compressBlkDim
= GetDccCompressBlk(pIn
->resourceType
, pIn
->swizzleMode
, pIn
->bpp
);
604 Dim3d metaBlkDim
= compressBlkDim
;
606 for (UINT_32 index
= 1; index
< numCompressBlkPerMetaBlk
; index
<<= 1)
608 if ((metaBlkDim
.h
< metaBlkDim
.w
) ||
609 ((pIn
->numMipLevels
> 1) && (metaBlkDim
.h
== metaBlkDim
.w
)))
611 if ((dataThick
== FALSE
) || (metaBlkDim
.h
<= metaBlkDim
.d
))
622 if ((dataThick
== FALSE
) || (metaBlkDim
.w
<= metaBlkDim
.d
))
637 GetMetaMipInfo(pIn
->numMipLevels
, &metaBlkDim
, dataThick
, pOut
->pMipInfo
,
638 pIn
->unalignedWidth
, pIn
->unalignedHeight
, numSlices
,
639 &numMetaBlkX
, &numMetaBlkY
, &numMetaBlkZ
);
641 UINT_32 sizeAlign
= numPipeTotal
* numRbTotal
* m_pipeInterleaveBytes
;
643 if (numFrags
> m_maxCompFrag
)
645 sizeAlign
*= (numFrags
/ m_maxCompFrag
);
648 if (m_settings
.metaBaseAlignFix
)
650 sizeAlign
= Max(sizeAlign
, GetBlockSize(pIn
->swizzleMode
));
653 pOut
->dccRamSize
= numMetaBlkX
* numMetaBlkY
* numMetaBlkZ
*
654 numCompressBlkPerMetaBlk
* numFrags
;
655 pOut
->dccRamSize
= PowTwoAlign(pOut
->dccRamSize
, sizeAlign
);
656 pOut
->dccRamBaseAlign
= Max(numCompressBlkPerMetaBlk
, sizeAlign
);
658 pOut
->pitch
= numMetaBlkX
* metaBlkDim
.w
;
659 pOut
->height
= numMetaBlkY
* metaBlkDim
.h
;
660 pOut
->depth
= numMetaBlkZ
* metaBlkDim
.d
;
662 pOut
->compressBlkWidth
= compressBlkDim
.w
;
663 pOut
->compressBlkHeight
= compressBlkDim
.h
;
664 pOut
->compressBlkDepth
= compressBlkDim
.d
;
666 pOut
->metaBlkWidth
= metaBlkDim
.w
;
667 pOut
->metaBlkHeight
= metaBlkDim
.h
;
668 pOut
->metaBlkDepth
= metaBlkDim
.d
;
670 pOut
->metaBlkNumPerSlice
= numMetaBlkX
* numMetaBlkY
;
671 pOut
->fastClearSizePerSlice
=
672 pOut
->metaBlkNumPerSlice
* numCompressBlkPerMetaBlk
* Min(numFrags
, m_maxCompFrag
);
679 ************************************************************************************************************************
680 * Gfx9Lib::HwlComputeMaxBaseAlignments
683 * Gets maximum alignments
686 ************************************************************************************************************************
688 UINT_32
Gfx9Lib::HwlComputeMaxBaseAlignments() const
690 return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB
);
694 ************************************************************************************************************************
695 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
698 * Gets maximum alignments for metadata
700 * maximum alignments for metadata
701 ************************************************************************************************************************
703 UINT_32
Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
705 // Max base alignment for Htile
706 const UINT_32 maxNumPipeTotal
= GetPipeNumForMetaAddressing(TRUE
, ADDR_SW_64KB_Z
);
707 const UINT_32 maxNumRbTotal
= m_se
* m_rbPerSe
;
709 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
710 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
711 ADDR_ASSERT((m_settings
.applyAliasFix
== FALSE
) || (m_pipeInterleaveLog2
<= 10u));
712 const UINT_32 maxNumCompressBlkPerMetaBlk
= 1u << (m_seLog2
+ m_rbPerSeLog2
+ 10u);
714 UINT_32 maxBaseAlignHtile
= maxNumPipeTotal
* maxNumRbTotal
* m_pipeInterleaveBytes
;
716 if (maxNumPipeTotal
> 2)
718 maxBaseAlignHtile
*= (maxNumPipeTotal
>> 1);
721 maxBaseAlignHtile
= Max(maxNumCompressBlkPerMetaBlk
<< 2, maxBaseAlignHtile
);
723 if (m_settings
.metaBaseAlignFix
)
725 maxBaseAlignHtile
= Max(maxBaseAlignHtile
, GetBlockSize(ADDR_SW_64KB
));
728 if (m_settings
.htileAlignFix
)
730 maxBaseAlignHtile
*= maxNumPipeTotal
;
733 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
735 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
736 UINT_32 maxBaseAlignDcc3D
= 65536;
738 if ((maxNumPipeTotal
> 1) || (maxNumRbTotal
> 1))
740 maxBaseAlignDcc3D
= Min(m_se
* m_rbPerSe
* 262144, 65536 * 128u);
743 // Max base alignment for Msaa Dcc
744 UINT_32 maxBaseAlignDccMsaa
= maxNumPipeTotal
* maxNumRbTotal
* m_pipeInterleaveBytes
* (8 / m_maxCompFrag
);
746 if (m_settings
.metaBaseAlignFix
)
748 maxBaseAlignDccMsaa
= Max(maxBaseAlignDccMsaa
, GetBlockSize(ADDR_SW_64KB
));
751 return Max(maxBaseAlignHtile
, Max(maxBaseAlignDccMsaa
, maxBaseAlignDcc3D
));
755 ************************************************************************************************************************
756 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
759 * Interface function stub of AddrComputeCmaskAddrFromCoord
763 ************************************************************************************************************************
765 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeCmaskAddrFromCoord(
766 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
767 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT
* pOut
) ///< [out] output structure
769 ADDR2_COMPUTE_CMASK_INFO_INPUT input
= {0};
770 input
.size
= sizeof(input
);
771 input
.cMaskFlags
= pIn
->cMaskFlags
;
772 input
.colorFlags
= pIn
->colorFlags
;
773 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
774 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
775 input
.numSlices
= Max(pIn
->numSlices
, 1u);
776 input
.swizzleMode
= pIn
->swizzleMode
;
777 input
.resourceType
= pIn
->resourceType
;
779 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output
= {0};
780 output
.size
= sizeof(output
);
782 ADDR_E_RETURNCODE returnCode
= ComputeCmaskInfo(&input
, &output
);
784 if (returnCode
== ADDR_OK
)
786 UINT_32 fmaskBpp
= GetFmaskBpp(pIn
->numSamples
, pIn
->numFrags
);
787 UINT_32 fmaskElementBytesLog2
= Log2(fmaskBpp
>> 3);
788 UINT_32 metaBlkWidthLog2
= Log2(output
.metaBlkWidth
);
789 UINT_32 metaBlkHeightLog2
= Log2(output
.metaBlkHeight
);
791 MetaEqParams metaEqParams
= {0, fmaskElementBytesLog2
, 0, pIn
->cMaskFlags
,
792 Gfx9DataFmask
, pIn
->swizzleMode
, pIn
->resourceType
,
793 metaBlkWidthLog2
, metaBlkHeightLog2
, 0, 3, 3, 0};
795 const CoordEq
* pMetaEq
= GetMetaEquation(metaEqParams
);
797 UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
798 UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
799 UINT_32 zb
= pIn
->slice
;
801 UINT_32 pitchInBlock
= output
.pitch
/ output
.metaBlkWidth
;
802 UINT_32 sliceSizeInBlock
= (output
.height
/ output
.metaBlkHeight
) * pitchInBlock
;
803 UINT_32 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
805 UINT_64 address
= pMetaEq
->solve(pIn
->x
, pIn
->y
, pIn
->slice
, 0, blockIndex
);
807 pOut
->addr
= address
>> 1;
808 pOut
->bitPosition
= static_cast<UINT_32
>((address
& 1) << 2);
810 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->cMaskFlags
.pipeAligned
,
813 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
815 pOut
->addr
^= (pipeXor
<< m_pipeInterleaveLog2
);
822 ************************************************************************************************************************
823 * Gfx9Lib::HwlComputeHtileAddrFromCoord
826 * Interface function stub of AddrComputeHtileAddrFromCoord
830 ************************************************************************************************************************
832 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeHtileAddrFromCoord(
833 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
834 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT
* pOut
) ///< [out] output structure
836 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
838 if (pIn
->numMipLevels
> 1)
840 returnCode
= ADDR_NOTIMPLEMENTED
;
844 ADDR2_COMPUTE_HTILE_INFO_INPUT input
= {0};
845 input
.size
= sizeof(input
);
846 input
.hTileFlags
= pIn
->hTileFlags
;
847 input
.depthFlags
= pIn
->depthflags
;
848 input
.swizzleMode
= pIn
->swizzleMode
;
849 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
850 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
851 input
.numSlices
= Max(pIn
->numSlices
, 1u);
852 input
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
854 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output
= {0};
855 output
.size
= sizeof(output
);
857 returnCode
= ComputeHtileInfo(&input
, &output
);
859 if (returnCode
== ADDR_OK
)
861 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
862 UINT_32 metaBlkWidthLog2
= Log2(output
.metaBlkWidth
);
863 UINT_32 metaBlkHeightLog2
= Log2(output
.metaBlkHeight
);
864 UINT_32 numSamplesLog2
= Log2(pIn
->numSamples
);
866 MetaEqParams metaEqParams
= {0, elementBytesLog2
, numSamplesLog2
, pIn
->hTileFlags
,
867 Gfx9DataDepthStencil
, pIn
->swizzleMode
, ADDR_RSRC_TEX_2D
,
868 metaBlkWidthLog2
, metaBlkHeightLog2
, 0, 3, 3, 0};
870 const CoordEq
* pMetaEq
= GetMetaEquation(metaEqParams
);
872 UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
873 UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
874 UINT_32 zb
= pIn
->slice
;
876 UINT_32 pitchInBlock
= output
.pitch
/ output
.metaBlkWidth
;
877 UINT_32 sliceSizeInBlock
= (output
.height
/ output
.metaBlkHeight
) * pitchInBlock
;
878 UINT_32 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
880 UINT_64 address
= pMetaEq
->solve(pIn
->x
, pIn
->y
, pIn
->slice
, 0, blockIndex
);
882 pOut
->addr
= address
>> 1;
884 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->hTileFlags
.pipeAligned
,
887 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
889 pOut
->addr
^= (pipeXor
<< m_pipeInterleaveLog2
);
897 ************************************************************************************************************************
898 * Gfx9Lib::HwlComputeHtileCoordFromAddr
901 * Interface function stub of AddrComputeHtileCoordFromAddr
905 ************************************************************************************************************************
907 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeHtileCoordFromAddr(
908 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT
* pIn
, ///< [in] input structure
909 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT
* pOut
) ///< [out] output structure
911 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
913 if (pIn
->numMipLevels
> 1)
915 returnCode
= ADDR_NOTIMPLEMENTED
;
919 ADDR2_COMPUTE_HTILE_INFO_INPUT input
= {0};
920 input
.size
= sizeof(input
);
921 input
.hTileFlags
= pIn
->hTileFlags
;
922 input
.swizzleMode
= pIn
->swizzleMode
;
923 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
924 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
925 input
.numSlices
= Max(pIn
->numSlices
, 1u);
926 input
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
928 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output
= {0};
929 output
.size
= sizeof(output
);
931 returnCode
= ComputeHtileInfo(&input
, &output
);
933 if (returnCode
== ADDR_OK
)
935 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
936 UINT_32 metaBlkWidthLog2
= Log2(output
.metaBlkWidth
);
937 UINT_32 metaBlkHeightLog2
= Log2(output
.metaBlkHeight
);
938 UINT_32 numSamplesLog2
= Log2(pIn
->numSamples
);
940 MetaEqParams metaEqParams
= {0, elementBytesLog2
, numSamplesLog2
, pIn
->hTileFlags
,
941 Gfx9DataDepthStencil
, pIn
->swizzleMode
, ADDR_RSRC_TEX_2D
,
942 metaBlkWidthLog2
, metaBlkHeightLog2
, 0, 3, 3, 0};
944 const CoordEq
* pMetaEq
= GetMetaEquation(metaEqParams
);
946 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->hTileFlags
.pipeAligned
,
949 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
951 UINT_64 nibbleAddress
= (pIn
->addr
^ (pipeXor
<< m_pipeInterleaveLog2
)) << 1;
953 UINT_32 pitchInBlock
= output
.pitch
/ output
.metaBlkWidth
;
954 UINT_32 sliceSizeInBlock
= (output
.height
/ output
.metaBlkHeight
) * pitchInBlock
;
956 UINT_32 x
, y
, z
, s
, m
;
957 pMetaEq
->solveAddr(nibbleAddress
, sliceSizeInBlock
, x
, y
, z
, s
, m
);
959 pOut
->slice
= m
/ sliceSizeInBlock
;
960 pOut
->y
= ((m
% sliceSizeInBlock
) / pitchInBlock
) * output
.metaBlkHeight
+ y
;
961 pOut
->x
= (m
% pitchInBlock
) * output
.metaBlkWidth
+ x
;
969 ************************************************************************************************************************
970 * Gfx9Lib::HwlComputeDccAddrFromCoord
973 * Interface function stub of AddrComputeDccAddrFromCoord
977 ************************************************************************************************************************
979 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeDccAddrFromCoord(
980 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT
* pIn
,
981 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT
* pOut
)
983 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
985 if ((pIn
->numMipLevels
> 1) || (pIn
->mipId
> 1) || pIn
->dccKeyFlags
.linear
)
987 returnCode
= ADDR_NOTIMPLEMENTED
;
991 ADDR2_COMPUTE_DCCINFO_INPUT input
= {0};
992 input
.size
= sizeof(input
);
993 input
.dccKeyFlags
= pIn
->dccKeyFlags
;
994 input
.colorFlags
= pIn
->colorFlags
;
995 input
.swizzleMode
= pIn
->swizzleMode
;
996 input
.resourceType
= pIn
->resourceType
;
997 input
.bpp
= pIn
->bpp
;
998 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
999 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
1000 input
.numSlices
= Max(pIn
->numSlices
, 1u);
1001 input
.numFrags
= Max(pIn
->numFrags
, 1u);
1002 input
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
1004 ADDR2_COMPUTE_DCCINFO_OUTPUT output
= {0};
1005 output
.size
= sizeof(output
);
1007 returnCode
= ComputeDccInfo(&input
, &output
);
1009 if (returnCode
== ADDR_OK
)
1011 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
1012 UINT_32 numSamplesLog2
= Log2(pIn
->numFrags
);
1013 UINT_32 metaBlkWidthLog2
= Log2(output
.metaBlkWidth
);
1014 UINT_32 metaBlkHeightLog2
= Log2(output
.metaBlkHeight
);
1015 UINT_32 metaBlkDepthLog2
= Log2(output
.metaBlkDepth
);
1016 UINT_32 compBlkWidthLog2
= Log2(output
.compressBlkWidth
);
1017 UINT_32 compBlkHeightLog2
= Log2(output
.compressBlkHeight
);
1018 UINT_32 compBlkDepthLog2
= Log2(output
.compressBlkDepth
);
1020 MetaEqParams metaEqParams
= {pIn
->mipId
, elementBytesLog2
, numSamplesLog2
, pIn
->dccKeyFlags
,
1021 Gfx9DataColor
, pIn
->swizzleMode
, pIn
->resourceType
,
1022 metaBlkWidthLog2
, metaBlkHeightLog2
, metaBlkDepthLog2
,
1023 compBlkWidthLog2
, compBlkHeightLog2
, compBlkDepthLog2
};
1025 const CoordEq
* pMetaEq
= GetMetaEquation(metaEqParams
);
1027 UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
1028 UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
1029 UINT_32 zb
= pIn
->slice
/ output
.metaBlkDepth
;
1031 UINT_32 pitchInBlock
= output
.pitch
/ output
.metaBlkWidth
;
1032 UINT_32 sliceSizeInBlock
= (output
.height
/ output
.metaBlkHeight
) * pitchInBlock
;
1033 UINT_32 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
1035 UINT_64 address
= pMetaEq
->solve(pIn
->x
, pIn
->y
, pIn
->slice
, pIn
->sample
, blockIndex
);
1037 pOut
->addr
= address
>> 1;
1039 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->dccKeyFlags
.pipeAligned
,
1042 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
1044 pOut
->addr
^= (pipeXor
<< m_pipeInterleaveLog2
);
1052 ************************************************************************************************************************
1053 * Gfx9Lib::HwlInitGlobalParams
1056 * Initializes global parameters
1059 * TRUE if all settings are valid
1061 ************************************************************************************************************************
// Decodes the GB_ADDR_CONFIG register value supplied through pCreateIn->regValue.gbAddrConfig and
// caches the derived parameters on this object (pipe interleave, max compressed fragments,
// variable block size), then calls InitEquationTable().
// The function header documents the return value as TRUE if all settings are valid.
// NOTE(review): this listing does not show every statement (braces, most case bodies, else/return
// are not visible); the comments below describe only what is visible.
1063 BOOL_32
Gfx9Lib::HwlInitGlobalParams(
1064 const ADDR_CREATE_INPUT
* pCreateIn
) ///< [in] create input
1066 BOOL_32 valid
= TRUE
;
// Register decoding is guarded by the isArcticIsland setting.
1068 if (m_settings
.isArcticIsland
)
1070 GB_ADDR_CONFIG gbAddrConfig
;
// Load the raw 32-bit register value so it can be read through the .bits fields below.
1072 gbAddrConfig
.u32All
= pCreateIn
->regValue
.gbAddrConfig
;
1074 // These values are copied from CModel code
// NUM_PIPES field: 1 to 32 pipes are recognised; the assert after the cases presumably belongs to
// the default label (not visible here).
1075 switch (gbAddrConfig
.bits
.NUM_PIPES
)
1077 case ADDR_CONFIG_1_PIPE
:
1081 case ADDR_CONFIG_2_PIPE
:
1085 case ADDR_CONFIG_4_PIPE
:
1089 case ADDR_CONFIG_8_PIPE
:
1093 case ADDR_CONFIG_16_PIPE
:
1097 case ADDR_CONFIG_32_PIPE
:
1102 ADDR_ASSERT_ALWAYS();
// PIPE_INTERLEAVE_SIZE field: store the interleave both as a byte-size enum and as log2
// (256B -> 8, 512B -> 9, 1KB -> 10, 2KB -> 11).
1106 switch (gbAddrConfig
.bits
.PIPE_INTERLEAVE_SIZE
)
1108 case ADDR_CONFIG_PIPE_INTERLEAVE_256B
:
1109 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_256B
;
1110 m_pipeInterleaveLog2
= 8;
1112 case ADDR_CONFIG_PIPE_INTERLEAVE_512B
:
1113 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_512B
;
1114 m_pipeInterleaveLog2
= 9;
1116 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB
:
1117 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_1KB
;
1118 m_pipeInterleaveLog2
= 10;
1120 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB
:
1121 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_2KB
;
1122 m_pipeInterleaveLog2
= 11;
1125 ADDR_ASSERT_ALWAYS();
1129 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1130 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1131 ADDR_ASSERT(m_pipeInterleaveBytes
== ADDR_PIPEINTERLEAVE_256B
);
// NUM_BANKS field: 1 to 16 banks are recognised (case bodies not visible here).
1133 switch (gbAddrConfig
.bits
.NUM_BANKS
)
1135 case ADDR_CONFIG_1_BANK
:
1139 case ADDR_CONFIG_2_BANK
:
1143 case ADDR_CONFIG_4_BANK
:
1147 case ADDR_CONFIG_8_BANK
:
1151 case ADDR_CONFIG_16_BANK
:
1156 ADDR_ASSERT_ALWAYS();
// NUM_SHADER_ENGINES field: 1 to 8 shader engines are recognised (case bodies not visible here).
1160 switch (gbAddrConfig
.bits
.NUM_SHADER_ENGINES
)
1162 case ADDR_CONFIG_1_SHADER_ENGINE
:
1166 case ADDR_CONFIG_2_SHADER_ENGINE
:
1170 case ADDR_CONFIG_4_SHADER_ENGINE
:
1174 case ADDR_CONFIG_8_SHADER_ENGINE
:
1179 ADDR_ASSERT_ALWAYS();
// NUM_RB_PER_SE field: 1, 2 or 4 render backends per shader engine (case bodies not visible here).
1183 switch (gbAddrConfig
.bits
.NUM_RB_PER_SE
)
1185 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE
:
1189 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE
:
1193 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE
:
1198 ADDR_ASSERT_ALWAYS();
// MAX_COMPRESSED_FRAGS field: store log2 of the maximum compressed fragment count
// (1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3).
1202 switch (gbAddrConfig
.bits
.MAX_COMPRESSED_FRAGS
)
1204 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS
:
1206 m_maxCompFragLog2
= 0;
1208 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS
:
1210 m_maxCompFragLog2
= 1;
1212 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS
:
1214 m_maxCompFragLog2
= 2;
1216 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS
:
1218 m_maxCompFragLog2
= 3;
1221 ADDR_ASSERT_ALWAYS();
// Variable block size comes straight from the create input. It is asserted to be either 0 or in
// [17, 20] and is then clamped into [17, 20], so an input of 0 ends up as 17.
1225 m_blockVarSizeLog2
= pCreateIn
->regValue
.blockVarSizeLog2
;
1226 ADDR_ASSERT((m_blockVarSizeLog2
== 0) ||
1227 ((m_blockVarSizeLog2
>= 17u) && (m_blockVarSizeLog2
<= 20u)));
1228 m_blockVarSizeLog2
= Min(Max(17u, m_blockVarSizeLog2
), 20u);
// Configurations with 2 RBs per SE combined with (2 pipes and 4 or 8 SEs) or (4 pipes and 2 or 4 SEs):
// asserted not to be Vega10, Raven or Vega20; on Vega12 the htileCacheRbConflict setting is enabled.
1230 if ((m_rbPerSeLog2
== 1) &&
1231 (((m_pipesLog2
== 1) && ((m_seLog2
== 2) || (m_seLog2
== 3))) ||
1232 ((m_pipesLog2
== 2) && ((m_seLog2
== 1) || (m_seLog2
== 2)))))
1234 ADDR_ASSERT(m_settings
.isVega10
== FALSE
);
1235 ADDR_ASSERT(m_settings
.isRaven
== FALSE
);
1236 ADDR_ASSERT(m_settings
.isVega20
== FALSE
);
1238 if (m_settings
.isVega12
)
1240 m_settings
.htileCacheRbConflict
= 1;
// NOTE(review): presumably the branch taken when isArcticIsland is not set (the else is not visible).
1247 ADDR_NOT_IMPLEMENTED();
// Build the equation lookup tables; NOTE(review): any guard around this call is not visible here.
1252 InitEquationTable();
1259 ************************************************************************************************************************
1260 * Gfx9Lib::HwlConvertChipFamily
1263 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1266 ************************************************************************************************************************
// Translates the chip family / revision ids into per-ASIC flags stored in m_settings.
// The local result `family` is initialised to ADDR_CHIP_FAMILY_AI.
// NOTE(review): the case labels of the switch and the return statement are not visible in this
// listing; the two groups of assignments below are presumably the bodies of two family cases.
1268 ChipFamily
Gfx9Lib::HwlConvertChipFamily(
1269 UINT_32 uChipFamily
, ///< [in] chip family defined in atiid.h
1270 UINT_32 uChipRevision
) ///< [in] chip revision defined in "asic_family"_id.h
1272 ChipFamily family
= ADDR_CHIP_FAMILY_AI
;
1274 switch (uChipFamily
)
// First group: Vega-class parts. The revision macros decide which Vega variant flag is set.
1277 m_settings
.isArcticIsland
= 1;
1278 m_settings
.isVega10
= ASICREV_IS_VEGA10_P(uChipRevision
);
1279 m_settings
.isVega12
= ASICREV_IS_VEGA12_P(uChipRevision
);
1280 m_settings
.isVega20
= ASICREV_IS_VEGA20_P(uChipRevision
);
1281 m_settings
.isDce12
= 1;
// htileAlignFix and applyAliasFix are enabled for every revision that is not Vega10.
1283 if (m_settings
.isVega10
== 0)
1285 m_settings
.htileAlignFix
= 1;
1286 m_settings
.applyAliasFix
= 1;
1289 m_settings
.metaBaseAlignFix
= 1;
1291 m_settings
.depthPipeXorDisable
= 1;
// Second group: Raven-class parts.
1294 m_settings
.isArcticIsland
= 1;
// Raven: sets isRaven and disables depth pipe xor.
1296 if (ASICREV_IS_RAVEN(uChipRevision
))
1298 m_settings
.isRaven
= 1;
1300 m_settings
.depthPipeXorDisable
= 1;
// Raven2: sets isRaven only (no depthPipeXorDisable assignment is visible for it).
1303 if (ASICREV_IS_RAVEN2(uChipRevision
))
1305 m_settings
.isRaven
= 1;
// Revisions in this group that are not Raven/Raven2 get htileAlignFix and applyAliasFix.
1308 if (m_settings
.isRaven
== 0)
1310 m_settings
.htileAlignFix
= 1;
1311 m_settings
.applyAliasFix
= 1;
// isDcn1 mirrors the isRaven flag.
1314 m_settings
.isDcn1
= m_settings
.isRaven
;
1316 m_settings
.metaBaseAlignFix
= 1;
// NOTE(review): presumably the default label of the switch (label not visible).
1320 ADDR_ASSERT(!"This should be a Fusion");
1328 ************************************************************************************************************************
1329 * Gfx9Lib::GetRbEquation
1335 ************************************************************************************************************************
// Builds the render-backend (RB) equation into pRbEq from x/y coordinate terms.
// The equation is resized to numRbPerSeLog2 + numSeLog2 bits.
// NOTE(review): the declarations of `start` and `idx`, the coordinate increments and the
// braces/else keywords are not visible in this listing; comments describe the visible lines only.
1337 VOID
Gfx9Lib::GetRbEquation(
1338 CoordEq
* pRbEq
, ///< [out] rb equation
1339 UINT_32 numRbPerSeLog2
, ///< [in] number of rb per shader engine
1340 UINT_32 numSeLog2
) ///< [in] number of shader engine
1343 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case it's 32x32
// The starting coordinate bit is therefore 5 (32 pixels) for 1 RB per SE, otherwise 4 (16 pixels).
1344 UINT_32 rbRegion
= (numRbPerSeLog2
== 0) ? 5 : 4;
1345 Coordinate
cx('x', rbRegion
);
1346 Coordinate
cy('y', rbRegion
);
1349 UINT_32 numRbTotalLog2
= numRbPerSeLog2
+ numSeLog2
;
1351 // Clear the rb equation
1353 pRbEq
->resize(numRbTotalLog2
);
1355 if ((numSeLog2
> 0) && (numRbPerSeLog2
== 1))
1357 // Special case when more than 1 SE, and 2 RB per SE
1358 (*pRbEq
)[0].add(cx
);
1359 (*pRbEq
)[0].add(cy
);
// When the alias fix is not applied, an extra y term is added to bit 0.
1363 if (m_settings
.applyAliasFix
== false)
1365 (*pRbEq
)[0].add(cy
);
1368 (*pRbEq
)[0].add(cy
);
// Each equation bit from `start` upward is visited twice: numBits is twice the remaining bit count.
1372 UINT_32 numBits
= 2 * (numRbTotalLog2
- start
);
1374 for (UINT_32 i
= 0; i
< numBits
; i
++)
// Index expression: counts up from `start` for the first half of the iterations, then mirrors
// back down once start + i reaches numRbTotalLog2.
1377 start
+ (((start
+ i
) >= numRbTotalLog2
) ? (2 * (numRbTotalLog2
- start
) - i
- 1) : i
);
1381 (*pRbEq
)[idx
].add(cx
);
1386 (*pRbEq
)[idx
].add(cy
);
1393 ************************************************************************************************************************
1394 * Gfx9Lib::GetDataEquation
1397 * Get data equation for fmask and Z
1400 ************************************************************************************************************************
// Builds the address equation of a data surface into pDataEq (the function header describes it as
// the data equation for fmask and Z).
// For Gfx9DataColor the layout depends on the swizzle class: linear, thick (3D) or thin.
// The trailing section (sampleStart/pixelStart/ymajStart) is presumably the branch for other
// surface types; the else keyword is not visible in this listing.
// NOTE(review): braces, else keywords, the declaration of `i` and the coordinate increments
// (cx/cy/cz/cs/cm are added repeatedly) are not visible here.
1402 VOID
Gfx9Lib::GetDataEquation(
1403 CoordEq
* pDataEq
, ///< [out] data surface equation
1404 Gfx9DataType dataSurfaceType
, ///< [in] data surface type
1405 AddrSwizzleMode swizzleMode
, ///< [in] data surface swizzle mode
1406 AddrResourceType resourceType
, ///< [in] data surface resource type
1407 UINT_32 elementBytesLog2
, ///< [in] data surface element bytes
1408 UINT_32 numSamplesLog2
) ///< [in] data surface sample count
// Coordinate terms for x, y, z and sample, all starting at bit 0.
1411 Coordinate
cx('x', 0);
1412 Coordinate
cy('y', 0);
1413 Coordinate
cz('z', 0);
1414 Coordinate
cs('s', 0);
1416 // Clear the equation
1418 pDataEq
->resize(27);
1420 if (dataSurfaceType
== Gfx9DataColor
)
// Linear color: the equation is 49 bits wide and every bit takes an 'm' coordinate term.
1422 if (IsLinear(swizzleMode
))
1424 Coordinate
cm('m', 0);
1426 pDataEq
->resize(49);
1428 for (UINT_32 i
= 0; i
< 49; i
++)
1430 (*pDataEq
)[i
].add(cm
);
1434 else if (IsThick(resourceType
, swizzleMode
))
1436 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1438 if (IsStandardSwizzle(resourceType
, swizzleMode
))
1440 // Standard 3d swizzle
1441 // Fill in bottom x bits
1442 for (i
= elementBytesLog2
; i
< 4; i
++)
1444 (*pDataEq
)[i
].add(cx
);
1447 // Fill in 2 bits of y and then z
1448 for (i
= 4; i
< 6; i
++)
1450 (*pDataEq
)[i
].add(cy
);
1453 for (i
= 6; i
< 8; i
++)
1455 (*pDataEq
)[i
].add(cz
);
// Bits 8 and 9 depend on the element size: z,y below 4 bytes; y,x at 4 bytes; x,x otherwise.
1458 if (elementBytesLog2
< 2)
1460 // fill in z & y bit
1461 (*pDataEq
)[8].add(cz
);
1462 (*pDataEq
)[9].add(cy
);
1466 else if (elementBytesLog2
== 2)
1468 // fill in y and x bit
1469 (*pDataEq
)[8].add(cy
);
1470 (*pDataEq
)[9].add(cx
);
1477 (*pDataEq
)[8].add(cx
);
1479 (*pDataEq
)[9].add(cx
);
// NOTE(review): presumably the non-standard (Z-order) 3D branch; its else is not visible.
// m2dEnd is the last bit of the 2D morton part; numZs is how many z bits follow it.
1486 UINT_32 m2dEnd
= (elementBytesLog2
==0) ? 3 : ((elementBytesLog2
< 4) ? 4 : 5);
1487 UINT_32 numZs
= (elementBytesLog2
== 0 || elementBytesLog2
== 4) ?
1488 2 : ((elementBytesLog2
== 1) ? 3 : 1);
1489 pDataEq
->mort2d(cx
, cy
, elementBytesLog2
, m2dEnd
);
1490 for (i
= m2dEnd
+ 1; i
<= m2dEnd
+ numZs
; i
++)
1492 (*pDataEq
)[i
].add(cz
);
1495 if ((elementBytesLog2
== 0) || (elementBytesLog2
== 3))
1498 (*pDataEq
)[6].add(cx
);
1499 (*pDataEq
)[7].add(cz
);
1503 else if (elementBytesLog2
== 2)
1506 (*pDataEq
)[6].add(cy
);
1507 (*pDataEq
)[7].add(cz
);
1512 (*pDataEq
)[8].add(cy
);
1513 (*pDataEq
)[9].add(cx
);
1517 // Fill in bit 10 and up
1518 pDataEq
->mort3d( cz
, cy
, cx
, 10 );
1520 else if (IsThin(resourceType
, swizzleMode
))
1522 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swizzleMode
);
// microYBits: number of y bits in the low 8 address bits; tileSplitStart: first bit used for samples.
1524 UINT_32 microYBits
= (8 - elementBytesLog2
) / 2;
1525 UINT_32 tileSplitStart
= blockSizeLog2
- numSamplesLog2
;
1527 // Fill in bottom x bits
1528 for (i
= elementBytesLog2
; i
< 4; i
++)
1530 (*pDataEq
)[i
].add(cx
);
1533 // Fill in bottom y bits
1534 for (i
= 4; i
< 4 + microYBits
; i
++)
1536 (*pDataEq
)[i
].add(cy
);
1539 // Fill in last of the micro_x bits
1540 for (i
= 4 + microYBits
; i
< 8; i
++)
1542 (*pDataEq
)[i
].add(cx
);
1545 // Fill in x/y bits below sample split
1546 pDataEq
->mort2d(cy
, cx
, 8, tileSplitStart
- 1);
1547 // Fill in sample bits
1548 for (i
= 0; i
< numSamplesLog2
; i
++)
1551 (*pDataEq
)[tileSplitStart
+ i
].add(cs
);
1553 // Fill in x/y bits above sample split
// The x/y order above the block depends on whether the parities of numSamplesLog2 and blockSizeLog2 differ.
1554 if ((numSamplesLog2
& 1) ^ (blockSizeLog2
& 1))
1556 pDataEq
->mort2d(cx
, cy
, blockSizeLog2
);
1560 pDataEq
->mort2d(cy
, cx
, blockSizeLog2
);
// Reached for a color swizzle mode that is neither linear, thick nor thin.
1565 ADDR_ASSERT_ALWAYS();
// NOTE(review): presumably the non-color branch: sample bits sit directly above the element
// bytes, followed by pixel bits.
1571 UINT_32 sampleStart
= elementBytesLog2
;
1572 UINT_32 pixelStart
= elementBytesLog2
+ numSamplesLog2
;
1573 UINT_32 ymajStart
= 6 + numSamplesLog2
;
1575 for (UINT_32 s
= 0; s
< numSamplesLog2
; s
++)
1578 (*pDataEq
)[sampleStart
+ s
].add(cs
);
1581 // Put in the x-major order pixel bits
1582 pDataEq
->mort2d(cx
, cy
, pixelStart
, ymajStart
- 1);
1583 // Put in the y-major order pixel bits
1584 pDataEq
->mort2d(cy
, cx
, ymajStart
);
1589 ************************************************************************************************************************
1590 * Gfx9Lib::GetPipeEquation
1596 ************************************************************************************************************************
// Derives the pipe equation (pPipeEq) from the data equation (pDataEq): copies numPipeLog2 bits
// starting at the pipe interleave bit and, for xor swizzle modes, xors in higher address bits.
// NOTE(review): declarations of the locals dataEq, xorMask, xorMask2 and co, the braces/else
// keywords and the body of the while loop are not visible in this listing.
1598 VOID
Gfx9Lib::GetPipeEquation(
1599 CoordEq
* pPipeEq
, ///< [out] pipe equation
1600 CoordEq
* pDataEq
, ///< [in] data equation
1601 UINT_32 pipeInterleaveLog2
, ///< [in] pipe interleave
1602 UINT_32 numPipeLog2
, ///< [in] number of pipes
1603 UINT_32 numSamplesLog2
, ///< [in] data surface sample count
1604 Gfx9DataType dataSurfaceType
, ///< [in] data surface type
1605 AddrSwizzleMode swizzleMode
, ///< [in] data surface swizzle mode
1606 AddrResourceType resourceType
///< [in] data surface resource type
1609 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swizzleMode
);
// Work on a local copy (dataEq) of the caller's data equation.
1612 pDataEq
->copy(dataEq
);
// For color surfaces the local copy is shifted down by numSamplesLog2, starting at bit
// blockSizeLog2 - numSamplesLog2.
1614 if (dataSurfaceType
== Gfx9DataColor
)
1616 INT_32 shift
= static_cast<INT_32
>(numSamplesLog2
);
1617 dataEq
.shift(-shift
, blockSizeLog2
- numSamplesLog2
);
// Initial pipe equation: numPipeLog2 bits of the data equation starting at pipeInterleaveLog2.
1620 dataEq
.copy(*pPipeEq
, pipeInterleaveLog2
, numPipeLog2
);
1622 // This section should only apply to z/stencil, maybe fmask
1623 // If the pipe bit is below the comp block size,
1624 // then keep moving up the address until we find a bit that is above
1625 UINT_32 pipeStart
= 0;
1627 if (dataSurfaceType
!= Gfx9DataColor
)
// tileMin is coordinate x bit 3; the loop compares the first term of each candidate bit against it.
1629 Coordinate
tileMin('x', 3);
1631 while (dataEq
[pipeInterleaveLog2
+ pipeStart
][0] < tileMin
)
1636 // if pipe is 0, then the first pipe bit is above the comp block size,
1637 // so we don't need to do anything
1638 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1639 // we will get the same pipe equation
1642 for (UINT_32 i
= 0; i
< numPipeLog2
; i
++)
1644 // Copy the i-th bit above pipe interleave (offset by pipeStart) to the current pipe equation bit
1645 dataEq
[pipeInterleaveLog2
+ pipeStart
+ i
].copyto((*pPipeEq
)[i
]);
1650 if (IsPrt(swizzleMode
))
1652 // Clear out bits above the block size if prt's are enabled
1653 dataEq
.resize(blockSizeLog2
);
1657 if (IsXor(swizzleMode
))
// Thick (3D) surfaces: take 2 * numPipeLog2 bits above the pipe bits and fold each adjacent pair
// into one xor-mask bit.
1661 if (IsThick(resourceType
, swizzleMode
))
1665 dataEq
.copy(xorMask2
, pipeInterleaveLog2
+ numPipeLog2
, 2 * numPipeLog2
);
1667 xorMask
.resize(numPipeLog2
);
1669 for (UINT_32 pipeIdx
= 0; pipeIdx
< numPipeLog2
; pipeIdx
++)
1671 xorMask
[pipeIdx
].add(xorMask2
[2 * pipeIdx
]);
1672 xorMask
[pipeIdx
].add(xorMask2
[2 * pipeIdx
+ 1]);
1677 // Xor in the bits above the pipe+gpu bits
1678 dataEq
.copy(xorMask
, pipeInterleaveLog2
+ pipeStart
+ numPipeLog2
, numPipeLog2
);
1680 if ((numSamplesLog2
== 0) && (IsPrt(swizzleMode
) == FALSE
))
1684 // if 1xaa and not prt, then xor in the z bits
// z bits are assigned in reverse order: pipe bit pipeIdx gets z bit (numPipeLog2 - 1 - pipeIdx).
1686 xorMask2
.resize(numPipeLog2
);
1687 for (UINT_32 pipeIdx
= 0; pipeIdx
< numPipeLog2
; pipeIdx
++)
1689 co
.set('z', numPipeLog2
- 1 - pipeIdx
);
1690 xorMask2
[pipeIdx
].add(co
);
1693 pPipeEq
->xorin(xorMask2
);
1698 pPipeEq
->xorin(xorMask
);
1702 ************************************************************************************************************************
1703 * Gfx9Lib::GetMetaEquation
1706 * Get meta equation for cmask/htile/DCC
1708 * Pointer to a calculated meta equation
1709 ************************************************************************************************************************
// Returns a pointer to the meta equation for metaEqParams, using a small cache of
// MaxCachedMetaEq entries keyed by the parameter struct (m_cachedMetaEqKey / m_cachedMetaEq).
// NOTE(review): braces, the loop exit on a match, the else keyword and the return statement are
// not visible in this listing.
1711 const CoordEq
* Gfx9Lib::GetMetaEquation(
1712 const MetaEqParams
& metaEqParams
)
1714 UINT_32 cachedMetaEqIndex
;
// Linear search of the cache for a key whose bytes equal metaEqParams.
// NOTE(review): memcmp compares the raw object bytes, so if MetaEqParams contains padding the
// comparison also covers the padding bytes - confirm the struct layout.
1716 for (cachedMetaEqIndex
= 0; cachedMetaEqIndex
< MaxCachedMetaEq
; cachedMetaEqIndex
++)
1718 if (memcmp(&metaEqParams
,
1719 &m_cachedMetaEqKey
[cachedMetaEqIndex
],
1720 static_cast<UINT_32
>(sizeof(metaEqParams
))) == 0)
1726 CoordEq
* pMetaEq
= NULL
;
// An index below MaxCachedMetaEq means the search found a match: reuse the cached equation.
1728 if (cachedMetaEqIndex
< MaxCachedMetaEq
)
1730 pMetaEq
= &m_cachedMetaEq
[cachedMetaEqIndex
];
// Otherwise (presumably the else branch): overwrite the slot at m_metaEqOverrideIndex with the new
// key, advance that index with wrap-around, and generate the equation into the slot.
1734 m_cachedMetaEqKey
[m_metaEqOverrideIndex
] = metaEqParams
;
1736 pMetaEq
= &m_cachedMetaEq
[m_metaEqOverrideIndex
++];
1738 m_metaEqOverrideIndex
%= MaxCachedMetaEq
;
1740 GenMetaEquation(pMetaEq
,
1741 metaEqParams
.maxMip
,
1742 metaEqParams
.elementBytesLog2
,
1743 metaEqParams
.numSamplesLog2
,
1744 metaEqParams
.metaFlag
,
1745 metaEqParams
.dataSurfaceType
,
1746 metaEqParams
.swizzleMode
,
1747 metaEqParams
.resourceType
,
1748 metaEqParams
.metaBlkWidthLog2
,
1749 metaEqParams
.metaBlkHeightLog2
,
1750 metaEqParams
.metaBlkDepthLog2
,
1751 metaEqParams
.compBlkWidthLog2
,
1752 metaEqParams
.compBlkHeightLog2
,
1753 metaEqParams
.compBlkDepthLog2
);
1760 ************************************************************************************************************************
1761 * Gfx9Lib::GenMetaEquation
1764 * Generate meta equation for cmask/htile/DCC
1767 ************************************************************************************************************************
// Generates the metadata address equation (cmask/htile/DCC per the function header) into pMetaEq.
// Visible steps: build the data and pipe equations, form a morton-ordered meta equation, filter
// it to the bits between the compressed-block and meta-block sizes, remove pipe/RB bits, widen to
// a 49-bit nibble address, then re-insert pipe bits, remaining RB bits and uncompressed fragment bits.
// NOTE(review): this listing omits braces, else keywords and the declarations of several locals
// (dataEq, co, cs); comments describe only the visible lines.
1769 VOID
Gfx9Lib::GenMetaEquation(
1770 CoordEq
* pMetaEq
, ///< [out] meta equation
1771 UINT_32 maxMip
, ///< [in] max mip Id
1772 UINT_32 elementBytesLog2
, ///< [in] data surface element bytes
1773 UINT_32 numSamplesLog2
, ///< [in] data surface sample count
1774 ADDR2_META_FLAGS metaFlag
, ///< [in] meta flag
1775 Gfx9DataType dataSurfaceType
, ///< [in] data surface type
1776 AddrSwizzleMode swizzleMode
, ///< [in] data surface swizzle mode
1777 AddrResourceType resourceType
, ///< [in] data surface resource type
1778 UINT_32 metaBlkWidthLog2
, ///< [in] meta block width
1779 UINT_32 metaBlkHeightLog2
, ///< [in] meta block height
1780 UINT_32 metaBlkDepthLog2
, ///< [in] meta block depth
1781 UINT_32 compBlkWidthLog2
, ///< [in] compress block width
1782 UINT_32 compBlkHeightLog2
, ///< [in] compress block height
1783 UINT_32 compBlkDepthLog2
) ///< [in] compress block depth
1786 UINT_32 numPipeTotalLog2
= GetPipeLog2ForMetaAddressing(metaFlag
.pipeAligned
, swizzleMode
);
1787 UINT_32 pipeInterleaveLog2
= m_pipeInterleaveLog2
;
1789 // Get the correct data address and rb equation
1791 GetDataEquation(&dataEq
, dataSurfaceType
, swizzleMode
, resourceType
,
1792 elementBytesLog2
, numSamplesLog2
);
1794 // Get pipe and rb equations
1795 CoordEq pipeEquation
;
1796 GetPipeEquation(&pipeEquation
, &dataEq
, pipeInterleaveLog2
, numPipeTotalLog2
,
1797 numSamplesLog2
, dataSurfaceType
, swizzleMode
, resourceType
);
// From here on the pipe bit count is whatever size the generated pipe equation actually has.
1798 numPipeTotalLog2
= pipeEquation
.getsize();
1800 if (metaFlag
.linear
)
1802 // Linear metadata support was removed for GFX9! No one can use this feature.
1803 ADDR_ASSERT_ALWAYS();
1805 ADDR_ASSERT(dataSurfaceType
== Gfx9DataColor
);
// Linear metadata starts from a copy of the data equation.
1807 dataEq
.copy(*pMetaEq
);
1809 if (IsLinear(swizzleMode
))
1811 if (metaFlag
.pipeAligned
)
1813 // Remove the pipe bits
1814 INT_32 shift
= static_cast<INT_32
>(numPipeTotalLog2
);
1815 pMetaEq
->shift(-shift
, pipeInterleaveLog2
);
1817 // Divide by comp block size, which for linear (which is always color) is 256 B
1820 if (metaFlag
.pipeAligned
)
1822 // Put pipe bits back in
1823 pMetaEq
->shift(numPipeTotalLog2
, pipeInterleaveLog2
);
1825 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
1827 pipeEquation
[i
].copyto((*pMetaEq
)[pipeInterleaveLog2
+ i
]);
// NOTE(review): presumably the start of the non-linear-metadata branch (else not visible).
// For color surfaces the compressed fragment count is capped at m_maxCompFragLog2; the remaining
// sample bits are treated as uncompressed fragments.
1836 UINT_32 maxCompFragLog2
= static_cast<INT_32
>(m_maxCompFragLog2
);
1837 UINT_32 compFragLog2
=
1838 ((dataSurfaceType
== Gfx9DataColor
) && (numSamplesLog2
> maxCompFragLog2
)) ?
1839 maxCompFragLog2
: numSamplesLog2
;
1841 UINT_32 uncompFragLog2
= numSamplesLog2
- compFragLog2
;
1843 // Make sure the metaaddr is cleared
1845 pMetaEq
->resize(27);
// Thick surfaces use a 3D morton order; the two calls differ only in x/y order
// (the selecting condition is not visible here).
1847 if (IsThick(resourceType
, swizzleMode
))
1849 Coordinate
cx('x', 0);
1850 Coordinate
cy('y', 0);
1851 Coordinate
cz('z', 0);
1855 pMetaEq
->mort3d(cy
, cx
, cz
);
1859 pMetaEq
->mort3d(cx
, cy
, cz
);
// Non-thick surfaces use a 2D morton order starting at bit compFragLog2, leaving the low bits
// for the compressed fragment (sample) bits added below.
1864 Coordinate
cx('x', 0);
1865 Coordinate
cy('y', 0);
1870 pMetaEq
->mort2d(cy
, cx
, compFragLog2
);
1874 pMetaEq
->mort2d(cx
, cy
, compFragLog2
);
1877 //------------------------------------------------------------------------------------------------------------------------
1878 // Put the compressible fragments at the lsb
1879 // the uncompressible frags will be at the msb of the micro address
1880 //------------------------------------------------------------------------------------------------------------------------
1881 for (UINT_32 s
= 0; s
< compFragLog2
; s
++)
1884 (*pMetaEq
)[s
].add(cs
);
1888 // Keep a copy of the pipe equations
1889 CoordEq origPipeEquation
;
1890 pipeEquation
.copy(origPipeEquation
);
1893 // filter out everything under the compressed block size
1894 co
.set('x', compBlkWidthLog2
);
1895 pMetaEq
->Filter('<', co
, 0, 'x');
1896 co
.set('y', compBlkHeightLog2
);
1897 pMetaEq
->Filter('<', co
, 0, 'y');
1898 co
.set('z', compBlkDepthLog2
);
1899 pMetaEq
->Filter('<', co
, 0, 'z');
1901 // For non-color, filter out sample bits
1902 if (dataSurfaceType
!= Gfx9DataColor
)
1905 pMetaEq
->Filter('<', co
, 0, 's');
1908 // filter out everything above the metablock size
1909 co
.set('x', metaBlkWidthLog2
- 1);
1910 pMetaEq
->Filter('>', co
, 0, 'x');
1911 co
.set('y', metaBlkHeightLog2
- 1);
1912 pMetaEq
->Filter('>', co
, 0, 'y');
1913 co
.set('z', metaBlkDepthLog2
- 1);
1914 pMetaEq
->Filter('>', co
, 0, 'z');
1916 // filter out everything above the metablock size for the channel bits
1917 co
.set('x', metaBlkWidthLog2
- 1);
1918 pipeEquation
.Filter('>', co
, 0, 'x');
1919 co
.set('y', metaBlkHeightLog2
- 1);
1920 pipeEquation
.Filter('>', co
, 0, 'y');
1921 co
.set('z', metaBlkDepthLog2
- 1);
1922 pipeEquation
.Filter('>', co
, 0, 'z');
1924 // Make sure we still have the same number of channel bits
1925 if (pipeEquation
.getsize() != numPipeTotalLog2
)
1927 ADDR_ASSERT_ALWAYS();
1930 // Loop through all channel and rb bits,
1931 // and make sure these components exist in the metadata address
1932 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
1934 for (UINT_32 j
= pipeEquation
[i
].getsize(); j
> 0; j
--)
1936 if (pMetaEq
->Exists(pipeEquation
[i
][j
- 1]) == FALSE
)
1938 ADDR_ASSERT_ALWAYS();
// SE / RB counts contribute only when the metadata is RB-aligned; otherwise both are 0.
1943 const UINT_32 numSeLog2
= metaFlag
.rbAligned
? m_seLog2
: 0;
1944 const UINT_32 numRbPeSeLog2
= metaFlag
.rbAligned
? m_rbPerSeLog2
: 0;
1945 const UINT_32 numRbTotalLog2
= numRbPeSeLog2
+ numSeLog2
;
1946 CoordEq origRbEquation
;
1948 GetRbEquation(&origRbEquation
, numRbPeSeLog2
, numSeLog2
);
// rbEquation is a working copy that gets reduced below; origRbEquation is kept for re-insertion.
1950 CoordEq rbEquation
= origRbEquation
;
// Same existence check as above, now for every term of the RB equation.
1952 for (UINT_32 i
= 0; i
< numRbTotalLog2
; i
++)
1954 for (UINT_32 j
= rbEquation
[i
].getsize(); j
> 0; j
--)
1956 if (pMetaEq
->Exists(rbEquation
[i
][j
- 1]) == FALSE
)
1958 ADDR_ASSERT_ALWAYS();
// NOTE(review): the body of this alias-fix branch is not visible in this listing.
1963 if (m_settings
.applyAliasFix
)
1968 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1969 for (UINT_32 i
= 0; i
< numRbTotalLog2
; i
++)
1971 for (UINT_32 j
= 0; j
< numPipeTotalLog2
; j
++)
1973 BOOL_32 isRbEquationInPipeEquation
= FALSE
;
// With the alias fix, the pipe bit is additionally filtered on 'z' before being compared.
1975 if (m_settings
.applyAliasFix
)
1977 CoordTerm filteredPipeEq
;
1978 filteredPipeEq
= pipeEquation
[j
];
1980 filteredPipeEq
.Filter('>', co
, 0, 'z');
1982 isRbEquationInPipeEquation
= (rbEquation
[i
] == filteredPipeEq
);
1986 isRbEquationInPipeEquation
= (rbEquation
[i
] == pipeEquation
[j
]);
1989 if (isRbEquationInPipeEquation
)
1991 rbEquation
[i
].Clear();
// One flag per possible RB bit, set below when pipe terms are appended to that RB equation bit.
1996 bool rbAppendedWithPipeBits
[1 << (MaxSeLog2
+ MaxRbPerSeLog2
)] = {};
1998 // Loop through each bit of the channel, get the smallest coordinate,
1999 // and remove it from the metaaddr, and rb_equation
2000 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
2002 pipeEquation
[i
].getsmallest(co
);
// Filtering on '=' is expected to drop exactly one bit from the meta equation.
2004 UINT_32 old_size
= pMetaEq
->getsize();
2005 pMetaEq
->Filter('=', co
);
2006 UINT_32 new_size
= pMetaEq
->getsize();
2007 if (new_size
!= old_size
-1)
2009 ADDR_ASSERT_ALWAYS();
2011 pipeEquation
.remove(co
);
2012 for (UINT_32 j
= 0; j
< numRbTotalLog2
; j
++)
2014 if (rbEquation
[j
].remove(co
))
2016 // if we actually removed something from this bit, then add the remaining
2017 // channel bits, as these can be removed for this bit
2018 for (UINT_32 k
= 0; k
< pipeEquation
[i
].getsize(); k
++)
2020 if (pipeEquation
[i
][k
] != co
)
2022 rbEquation
[j
].add(pipeEquation
[i
][k
]);
2023 rbAppendedWithPipeBits
[j
] = true;
2030 // Loop through the rb bits and see what remain;
2031 // filter out the smallest coordinate if it remains
2032 UINT_32 rbBitsLeft
= 0;
2033 for (UINT_32 i
= 0; i
< numRbTotalLog2
; i
++)
2035 BOOL_32 isRbEqAppended
= FALSE
;
// With the alias fix, an RB bit that had pipe terms appended needs more than one term to count
// as remaining; otherwise any non-empty bit counts.
2037 if (m_settings
.applyAliasFix
)
2039 isRbEqAppended
= (rbEquation
[i
].getsize() > (rbAppendedWithPipeBits
[i
] ? 1 : 0));
2043 isRbEqAppended
= (rbEquation
[i
].getsize() > 0);
2049 rbEquation
[i
].getsmallest(co
);
2050 UINT_32 old_size
= pMetaEq
->getsize();
2051 pMetaEq
->Filter('=', co
);
2052 UINT_32 new_size
= pMetaEq
->getsize();
2053 if (new_size
!= old_size
- 1)
2057 for (UINT_32 j
= i
+ 1; j
< numRbTotalLog2
; j
++)
2059 if (rbEquation
[j
].remove(co
))
2061 // if we actually removed something from this bit, then add the remaining
2062 // rb bits, as these can be removed for this bit
2063 for (UINT_32 k
= 0; k
< rbEquation
[i
].getsize(); k
++)
2065 if (rbEquation
[i
][k
] != co
)
2067 rbEquation
[j
].add(rbEquation
[i
][k
]);
2068 rbAppendedWithPipeBits
[j
] |= rbAppendedWithPipeBits
[i
];
2076 // capture the size of the metaaddr
2077 UINT_32 metaSize
= pMetaEq
->getsize();
2078 // resize to 49 bits...make this a nibble address
2079 pMetaEq
->resize(49);
2080 // Concatenate the macro address above the current address
2081 for (UINT_32 i
= metaSize
, j
= 0; i
< 49; i
++, j
++)
2084 (*pMetaEq
)[i
].add(co
);
2087 // Multiply by meta element size (in nibbles)
// NOTE(review): the bodies of the two branches below are not visible in this listing.
2088 if (dataSurfaceType
== Gfx9DataColor
)
2092 else if (dataSurfaceType
== Gfx9DataDepthStencil
)
2097 //------------------------------------------------------------------------------------------
2098 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2099 // Shift up from pipe interleave number of channel
2100 // and rb bits left, and uncompressed fragments
2101 //------------------------------------------------------------------------------------------
2103 pMetaEq
->shift(numPipeTotalLog2
+ rbBitsLeft
+ uncompFragLog2
, pipeInterleaveLog2
+ 1);
2105 // Put in the channel bits
2106 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
2108 origPipeEquation
[i
].copyto((*pMetaEq
)[pipeInterleaveLog2
+1 + i
]);
2111 // Put in remaining rb bits
// i walks the RB bits cyclically (modulo numRbTotalLog2); the loop runs until j reaches rbBitsLeft.
// NOTE(review): the increment of j is not visible in this listing.
2112 for (UINT_32 i
= 0, j
= 0; j
< rbBitsLeft
; i
= (i
+ 1) % numRbTotalLog2
)
2114 BOOL_32 isRbEqAppended
= FALSE
;
2116 if (m_settings
.applyAliasFix
)
2118 isRbEqAppended
= (rbEquation
[i
].getsize() > (rbAppendedWithPipeBits
[i
] ? 1 : 0));
2122 isRbEqAppended
= (rbEquation
[i
].getsize() > 0);
2127 origRbEquation
[i
].copyto((*pMetaEq
)[pipeInterleaveLog2
+ 1 + numPipeTotalLog2
+ j
]);
2128 // Mark any rb bit we add in to the rb mask
2133 //------------------------------------------------------------------------------------------
2134 // Put in the uncompressed fragment bits
2135 //------------------------------------------------------------------------------------------
2136 for (UINT_32 i
= 0; i
< uncompFragLog2
; i
++)
2138 co
.set('s', compFragLog2
+ i
);
2139 (*pMetaEq
)[pipeInterleaveLog2
+ 1 + numPipeTotalLog2
+ rbBitsLeft
+ i
].add(co
);
2145 ************************************************************************************************************************
2146 * Gfx9Lib::IsEquationSupported
2149 * Check if equation is supported for given swizzle mode and resource type.
2153 ************************************************************************************************************************
// Reports whether an address equation is available for the given resource type, swizzle mode and
// element size (log2 bytes). The visible predicate requires all of:
//   - elementBytesLog2 below MaxElementBytesLog2,
//   - a non-linear swizzle mode,
//   - either a 2D texture that has elementBytesLog2 < 4 or is neither rotate- nor Z-order-swizzled,
//     or a 3D texture that is neither rotate-swizzled nor a 256B block mode.
// NOTE(review): the return statement is not visible in this listing.
2155 BOOL_32
Gfx9Lib::IsEquationSupported(
2156 AddrResourceType rsrcType
,
2157 AddrSwizzleMode swMode
,
2158 UINT_32 elementBytesLog2
) const
2160 BOOL_32 supported
= (elementBytesLog2
< MaxElementBytesLog2
) &&
2161 (IsLinear(swMode
) == FALSE
) &&
2162 (((IsTex2d(rsrcType
) == TRUE
) &&
2163 ((elementBytesLog2
< 4) ||
2164 ((IsRotateSwizzle(swMode
) == FALSE
) &&
2165 (IsZOrderSwizzle(swMode
) == FALSE
)))) ||
2166 ((IsTex3d(rsrcType
) == TRUE
) &&
2167 (IsRotateSwizzle(swMode
) == FALSE
) &&
2168 (IsBlock256b(swMode
) == FALSE
)));
2174 ************************************************************************************************************************
2175 * Gfx9Lib::InitEquationTable
2178 * Initialize Equation table.
2182 ************************************************************************************************************************
// Fills m_equationTable and m_equationLookupTable by iterating over every
// (resource type, swizzle mode, element size) combination. Supported combinations get an equation
// computed and stored; every combination gets an entry in the lookup table, which stays
// ADDR_INVALID_EQUATION_INDEX when no equation was stored.
// NOTE(review): braces, else keywords and the update of m_numEquations are not visible in this listing.
2184 VOID
Gfx9Lib::InitEquationTable()
// Start from an all-zero equation table.
2186 memset(m_equationTable
, 0, sizeof(m_equationTable
));
2188 // Loop all possible resource type (2D/3D)
2189 for (UINT_32 rsrcTypeIdx
= 0; rsrcTypeIdx
< MaxRsrcType
; rsrcTypeIdx
++)
// Table index 0 corresponds to ADDR_RSRC_TEX_2D.
2191 AddrResourceType rsrcType
= static_cast<AddrResourceType
>(rsrcTypeIdx
+ ADDR_RSRC_TEX_2D
);
2193 // Loop all possible swizzle mode
2194 for (UINT_32 swModeIdx
= 0; swModeIdx
< MaxSwMode
; swModeIdx
++)
2196 AddrSwizzleMode swMode
= static_cast<AddrSwizzleMode
>(swModeIdx
);
2198 // Loop all possible bpp
2199 for (UINT_32 bppIdx
= 0; bppIdx
< MaxElementBytesLog2
; bppIdx
++)
2201 UINT_32 equationIndex
= ADDR_INVALID_EQUATION_INDEX
;
2203 // Check if the input is supported
2204 if (IsEquationSupported(rsrcType
, swMode
, bppIdx
))
2206 ADDR_EQUATION equation
;
2207 ADDR_E_RETURNCODE retCode
;
2209 memset(&equation
, 0, sizeof(ADDR_EQUATION
));
2211 // Generate the equation
// Dispatch: 256B block on a 2D texture, then thin, otherwise thick.
2212 if (IsBlock256b(swMode
) && IsTex2d(rsrcType
))
2214 retCode
= ComputeBlock256Equation(rsrcType
, swMode
, bppIdx
, &equation
);
2216 else if (IsThin(rsrcType
, swMode
))
2218 retCode
= ComputeThinEquation(rsrcType
, swMode
, bppIdx
, &equation
);
2222 retCode
= ComputeThickEquation(rsrcType
, swMode
, bppIdx
, &equation
);
2225 // Only fill the equation into the table if the return code is ADDR_OK,
2226 // otherwise if the return code is not ADDR_OK, it indicates this is not
2227 // a valid input, we do nothing but just fill invalid equation index
2228 // into the lookup table.
2229 if (retCode
== ADDR_OK
)
// The new equation takes the next free slot; the bound is checked by assertion only.
2231 equationIndex
= m_numEquations
;
2232 ADDR_ASSERT(equationIndex
< EquationTableSize
);
2234 m_equationTable
[equationIndex
] = equation
;
2240 ADDR_ASSERT_ALWAYS();
2244 // Fill the index into the lookup table, if the combination is not supported
2245 // fill the invalid equation index
2246 m_equationLookupTable
[rsrcTypeIdx
][swModeIdx
][bppIdx
] = equationIndex
;
2253 ************************************************************************************************************************
2254 * Gfx9Lib::HwlGetEquationIndex
2257 * Interface function stub of GetEquationIndex
2261 ************************************************************************************************************************
// Looks up the equation index for the surface described by pIn and, when pOut->pMipInfo is
// non-NULL, writes that index into the mip info entries for pIn->numMipLevels levels.
// The index stays ADDR_INVALID_EQUATION_INDEX when IsEquationSupported() rejects the combination.
// NOTE(review): braces and the return statement are not visible in this listing.
2263 UINT_32
Gfx9Lib::HwlGetEquationIndex(
2264 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
,
2265 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
2268 AddrResourceType rsrcType
= pIn
->resourceType
;
2269 AddrSwizzleMode swMode
= pIn
->swizzleMode
;
// bpp is in bits; shift by 3 to get bytes before taking log2.
2270 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
2271 UINT_32 index
= ADDR_INVALID_EQUATION_INDEX
;
2273 if (IsEquationSupported(rsrcType
, swMode
, elementBytesLog2
))
// The lookup table's first dimension is the resource type value minus 1.
2275 UINT_32 rsrcTypeIdx
= static_cast<UINT_32
>(rsrcType
) - 1;
2276 UINT_32 swModeIdx
= static_cast<UINT_32
>(swMode
);
2278 index
= m_equationLookupTable
[rsrcTypeIdx
][swModeIdx
][elementBytesLog2
];
// Every mip level receives the same equation index.
2281 if (pOut
->pMipInfo
!= NULL
)
2283 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
2285 pOut
->pMipInfo
[i
].equationIndex
= index
;
2293 ************************************************************************************************************************
2294 * Gfx9Lib::HwlComputeBlock256Equation
2297 * Interface function stub of ComputeBlock256Equation
2301 ************************************************************************************************************************
// Fills pEquation with the 8-bit address equation of a 256-byte block for the given swizzle mode
// and element size. The result code starts as ADDR_OK and is set to ADDR_INVALIDPARAMS on the
// visible failure paths.
// NOTE(review): braces, the declaration of `i`, the case labels/bodies of the three switches and
// the return statement are not visible in this listing.
2303 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeBlock256Equation(
2304 AddrResourceType rsrcType
,
2305 AddrSwizzleMode swMode
,
2306 UINT_32 elementBytesLog2
,
2307 ADDR_EQUATION
* pEquation
) const
2309 ADDR_E_RETURNCODE ret
= ADDR_OK
;
2311 pEquation
->numBits
= 8;
// The lowest elementBytesLog2 address bits are initialised first, one channel setting per bit.
2314 for (; i
< elementBytesLog2
; i
++)
2316 InitChannel(1, 0 , i
, &pEquation
->addr
[i
]);
// pixelBit points at the first address bit above the element bytes.
2319 ADDR_CHANNEL_SETTING
* pixelBit
= &pEquation
->addr
[elementBytesLog2
];
// Pre-built channel settings: x[i] uses index elementBytesLog2 + i, y[i] uses index i.
2321 const UINT_32 maxBitsUsed
= 4;
2322 ADDR_CHANNEL_SETTING x
[maxBitsUsed
] = {};
2323 ADDR_CHANNEL_SETTING y
[maxBitsUsed
] = {};
2325 for (i
= 0; i
< maxBitsUsed
; i
++)
2327 InitChannel(1, 0, elementBytesLog2
+ i
, &x
[i
]);
2328 InitChannel(1, 1, i
, &y
[i
]);
// Standard swizzle: per-element-size layout; the assert/INVALIDPARAMS pair is presumably the default case.
2331 if (IsStandardSwizzle(rsrcType
, swMode
))
2333 switch (elementBytesLog2
)
2376 ADDR_ASSERT_ALWAYS();
2377 ret
= ADDR_INVALIDPARAMS
;
// Display swizzle: same structure as above.
2381 else if (IsDisplaySwizzle(rsrcType
, swMode
))
2383 switch (elementBytesLog2
)
2426 ADDR_ASSERT_ALWAYS();
2427 ret
= ADDR_INVALIDPARAMS
;
// Rotate swizzle: same structure as above.
2431 else if (IsRotateSwizzle(swMode
))
2433 switch (elementBytesLog2
)
2470 ADDR_ASSERT_ALWAYS();
2472 ret
= ADDR_INVALIDPARAMS
;
// Any other swizzle type is rejected.
2478 ADDR_ASSERT_ALWAYS();
2479 ret
= ADDR_INVALIDPARAMS
;
// Consistency check: the highest valid channel index of each kind in the equation must match the
// micro block dimensions from Block256_2d (width is scaled by the element size in bytes).
2485 MAYBE_UNUSED Dim2d microBlockDim
= Block256_2d
[elementBytesLog2
];
2486 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation
->addr
, 8, 0)) ==
2487 (microBlockDim
.w
* (1 << elementBytesLog2
)));
2488 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation
->addr
, 8, 1)) == microBlockDim
.h
);
2495 ************************************************************************************************************************
2496 * Gfx9Lib::HwlComputeThinEquation
2499 * Interface function stub of ComputeThinEquation
2503 ************************************************************************************************************************
// Fills *pEquation with the address equation of a thin block for the given resource type,
// swizzle mode and element size.
//
// Visible steps:
//   1. Work out how many address bits may take part in XOR (maxXorBits); for non-PRT XOR modes
//      this can exceed the block size.
//   2. Prepare tables of candidate x (channel 0) and y (channel 1) coordinate bits.
//   3. Fill the low bits: Z-order interleaving for Z-order swizzle, otherwise delegate to
//      HwlComputeBlock256Equation.
//   4. Interleave y/x bits up to blockSizeLog2, and keep the bits beyond the block in xorExtra.
//   5. Fill xor1 (pipe and bank ranges) from address bits, and xor2 on channel 2 for non-PRT modes.
//   6. Set pEquation->numBits to blockSizeLog2.
// NOTE(review): the declarations of xIdx/yIdx and parts of the branch structure are not visible
// in this listing.
2505 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeThinEquation(
2506 AddrResourceType rsrcType
,
2507 AddrSwizzleMode swMode
,
2508 UINT_32 elementBytesLog2
,
2509 ADDR_EQUATION
* pEquation
) const
2511 ADDR_E_RETURNCODE ret
= ADDR_OK
;
2513 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swMode
);
// Start with the block size; it is widened below for non-PRT XOR modes.
2515 UINT_32 maxXorBits
= blockSizeLog2
;
2516 if (IsNonPrtXor(swMode
))
2518 // For non-prt-xor, maybe need to initialize some more bits for xor
2519 // The highest xor bit used in equation will be max the following 3 items:
2520 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2521 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2524 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+ 2 * GetPipeXorBits(blockSizeLog2
));
2525 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+
2526 GetPipeXorBits(blockSizeLog2
) +
2527 2 * GetBankXorBits(blockSizeLog2
));
// x[] and y[] together must be able to supply maxXorBits coordinate bits.
2530 const UINT_32 maxBitsUsed
= 14;
2531 ADDR_ASSERT((2 * maxBitsUsed
) >= maxXorBits
);
2532 ADDR_CHANNEL_SETTING x
[maxBitsUsed
] = {};
2533 ADDR_CHANNEL_SETTING y
[maxBitsUsed
] = {};
// xorExtra holds the coordinate bits that lie above blockSizeLog2 but are still XOR sources.
2535 const UINT_32 extraXorBits
= 16;
2536 ADDR_ASSERT(extraXorBits
>= maxXorBits
- blockSizeLog2
);
2537 ADDR_CHANNEL_SETTING xorExtra
[extraXorBits
] = {};
// x[i] is channel 0 at index (elementBytesLog2 + i); y[i] is channel 1 at index i.
2539 for (UINT_32 i
= 0; i
< maxBitsUsed
; i
++)
2541 InitChannel(1, 0, elementBytesLog2
+ i
, &x
[i
]);
2542 InitChannel(1, 1, i
, &y
[i
]);
2545 ADDR_CHANNEL_SETTING
* pixelBit
= pEquation
->addr
;
// Address bits below elementBytesLog2 are initialised on channel 0.
2547 for (UINT_32 i
= 0; i
< elementBytesLog2
; i
++)
2549 InitChannel(1, 0 , i
, &pixelBit
[i
]);
2554 UINT_32 lowBits
= 0;
2556 if (IsZOrderSwizzle(swMode
))
2558 if (elementBytesLog2
<= 3)
// Bits [elementBytesLog2, 6): alternate x and y, starting with x.
2560 for (UINT_32 i
= elementBytesLog2
; i
< 6; i
++)
2562 pixelBit
[i
] = (((i
- elementBytesLog2
) & 1) == 0) ? x
[xIdx
++] : y
[yIdx
++];
2569 ret
= ADDR_INVALIDPARAMS
;
// The low bits come from the 256-byte block equation.
2574 ret
= HwlComputeBlock256Equation(rsrcType
, swMode
, elementBytesLog2
, pEquation
);
// Continue the x/y interleave after the bits already covered by the 256B micro block.
2578 Dim2d microBlockDim
= Block256_2d
[elementBytesLog2
];
2579 xIdx
= Log2(microBlockDim
.w
);
2580 yIdx
= Log2(microBlockDim
.h
);
// Bits [lowBits, blockSizeLog2): even bit positions take the next y bit, odd ones the next x bit.
2587 for (UINT_32 i
= lowBits
; i
< blockSizeLog2
; i
++)
2589 pixelBit
[i
] = ((i
& 1) == 0) ? y
[yIdx
++] : x
[xIdx
++];
// Bits [blockSizeLog2, maxXorBits): same interleave, stored in xorExtra instead of the equation.
2592 for (UINT_32 i
= blockSizeLog2
; i
< maxXorBits
; i
++)
2594 xorExtra
[i
- blockSizeLog2
] = ((i
& 1) == 0) ? y
[yIdx
++] : x
[xIdx
++];
// Pipe XOR bits start at the pipe interleave; bank XOR bits follow the pipe bits.
2600 UINT_32 pipeStart
= m_pipeInterleaveLog2
;
2601 UINT_32 pipeXorBits
= GetPipeXorBits(blockSizeLog2
);
2603 UINT_32 bankStart
= pipeStart
+ pipeXorBits
;
2604 UINT_32 bankXorBits
= GetBankXorBits(blockSizeLog2
);
// xor1 of pipe bit i is sourced from bit (pipeStart + 2 * pipeXorBits - 1 - i), taken from the
// equation if it lies inside the block and from xorExtra otherwise.
2606 for (UINT_32 i
= 0; i
< pipeXorBits
; i
++)
2608 UINT_32 xor1BitPos
= pipeStart
+ 2 * pipeXorBits
- 1 - i
;
2609 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2610 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2612 InitChannel(&pEquation
->xor1
[pipeStart
+ i
], pXor1Src
);
// Same scheme for the bank bits, relative to bankStart.
2615 for (UINT_32 i
= 0; i
< bankXorBits
; i
++)
2617 UINT_32 xor1BitPos
= bankStart
+ 2 * bankXorBits
- 1 - i
;
2618 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2619 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2621 InitChannel(&pEquation
->xor1
[bankStart
+ i
], pXor1Src
);
// Non-PRT modes additionally fill xor2 with channel-2 bits, in reversed order within the
// pipe range and within the bank range.
2624 if (IsPrt(swMode
) == FALSE
)
2626 for (UINT_32 i
= 0; i
< pipeXorBits
; i
++)
2628 InitChannel(1, 2, pipeXorBits
- i
- 1, &pEquation
->xor2
[pipeStart
+ i
]);
2631 for (UINT_32 i
= 0; i
< bankXorBits
; i
++)
2633 InitChannel(1, 2, bankXorBits
- i
- 1 + pipeXorBits
, &pEquation
->xor2
[bankStart
+ i
]);
2638 pEquation
->numBits
= blockSizeLog2
;
2645 ************************************************************************************************************************
2646 * Gfx9Lib::HwlComputeThickEquation
2649 * Interface function stub of ComputeThickEquation
2653 ************************************************************************************************************************
// Fills *pEquation with the address equation of a thick block for the given swizzle mode and
// element size. The resource type is asserted to be 3D.
//
// Visible steps:
//   1. Compute maxXorBits (widened for non-PRT XOR modes, using a factor of 3).
//   2. Prepare tables of candidate x (channel 0), y (channel 1) and z (channel 2) bits.
//   3. Fill the low bits through a switch on elementBytesLog2 (Z-order or standard swizzle).
//   4. Interleave x/z/y above the low 10 bits up to blockSizeLog2; bits beyond the block go to
//      xorExtra.
//   5. Fill xor1 and xor2 for the pipe range and the bank range.
//   6. Set pEquation->numBits to blockSizeLog2.
// NOTE(review): the switch case bodies and the first test of each `i % 3` chain are not visible
// in this listing.
2655 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeThickEquation(
2656 AddrResourceType rsrcType
,
2657 AddrSwizzleMode swMode
,
2658 UINT_32 elementBytesLog2
,
2659 ADDR_EQUATION
* pEquation
) const
2661 ADDR_E_RETURNCODE ret
= ADDR_OK
;
2663 ADDR_ASSERT(IsTex3d(rsrcType
));
2665 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swMode
);
// Start with the block size; it is widened below for non-PRT XOR modes.
2667 UINT_32 maxXorBits
= blockSizeLog2
;
2668 if (IsNonPrtXor(swMode
))
2670 // For non-prt-xor, maybe need to initialize some more bits for xor
2671 // The highest xor bit used in equation will be max the following 3:
2672 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2673 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2676 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+ 3 * GetPipeXorBits(blockSizeLog2
));
2677 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+
2678 GetPipeXorBits(blockSizeLog2
) +
2679 3 * GetBankXorBits(blockSizeLog2
));
// Address bits below elementBytesLog2 are initialised on channel 0.
2682 for (UINT_32 i
= 0; i
< elementBytesLog2
; i
++)
2684 InitChannel(1, 0 , i
, &pEquation
->addr
[i
]);
// pixelBit initially points at the first bit above the per-element byte bits.
2687 ADDR_CHANNEL_SETTING
* pixelBit
= &pEquation
->addr
[elementBytesLog2
];
// x[], y[] and z[] together must be able to supply maxXorBits coordinate bits.
2689 const UINT_32 maxBitsUsed
= 12;
2690 ADDR_ASSERT((3 * maxBitsUsed
) >= maxXorBits
);
2691 ADDR_CHANNEL_SETTING x
[maxBitsUsed
] = {};
2692 ADDR_CHANNEL_SETTING y
[maxBitsUsed
] = {};
2693 ADDR_CHANNEL_SETTING z
[maxBitsUsed
] = {};
// xorExtra holds the coordinate bits that lie above blockSizeLog2 but are still XOR sources.
2695 const UINT_32 extraXorBits
= 24;
2696 ADDR_ASSERT(extraXorBits
>= maxXorBits
- blockSizeLog2
);
2697 ADDR_CHANNEL_SETTING xorExtra
[extraXorBits
] = {};
// x[i]: channel 0 at (elementBytesLog2 + i); y[i]: channel 1 at i; z[i]: channel 2 at i.
2699 for (UINT_32 i
= 0; i
< maxBitsUsed
; i
++)
2701 InitChannel(1, 0, elementBytesLog2
+ i
, &x
[i
]);
2702 InitChannel(1, 1, i
, &y
[i
]);
2703 InitChannel(1, 2, i
, &z
[i
]);
// Z-order swizzle: low-bit layout selected per element size.
2706 if (IsZOrderSwizzle(swMode
))
2708 switch (elementBytesLog2
)
2761 ADDR_ASSERT_ALWAYS();
2762 ret
= ADDR_INVALIDPARAMS
;
// Standard swizzle: low-bit layout selected per element size.
2766 else if (IsStandardSwizzle(rsrcType
, swMode
))
2768 switch (elementBytesLog2
)
2821 ADDR_ASSERT_ALWAYS();
2822 ret
= ADDR_INVALIDPARAMS
;
// Neither swizzle family matched.
2828 ADDR_ASSERT_ALWAYS();
2829 ret
= ADDR_INVALIDPARAMS
;
// Continue the coordinate tables after the bits covered by the micro block
// described by Block1K_3d.
2834 Dim3d microBlockDim
= Block1K_3d
[elementBytesLog2
];
2835 UINT_32 xIdx
= Log2(microBlockDim
.w
);
2836 UINT_32 yIdx
= Log2(microBlockDim
.h
);
2837 UINT_32 zIdx
= Log2(microBlockDim
.d
);
// From here on pixelBit indexes the equation from bit 0.
2839 pixelBit
= pEquation
->addr
;
// The low-bit stage is expected to have filled exactly bits [0, 10).
2841 const UINT_32 lowBits
= 10;
2842 ADDR_ASSERT(pEquation
->addr
[lowBits
- 1].valid
== 1);
2843 ADDR_ASSERT(pEquation
->addr
[lowBits
].valid
== 0);
// Bits [lowBits, blockSizeLog2): rotate through x, z and y according to i % 3.
2845 for (UINT_32 i
= lowBits
; i
< blockSizeLog2
; i
++)
2849 pixelBit
[i
] = x
[xIdx
++];
2851 else if ((i
% 3) == 1)
2853 pixelBit
[i
] = z
[zIdx
++];
2857 pixelBit
[i
] = y
[yIdx
++];
// Bits [blockSizeLog2, maxXorBits): same rotation, stored in xorExtra.
2861 for (UINT_32 i
= blockSizeLog2
; i
< maxXorBits
; i
++)
2865 xorExtra
[i
- blockSizeLog2
] = x
[xIdx
++];
2867 else if ((i
% 3) == 1)
2869 xorExtra
[i
- blockSizeLog2
] = z
[zIdx
++];
2873 xorExtra
[i
- blockSizeLog2
] = y
[yIdx
++];
// Pipe range: each pipe bit i gets two XOR sources, at positions
// (pipeStart + 3 * pipeXorBits - 1 - 2 * i) for xor1 and one position lower for xor2.
// A source inside the block comes from the equation, otherwise from xorExtra.
2880 UINT_32 pipeStart
= m_pipeInterleaveLog2
;
2881 UINT_32 pipeXorBits
= GetPipeXorBits(blockSizeLog2
);
2882 for (UINT_32 i
= 0; i
< pipeXorBits
; i
++)
2884 UINT_32 xor1BitPos
= pipeStart
+ (3 * pipeXorBits
) - 1 - (2 * i
);
2885 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2886 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2888 InitChannel(&pEquation
->xor1
[pipeStart
+ i
], pXor1Src
);
2890 UINT_32 xor2BitPos
= pipeStart
+ (3 * pipeXorBits
) - 2 - (2 * i
);
2891 ADDR_CHANNEL_SETTING
* pXor2Src
= (xor2BitPos
< blockSizeLog2
) ?
2892 &pEquation
->addr
[xor2BitPos
] : &xorExtra
[xor2BitPos
- blockSizeLog2
];
2894 InitChannel(&pEquation
->xor2
[pipeStart
+ i
], pXor2Src
);
// Bank range: same scheme, relative to bankStart (which follows the pipe bits).
2897 UINT_32 bankStart
= pipeStart
+ pipeXorBits
;
2898 UINT_32 bankXorBits
= GetBankXorBits(blockSizeLog2
);
2899 for (UINT_32 i
= 0; i
< bankXorBits
; i
++)
2901 UINT_32 xor1BitPos
= bankStart
+ (3 * bankXorBits
) - 1 - (2 * i
);
2902 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2903 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2905 InitChannel(&pEquation
->xor1
[bankStart
+ i
], pXor1Src
);
2907 UINT_32 xor2BitPos
= bankStart
+ (3 * bankXorBits
) - 2 - (2 * i
);
2908 ADDR_CHANNEL_SETTING
* pXor2Src
= (xor2BitPos
< blockSizeLog2
) ?
2909 &pEquation
->addr
[xor2BitPos
] : &xorExtra
[xor2BitPos
- blockSizeLog2
];
2911 InitChannel(&pEquation
->xor2
[bankStart
+ i
], pXor2Src
);
2915 pEquation
->numBits
= blockSizeLog2
;
2922 ************************************************************************************************************************
2923 * Gfx9Lib::IsValidDisplaySwizzleMode
2926 * Check if a swizzle mode is supported by display engine
2929 * TRUE if swizzle mode is supported by display engine
2930 ************************************************************************************************************************
// Reports whether pIn->swizzleMode is accepted for display, depending on which display
// engine setting is active (m_settings.isDce12 or m_settings.isDcn1) and on pIn->bpp.
// `support` starts as FALSE. When neither setting is active, ADDR_NOT_IMPLEMENTED() is hit.
// NOTE(review): the statements attached to some case groups (for example the one containing
// ADDR_SW_LINEAR) are not visible in this listing, so only the visible bpp rules are described.
2932 BOOL_32
Gfx9Lib::IsValidDisplaySwizzleMode(
2933 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
) const
2935 BOOL_32 support
= FALSE
;
2937 const AddrResourceType resourceType
= pIn
->resourceType
;
2939 const AddrSwizzleMode swizzleMode
= pIn
->swizzleMode
;
// DCE12 display engine.
2941 if (m_settings
.isDce12
)
2943 switch (swizzleMode
)
// 256B display/rotate modes: supported only at exactly 32 bpp.
2945 case ADDR_SW_256B_D
:
2946 case ADDR_SW_256B_R
:
2947 support
= (pIn
->bpp
== 32);
2950 case ADDR_SW_LINEAR
:
2953 case ADDR_SW_64KB_D
:
2954 case ADDR_SW_64KB_R
:
// XOR display/rotate modes: supported up to 64 bpp.
2957 case ADDR_SW_4KB_D_X
:
2958 case ADDR_SW_4KB_R_X
:
2959 case ADDR_SW_64KB_D_X
:
2960 case ADDR_SW_64KB_R_X
:
2961 case ADDR_SW_VAR_D_X
:
2962 case ADDR_SW_VAR_R_X
:
2963 support
= (pIn
->bpp
<= 64);
// DCN1 display engine.
2970 else if (m_settings
.isDcn1
)
2972 switch (swizzleMode
)
// Display-type modes listed here: supported only at exactly 64 bpp.
2975 case ADDR_SW_64KB_D
:
2977 case ADDR_SW_64KB_D_T
:
2978 case ADDR_SW_4KB_D_X
:
2979 case ADDR_SW_64KB_D_X
:
2980 case ADDR_SW_VAR_D_X
:
2981 support
= (pIn
->bpp
== 64);
// Linear and standard-type modes listed here: supported up to 64 bpp.
2984 case ADDR_SW_LINEAR
:
2986 case ADDR_SW_64KB_S
:
2988 case ADDR_SW_64KB_S_T
:
2989 case ADDR_SW_4KB_S_X
:
2990 case ADDR_SW_64KB_S_X
:
2991 case ADDR_SW_VAR_S_X
:
2992 support
= (pIn
->bpp
<= 64);
// No known display engine setting is active.
3001 ADDR_NOT_IMPLEMENTED();
3008 ************************************************************************************************************************
3009 * Gfx9Lib::HwlComputePipeBankXor
3012 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3016 ************************************************************************************************************************
// Computes pOut->pipeBankXor for a surface from pIn->surfIndex.
//
// For XOR swizzle modes the bank part is derived from (surfIndex & bankMask), either through
// one of two lookup tables (selected by bpp <= 32) or, when bankBits > 0, by multiplying the
// index with a bankIncrease factor. The pipe part (pipeXor) stays 0 in the visible code.
// For non-XOR swizzle modes pOut->pipeBankXor is set to 0.
// NOTE(review): the condition that guards the lookup-table branch is not visible in this
// listing. Both tables have 16 entries, so that branch presumably requires bankBits == 4
// (index < 16) — confirm against the full source.
3018 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputePipeBankXor(
3019 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT
* pIn
,
3020 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT
* pOut
) const
3022 if (IsXor(pIn
->swizzleMode
))
3024 UINT_32 macroBlockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
3025 UINT_32 pipeBits
= GetPipeXorBits(macroBlockBits
);
3026 UINT_32 bankBits
= GetBankXorBits(macroBlockBits
);
3028 UINT_32 pipeXor
= 0;
3029 UINT_32 bankXor
= 0;
// Only the low bankBits bits of the surface index are used.
3031 const UINT_32 bankMask
= (1 << bankBits
) - 1;
3032 const UINT_32 index
= pIn
->surfIndex
& bankMask
;
// FMASK surfaces take their bpp from sample/fragment counts, others from the format.
3034 const UINT_32 bpp
= pIn
->flags
.fmask
?
3035 GetFmaskBpp(pIn
->numSamples
, pIn
->numFrags
) : GetElemLib()->GetBitsPerPixel(pIn
->format
);
// 16-entry index -> bankXor tables, one for bpp <= 32 and one for larger bpp.
3038 static const UINT_32 BankXorSmallBpp
[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3039 static const UINT_32 BankXorLargeBpp
[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3041 bankXor
= (bpp
<= 32) ? BankXorSmallBpp
[index
] : BankXorLargeBpp
[index
];
3043 else if (bankBits
> 0)
// bankIncrease is 2^(bankBits - 1) - 1, but never less than 1.
3045 UINT_32 bankIncrease
= (1 << (bankBits
- 1)) - 1;
3046 bankIncrease
= (bankIncrease
== 0) ? 1 : bankIncrease
;
3047 bankXor
= (index
* bankIncrease
) & bankMask
;
// Bank bits sit above the pipe bits in the combined value.
3050 pOut
->pipeBankXor
= (bankXor
<< pipeBits
) | pipeXor
;
// Non-XOR swizzle modes have no pipe/bank XOR.
3054 pOut
->pipeBankXor
= 0;
3061 ************************************************************************************************************************
3062 * Gfx9Lib::HwlComputeSlicePipeBankXor
3065 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3069 ************************************************************************************************************************
3071 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSlicePipeBankXor(
3072 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT
* pIn
,
3073 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT
* pOut
) const
3075 UINT_32 macroBlockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
3076 UINT_32 pipeBits
= GetPipeXorBits(macroBlockBits
);
3077 UINT_32 bankBits
= GetBankXorBits(macroBlockBits
);
3079 UINT_32 pipeXor
= ReverseBitVector(pIn
->slice
, pipeBits
);
3080 UINT_32 bankXor
= ReverseBitVector(pIn
->slice
>> pipeBits
, bankBits
);
3082 pOut
->pipeBankXor
= pIn
->basePipeBankXor
^ (pipeXor
| (bankXor
<< pipeBits
));
3088 ************************************************************************************************************************
3089 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3092 * Compute sub resource offset to support swizzle pattern
3096 ************************************************************************************************************************
3098 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3099 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT
* pIn
,
3100 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT
* pOut
) const
3102 ADDR_ASSERT(IsThin(pIn
->resourceType
, pIn
->swizzleMode
));
3104 UINT_32 macroBlockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
3105 UINT_32 pipeBits
= GetPipeXorBits(macroBlockBits
);
3106 UINT_32 bankBits
= GetBankXorBits(macroBlockBits
);
3107 UINT_32 pipeXor
= ReverseBitVector(pIn
->slice
, pipeBits
);
3108 UINT_32 bankXor
= ReverseBitVector(pIn
->slice
>> pipeBits
, bankBits
);
3109 UINT_32 pipeBankXor
= ((pipeXor
| (bankXor
<< pipeBits
)) ^ (pIn
->pipeBankXor
)) << m_pipeInterleaveLog2
;
3111 pOut
->offset
= pIn
->slice
* pIn
->sliceSize
+
3112 pIn
->macroBlockOffset
+
3113 (pIn
->mipTailOffset
^ pipeBankXor
) -
3114 static_cast<UINT_64
>(pipeBankXor
);
3119 ************************************************************************************************************************
3120 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3123 * Compute surface info sanity check
3127 ************************************************************************************************************************
// Validates the combination of parameters in *pIn and returns ADDR_INVALIDPARAMS when the
// `invalid` flag ends up set, ADDR_OK otherwise. An ADDR_ASSERT on (invalid == FALSE) precedes
// the return.
//
// Visible stages (each later stage runs only while invalid == FALSE):
//   - basic range checks on bpp, width, numFrags, numSamples, swizzleMode, resourceType
//   - MSAA block-size versus pipe-interleave check
//   - per resource type restrictions (1D / 2D / 3D)
//   - display swizzle validation through IsValidDisplaySwizzleMode
//   - linear / 256B / variable-block / non-PRT-XOR restrictions
//   - per swizzle family restrictions (Z / standard / display / rotate)
// NOTE(review): several guarding `if` lines and assignments are not visible in this listing;
// the comments describe only the visible statements.
3129 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3130 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
) const
3132 BOOL_32 invalid
= FALSE
;
// Basic limits: bpp <= 128, non-zero width, at most 8 fragments and 16 samples.
3134 if ((pIn
->bpp
> 128) || (pIn
->width
== 0) || (pIn
->numFrags
> 8) || (pIn
->numSamples
> 16))
// Enum values must be below their MAX markers.
3138 else if ((pIn
->swizzleMode
>= ADDR_SW_MAX_TYPE
) ||
3139 (pIn
->resourceType
>= ADDR_RSRC_MAX_TYPE
))
// Derived properties used by the checks below.
3144 BOOL_32 mipmap
= (pIn
->numMipLevels
> 1);
3145 BOOL_32 msaa
= (pIn
->numFrags
> 1);
3147 ADDR2_SURFACE_FLAGS flags
= pIn
->flags
;
3148 BOOL_32 zbuffer
= (flags
.depth
|| flags
.stencil
);
3149 BOOL_32 color
= flags
.color
;
3150 BOOL_32 display
= flags
.display
|| flags
.rotated
;
3152 AddrResourceType rsrcType
= pIn
->resourceType
;
3153 BOOL_32 tex3d
= IsTex3d(rsrcType
);
3154 BOOL_32 thin3d
= tex3d
&& flags
.view3dAs2dArray
;
3155 AddrSwizzleMode swizzle
= pIn
->swizzleMode
;
3156 BOOL_32 linear
= IsLinear(swizzle
);
3157 BOOL_32 blk256B
= IsBlock256b(swizzle
);
3158 BOOL_32 blkVar
= IsBlockVariable(swizzle
);
3159 BOOL_32 isNonPrtXor
= IsNonPrtXor(swizzle
);
3160 BOOL_32 prt
= flags
.prt
;
3161 BOOL_32 stereo
= flags
.qbStereo
;
3163 if (invalid
== FALSE
)
// Multi-fragment surfaces: the block must hold at least numFrags pipe-interleave units.
3165 if ((pIn
->numFrags
> 1) &&
3166 (GetBlockSize(swizzle
) < (m_pipeInterleaveBytes
* pIn
->numFrags
)))
3168 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3173 if (invalid
== FALSE
)
// Restrictions per resource type.
3177 case ADDR_RSRC_TEX_1D
:
3178 invalid
= msaa
|| zbuffer
|| display
|| (linear
== FALSE
) || stereo
;
3180 case ADDR_RSRC_TEX_2D
:
3181 invalid
= (msaa
&& mipmap
) || (stereo
&& msaa
) || (stereo
&& mipmap
);
3183 case ADDR_RSRC_TEX_3D
:
3184 invalid
= msaa
|| zbuffer
|| display
|| stereo
;
3192 if (invalid
== FALSE
)
// Display engine acceptance of the swizzle mode.
3196 invalid
= (IsValidDisplaySwizzleMode(pIn
) == FALSE
);
3200 if (invalid
== FALSE
)
// PRT on anything but 1D, depth/stencil, MSAA, and bpp that is zero or not a multiple
// of 8 are rejected here.
3204 invalid
= ((ADDR_RSRC_TEX_1D
!= rsrcType
) && prt
) ||
3205 zbuffer
|| msaa
|| (pIn
->bpp
== 0) || ((pIn
->bpp
% 8) != 0);
3209 if (blk256B
|| blkVar
|| isNonPrtXor
)
3214 invalid
= invalid
|| zbuffer
|| tex3d
|| mipmap
|| msaa
;
3218 if (invalid
== FALSE
)
// Restrictions per swizzle family.
3220 if (IsZOrderSwizzle(swizzle
))
3222 invalid
= (color
&& msaa
) || thin3d
;
3224 else if (IsStandardSwizzle(swizzle
))
3226 invalid
= zbuffer
|| thin3d
;
3228 else if (IsDisplaySwizzle(swizzle
))
3230 invalid
= zbuffer
|| (prt
&& (ADDR_RSRC_TEX_3D
== rsrcType
));
3232 else if (IsRotateSwizzle(swizzle
))
3234 invalid
= zbuffer
|| (pIn
->bpp
> 64) || tex3d
;
3238 ADDR_ASSERT(!"invalid swizzle mode");
3245 ADDR_ASSERT(invalid
== FALSE
);
3247 return invalid
? ADDR_INVALIDPARAMS
: ADDR_OK
;
3251 ************************************************************************************************************************
3252 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3255 * Internal function to get suggested surface information for client to use
3259 ************************************************************************************************************************
// Chooses a preferred swizzle mode (and reports the sets of valid modes, block sizes and
// swizzle types) for the surface described by *pIn, writing the result to *pOut.
//
// Visible stages:
//   1. Normalise inputs (bpp, sample/fragment counts, resource type, dimensions).
//   2. Build allowedSwModeSet from the client's forbidden/preferred settings and maxAlign.
//   3. Remove modes that are invalid for the resource type, format, flags and display engine.
//   4. If anything is left: publish the valid sets, then narrow down to one block size, one
//      swizzle type and finally one swizzle mode.
//   5. If nothing is left: ADDR_ASSERT_ALWAYS() and returnCode = ADDR_INVALIDPARAMS.
// NOTE(review): many guarding `if`/`else` lines and some call argument lists are not visible
// in this listing; the comments describe only the visible statements.
3261 ADDR_E_RETURNCODE
Gfx9Lib::HwlGetPreferredSurfaceSetting(
3262 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT
* pIn
,
3263 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT
* pOut
) const
3265 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
3266 ElemLib
* pElemLib
= GetElemLib();
// Local, normalised copies of the input: at least one sample, and numFrags defaults to
// numSamples when it is given as 0.
3268 UINT_32 bpp
= pIn
->bpp
;
3269 UINT_32 width
= pIn
->width
;
3270 UINT_32 height
= pIn
->height
;
3271 UINT_32 numSamples
= Max(pIn
->numSamples
, 1u);
3272 UINT_32 numFrags
= (pIn
->numFrags
== 0) ? numSamples
: pIn
->numFrags
;
// FMASK surfaces derive bpp from the sample/fragment counts and use a 2D resource type.
3274 if (pIn
->flags
.fmask
)
3276 bpp
= GetFmaskBpp(numSamples
, numFrags
);
3279 pOut
->resourceType
= ADDR_RSRC_TEX_2D
;
3283 // Set format to INVALID will skip this conversion
3284 if (pIn
->format
!= ADDR_FMT_INVALID
)
3286 UINT_32 expandX
, expandY
;
3288 // Don't care for this case
3289 ElemMode elemMode
= ADDR_UNCOMPRESSED
;
3291 // Get compression/expansion factors and element mode which indicates compression/expansion
3292 bpp
= pElemLib
->GetBitsPerPixel(pIn
->format
,
3297 UINT_32 basePitch
= 0;
3298 GetElemLib()->AdjustSurfaceInfo(elemMode
,
3307 // The output may get changed for volume(3D) texture resource in future
3308 pOut
->resourceType
= pIn
->resourceType
;
// More normalised inputs: at least one slice and one mip level.
3311 const UINT_32 numSlices
= Max(pIn
->numSlices
, 1u);
3312 const UINT_32 numMipLevels
= Max(pIn
->numMipLevels
, 1u);
3313 const BOOL_32 msaa
= (numFrags
> 1) || (numSamples
> 1);
3314 const BOOL_32 displayRsrc
= pIn
->flags
.display
|| pIn
->flags
.rotated
;
3316 // Forbid swizzle mode(s) by client setting, for simplicity we never allow VAR swizzle mode for GFX9
3317 ADDR2_SWMODE_SET allowedSwModeSet
= {};
3318 allowedSwModeSet
.value
|= pIn
->forbiddenBlock
.linear
? 0 : Gfx9LinearSwModeMask
;
3319 allowedSwModeSet
.value
|= pIn
->forbiddenBlock
.micro
? 0 : Gfx9Blk256BSwModeMask
;
3320 allowedSwModeSet
.value
|= pIn
->forbiddenBlock
.macro4KB
? 0 : Gfx9Blk4KBSwModeMask
;
3321 allowedSwModeSet
.value
|= pIn
->forbiddenBlock
.macro64KB
? 0 : Gfx9Blk64KBSwModeMask
;
// A non-empty client preference removes every swizzle type the client did not ask for.
3323 if (pIn
->preferredSwSet
.value
!= 0)
3325 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_Z
? ~0 : ~Gfx9ZSwModeMask
;
3326 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_S
? ~0 : ~Gfx9StandardSwModeMask
;
3327 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_D
? ~0 : ~Gfx9DisplaySwModeMask
;
3328 allowedSwModeSet
.value
&= pIn
->preferredSwSet
.sw_R
? ~0 : ~Gfx9RotateSwModeMask
;
3333 allowedSwModeSet
.value
&= ~Gfx9XorSwModeMask
;
// A client alignment limit removes every block size larger than maxAlign.
3336 if (pIn
->maxAlign
> 0)
3338 if (pIn
->maxAlign
< GetBlockSize(ADDR_SW_64KB
))
3340 allowedSwModeSet
.value
&= ~Gfx9Blk64KBSwModeMask
;
3343 if (pIn
->maxAlign
< GetBlockSize(ADDR_SW_4KB
))
3345 allowedSwModeSet
.value
&= ~Gfx9Blk4KBSwModeMask
;
3348 if (pIn
->maxAlign
< GetBlockSize(ADDR_SW_256B
))
3350 allowedSwModeSet
.value
&= ~Gfx9Blk256BSwModeMask
;
3354 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3355 switch (pOut
->resourceType
)
3357 case ADDR_RSRC_TEX_1D
:
3358 allowedSwModeSet
.value
&= Gfx9Rsrc1dSwModeMask
;
3361 case ADDR_RSRC_TEX_2D
:
3362 allowedSwModeSet
.value
&= pIn
->flags
.prt
? Gfx9Rsrc2dPrtSwModeMask
: Gfx9Rsrc2dSwModeMask
;
3366 allowedSwModeSet
.value
&= ~(Gfx9RotateSwModeMask
| Gfx9ZSwModeMask
);
3370 case ADDR_RSRC_TEX_3D
:
3371 allowedSwModeSet
.value
&= pIn
->flags
.prt
? Gfx9Rsrc3dPrtSwModeMask
: Gfx9Rsrc3dSwModeMask
;
3373 if ((numMipLevels
> 1) && (numSlices
>= width
) && (numSlices
>= height
))
3375 // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3376 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3377 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3378 allowedSwModeSet
.value
&= ~Gfx9DisplaySwModeMask
;
// 128 bpp colour 3D surfaces: standard swizzle is removed.
3381 if ((bpp
== 128) && pIn
->flags
.color
)
3383 allowedSwModeSet
.value
&= ~Gfx9StandardSwModeMask
;
// 3D viewed as 2D array: only thin 3D modes and linear remain.
3386 if (pIn
->flags
.view3dAs2dArray
)
3388 allowedSwModeSet
.value
&= Gfx9Rsrc3dThinSwModeMask
| Gfx9LinearSwModeMask
;
// Unknown resource type: nothing is allowed.
3393 ADDR_ASSERT_ALWAYS();
3394 allowedSwModeSet
.value
= 0;
// ADDR_FMT_32_32_32 is restricted to linear.
3398 if (pIn
->format
== ADDR_FMT_32_32_32
)
3400 allowedSwModeSet
.value
&= Gfx9LinearSwModeMask
;
// Block-compressed formats: standard or display swizzle, plus linear on the other branch.
3403 if (ElemLib::IsBlockCompressed(pIn
->format
))
3405 if (pIn
->flags
.texture
)
3407 allowedSwModeSet
.value
&= Gfx9StandardSwModeMask
| Gfx9DisplaySwModeMask
;
3411 allowedSwModeSet
.value
&= Gfx9StandardSwModeMask
| Gfx9DisplaySwModeMask
| Gfx9LinearSwModeMask
;
// Macro-pixel-packed formats, and MSAA surfaces that are wide (> 32 bpp), colour or
// unordered, cannot use Z swizzle.
3415 if (ElemLib::IsMacroPixelPacked(pIn
->format
) ||
3416 (msaa
&& ((bpp
> 32) || pIn
->flags
.color
|| pIn
->flags
.unordered
)))
3418 allowedSwModeSet
.value
&= ~Gfx9ZSwModeMask
;
// FMASK, depth and stencil surfaces are restricted to Z swizzle.
3421 if (pIn
->flags
.fmask
|| pIn
->flags
.depth
|| pIn
->flags
.stencil
)
3423 allowedSwModeSet
.value
&= Gfx9ZSwModeMask
;
3425 if (pIn
->flags
.noMetadata
== FALSE
)
3427 if (pIn
->flags
.depth
&&
3428 pIn
->flags
.texture
&&
3429 (((bpp
== 16) && (numFrags
>= 4)) || ((bpp
== 32) && (numFrags
>= 2))))
3431 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3432 // equation from wrong address within memory range a tile covered and use the
3433 // garbage data for compressed Z reading which finally leads to corruption.
3434 allowedSwModeSet
.value
&= ~Gfx9XorSwModeMask
;
3437 if (m_settings
.htileCacheRbConflict
&&
3438 (pIn
->flags
.depth
|| pIn
->flags
.stencil
) &&
3440 (pIn
->flags
.metaRbUnaligned
== FALSE
) &&
3441 (pIn
->flags
.metaPipeUnaligned
== FALSE
))
3443 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3444 allowedSwModeSet
.value
&= ~Gfx9XSwModeMask
;
// Restrict to the modes usable with MSAA.
3451 allowedSwModeSet
.value
&= Gfx9MsaaSwModeMask
;
3454 if ((numFrags
> 1) &&
3455 (GetBlockSize(ADDR_SW_4KB
) < (m_pipeInterleaveBytes
* numFrags
)))
3457 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3458 allowedSwModeSet
.value
&= Gfx9Blk64KBSwModeMask
;
// Mipmapped surfaces cannot use 256B blocks.
3461 if (numMipLevels
> 1)
3463 allowedSwModeSet
.value
&= ~Gfx9Blk256BSwModeMask
;
// Display engine restrictions, keyed on bpp.
3468 if (m_settings
.isDce12
)
3470 allowedSwModeSet
.value
&= (bpp
== 32) ? Dce12Bpp32SwModeMask
: Dce12NonBpp32SwModeMask
;
3472 else if (m_settings
.isDcn1
)
3474 allowedSwModeSet
.value
&= (bpp
== 64) ? Dcn1Bpp64SwModeMask
: Dcn1NonBpp64SwModeMask
;
3478 ADDR_NOT_IMPLEMENTED();
// At least one swizzle mode survived the filtering.
3482 if (allowedSwModeSet
.value
!= 0)
3485 // Post sanity check, at least AddrLib should accept the output generated by its own
3486 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn
= {};
3487 localIn
.flags
= pIn
->flags
;
3488 localIn
.resourceType
= pOut
->resourceType
;
3489 localIn
.format
= pIn
->format
;
3491 localIn
.width
= width
;
3492 localIn
.height
= height
;
3493 localIn
.numSlices
= numSlices
;
3494 localIn
.numMipLevels
= numMipLevels
;
3495 localIn
.numSamples
= numSamples
;
3496 localIn
.numFrags
= numFrags
;
// Run the sanity check once for every swizzle mode whose bit is set; bit i stands for
// swizzle mode i. The return value is not used here.
3498 UINT_32 validateSwModeSet
= allowedSwModeSet
.value
;
3499 for (UINT_32 i
= 0; validateSwModeSet
!= 0; i
++)
3501 if (validateSwModeSet
& 1)
3503 localIn
.swizzleMode
= static_cast<AddrSwizzleMode
>(i
);
3504 HwlComputeSurfaceInfoSanityCheck(&localIn
);
3507 validateSwModeSet
>>= 1;
// Publish the full set of valid modes before narrowing down to a single one.
3511 pOut
->validSwModeSet
= allowedSwModeSet
;
3512 pOut
->canXor
= (allowedSwModeSet
.value
& Gfx9XorSwModeMask
) ? TRUE
: FALSE
;
3513 pOut
->validBlockSet
= GetAllowedBlockSet(allowedSwModeSet
);
3514 pOut
->validSwTypeSet
= GetAllowedSwSet(allowedSwModeSet
);
3516 pOut
->clientPreferredSwSet
= pIn
->preferredSwSet
;
// No client preference is reported back as "all swizzle types".
3518 if (pOut
->clientPreferredSwSet
.value
== 0)
3520 pOut
->clientPreferredSwSet
.value
= AddrSwSetAll
;
// Linear is chosen only when it is the sole remaining mode.
3523 if (allowedSwModeSet
.value
== Gfx9LinearSwModeMask
)
3525 pOut
->swizzleMode
= ADDR_SW_LINEAR
;
3529 // Always ignore linear swizzle mode if there is other choice.
3530 allowedSwModeSet
.swLinear
= 0;
3532 ADDR2_BLOCK_SET allowedBlockSet
= GetAllowedBlockSet(allowedSwModeSet
);
3534 // Determine block size if there is 2 or more block type candidates
3535 if (IsPow2(allowedBlockSet
.value
) == FALSE
)
3537 const AddrSwizzleMode swMode
[AddrBlockMaxTiledType
] = {ADDR_SW_256B
, ADDR_SW_4KB
, ADDR_SW_64KB
};
3538 Dim3d blkDim
[AddrBlockMaxTiledType
] = {{0}, {0}, {0}};
3539 Dim3d padDim
[AddrBlockMaxTiledType
] = {{0}, {0}, {0}};
3540 UINT_64 padSize
[AddrBlockMaxTiledType
] = {0};
// ratioLow / ratioHi weight the size comparison below; both are 1 with minimizeAlign,
// 3 / 2 with opt4space, and 2 / 1 otherwise.
3542 const UINT_32 ratioLow
= pIn
->flags
.minimizeAlign
? 1 : (pIn
->flags
.opt4space
? 3 : 2);
3543 const UINT_32 ratioHi
= pIn
->flags
.minimizeAlign
? 1 : (pIn
->flags
.opt4space
? 2 : 1);
// NOTE(review): (bpp >> 3) is 0 for bpp < 8, which would make this a division by zero —
// confirm that callers never reach here with such a bpp.
3544 const UINT_64 sizeAlignInElement
= Max(NextPow2(pIn
->minSizeAlign
) / (bpp
>> 3), 1u);
3545 UINT_32 minSizeBlk
= AddrBlockMicro
;
3546 UINT_64 minSize
= 0;
// Evaluate the padded surface size for each allowed block type.
3548 for (UINT_32 i
= AddrBlockMicro
; i
< AddrBlockMaxTiledType
; i
++)
3550 if (allowedBlockSet
.value
& (1 << i
))
3552 ComputeBlockDimensionForSurf(&blkDim
[i
].w
,
3562 blkDim
[i
].w
= PowTwoAlign(blkDim
[i
].w
, 32);
3565 padSize
[i
] = ComputePadSize(&blkDim
[i
], width
, height
, numSlices
, &padDim
[i
]);
3566 padSize
[i
] = PowTwoAlign(padSize
[i
], sizeAlignInElement
);
// Take this block type if it is the first candidate or if its weighted size does not
// exceed the weighted current minimum.
3568 if ((minSize
== 0) ||
3569 ((padSize
[i
] * ratioHi
) <= (minSize
* ratioLow
)))
3571 minSize
= padSize
[i
];
// A surface that fits into a single 256B block (and whose size alignment allows it)
// uses the micro block.
3577 if ((allowedBlockSet
.micro
== TRUE
) &&
3578 (width
<= blkDim
[AddrBlockMicro
].w
) &&
3579 (height
<= blkDim
[AddrBlockMicro
].h
) &&
3580 (NextPow2(pIn
->minSizeAlign
) <= GetBlockSize(ADDR_SW_256B
)))
3582 minSizeBlk
= AddrBlockMicro
;
// Keep only the swizzle modes of the chosen block size.
3585 if (minSizeBlk
== AddrBlockMicro
)
3587 allowedSwModeSet
.value
&= Gfx9Blk256BSwModeMask
;
3589 else if (minSizeBlk
== AddrBlock4KB
)
3591 allowedSwModeSet
.value
&= Gfx9Blk4KBSwModeMask
;
3595 ADDR_ASSERT(minSizeBlk
== AddrBlock64KB
);
3596 allowedSwModeSet
.value
&= Gfx9Blk64KBSwModeMask
;
3600 // Block type should be determined.
3601 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet
).value
));
3603 ADDR2_SWTYPE_SET allowedSwSet
= GetAllowedSwSet(allowedSwModeSet
);
3605 // Determine swizzle type if there is 2 or more swizzle type candidates
3606 if (IsPow2(allowedSwSet
.value
) == FALSE
)
// Block-compressed formats: display swizzle first, otherwise standard.
3608 if (ElemLib::IsBlockCompressed(pIn
->format
))
3610 if (allowedSwSet
.sw_D
)
3612 allowedSwModeSet
.value
&= Gfx9DisplaySwModeMask
;
3616 ADDR_ASSERT(allowedSwSet
.sw_S
);
3617 allowedSwModeSet
.value
&= Gfx9StandardSwModeMask
;
// Macro-pixel-packed formats: standard, then display, then rotate.
3620 else if (ElemLib::IsMacroPixelPacked(pIn
->format
))
3622 if (allowedSwSet
.sw_S
)
3624 allowedSwModeSet
.value
&= Gfx9StandardSwModeMask
;
3626 else if (allowedSwSet
.sw_D
)
3628 allowedSwModeSet
.value
&= Gfx9DisplaySwModeMask
;
3632 ADDR_ASSERT(allowedSwSet
.sw_R
);
3633 allowedSwModeSet
.value
&= Gfx9RotateSwModeMask
;
// 3D resources: display for colour surfaces when available, then Z, then standard.
3636 else if (pOut
->resourceType
== ADDR_RSRC_TEX_3D
)
3638 if (pIn
->flags
.color
&& allowedSwSet
.sw_D
)
3640 allowedSwModeSet
.value
&= Gfx9DisplaySwModeMask
;
3642 else if (allowedSwSet
.sw_Z
)
3644 allowedSwModeSet
.value
&= Gfx9ZSwModeMask
;
3648 ADDR_ASSERT(allowedSwSet
.sw_S
);
3649 allowedSwModeSet
.value
&= Gfx9StandardSwModeMask
;
// Remaining resources: rotate for rotated surfaces, display for display resources,
// then standard, then Z.
3654 if (pIn
->flags
.rotated
&& allowedSwSet
.sw_R
)
3656 allowedSwModeSet
.value
&= Gfx9RotateSwModeMask
;
3658 else if (displayRsrc
&& allowedSwSet
.sw_D
)
3660 allowedSwModeSet
.value
&= Gfx9DisplaySwModeMask
;
3662 else if (allowedSwSet
.sw_S
)
3664 allowedSwModeSet
.value
&= Gfx9StandardSwModeMask
;
3668 ADDR_ASSERT(allowedSwSet
.sw_Z
);
3669 allowedSwModeSet
.value
&= Gfx9ZSwModeMask
;
3674 // Swizzle type should be determined.
3675 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet
).value
));
3677 // Determine swizzle mode now - always select the "largest" swizzle mode for a given block type +
3678 // swizzle type combination. For example, for AddrBlock64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3679 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3680 pOut
->swizzleMode
= static_cast<AddrSwizzleMode
>(Log2NonPow2(allowedSwModeSet
.value
));
3685 // Invalid combination...
3686 ADDR_ASSERT_ALWAYS();
3687 returnCode
= ADDR_INVALIDPARAMS
;
3694 ************************************************************************************************************************
3695 * Gfx9Lib::ComputeStereoInfo
3698 * Compute height alignment and right eye pipeBankXor for stereo surface
3703 ************************************************************************************************************************
// Computes the height alignment (*pHeightAlign) for a stereo surface and, when
// pOut->pStereoInfo is not NULL, the rightSwizzle value stored in it.
//
// The equation is looked up through HwlGetEquationIndex. When the index is not below
// m_numEquations, the visible fall-back is ADDR_ASSERT_ALWAYS() with returnCode = ADDR_ERROR.
// For XOR swizzle modes the function compares the highest y coordinate bit used by the
// pipe/bank XOR with the highest y bit of the base equation; only when the XOR uses a higher
// bit does it set *pHeightAlign and rightSwizzle.
// NOTE(review): parts of the branch structure are not visible in this listing.
3705 ADDR_E_RETURNCODE
Gfx9Lib::ComputeStereoInfo(
3706 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
,
3707 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
,
3708 UINT_32
* pHeightAlign
3711 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
3713 UINT_32 eqIndex
= HwlGetEquationIndex(pIn
, pOut
);
3715 if (eqIndex
< m_numEquations
)
3717 if (IsXor(pIn
->swizzleMode
))
3719 const UINT_32 blkSizeLog2
= GetBlockSizeLog2(pIn
->swizzleMode
);
3720 const UINT_32 numPipeBits
= GetPipeXorBits(blkSizeLog2
);
3721 const UINT_32 numBankBits
= GetBankXorBits(blkSizeLog2
);
// bppLog2 is log2 of the element size in bytes; it indexes Block256_2d.
3722 const UINT_32 bppLog2
= Log2(pIn
->bpp
>> 3);
// Highest y bit index inside a 256B block, cross-checked against the stored equation.
3723 const UINT_32 maxYCoordBlock256
= Log2(Block256_2d
[bppLog2
].h
) - 1;
3724 MAYBE_UNUSED
const ADDR_EQUATION
*pEqToCheck
= &m_equationTable
[eqIndex
];
3726 ADDR_ASSERT(maxYCoordBlock256
==
3727 GetMaxValidChannelIndex(&pEqToCheck
->addr
[0], GetBlockSizeLog2(ADDR_SW_256B
), 1));
// Highest y bit index in the whole block: half of the bits above the 256B block are y bits.
3729 const UINT_32 maxYCoordInBaseEquation
=
3730 (blkSizeLog2
- GetBlockSizeLog2(ADDR_SW_256B
)) / 2 + maxYCoordBlock256
;
3732 ADDR_ASSERT(maxYCoordInBaseEquation
==
3733 GetMaxValidChannelIndex(&pEqToCheck
->addr
[0], blkSizeLog2
, 1));
// Highest y bit index used by the pipe XOR (0 when there are no pipe bits).
3735 const UINT_32 maxYCoordInPipeXor
= (numPipeBits
== 0) ? 0 : maxYCoordBlock256
+ numPipeBits
;
3737 ADDR_ASSERT(maxYCoordInPipeXor
==
3738 GetMaxValidChannelIndex(&pEqToCheck
->xor1
[m_pipeInterleaveLog2
], numPipeBits
, 1));
// Highest y bit index used by the bank XOR (0 when there are no bank bits).
3740 const UINT_32 maxYCoordInBankXor
= (numBankBits
== 0) ?
3741 0 : maxYCoordBlock256
+ (numPipeBits
+ 1) / 2 + numBankBits
;
3743 ADDR_ASSERT(maxYCoordInBankXor
==
3744 GetMaxValidChannelIndex(&pEqToCheck
->xor1
[m_pipeInterleaveLog2
+ numPipeBits
], numBankBits
, 1));
3746 const UINT_32 maxYCoordInPipeBankXor
= Max(maxYCoordInPipeXor
, maxYCoordInBankXor
);
// Only when the XOR uses a y bit above the base equation is extra height alignment needed.
3748 if (maxYCoordInPipeBankXor
> maxYCoordInBaseEquation
)
3750 *pHeightAlign
= 1u << maxYCoordInPipeBankXor
;
3752 if (pOut
->pStereoInfo
!= NULL
)
3754 pOut
->pStereoInfo
->rightSwizzle
= 0;
// The aligned height is an odd multiple of *pHeightAlign.
3756 if ((PowTwoAlign(pIn
->height
, *pHeightAlign
) % (*pHeightAlign
* 2)) != 0)
3758 if (maxYCoordInPipeXor
== maxYCoordInPipeBankXor
)
3760 pOut
->pStereoInfo
->rightSwizzle
|= (1u << 1);
3763 if (maxYCoordInBankXor
== maxYCoordInPipeBankXor
)
3765 pOut
->pStereoInfo
->rightSwizzle
|=
3766 1u << ((numPipeBits
% 2) ? numPipeBits
: numPipeBits
+ 1);
// Cross-check rightSwizzle against the active-coordinate mask of the stored equation.
3769 ADDR_ASSERT(pOut
->pStereoInfo
->rightSwizzle
==
3770 GetCoordActiveMask(&pEqToCheck
->xor1
[m_pipeInterleaveLog2
],
3771 numPipeBits
+ numBankBits
, 1, maxYCoordInPipeBankXor
));
// No valid equation index was found.
3779 ADDR_ASSERT_ALWAYS();
3780 returnCode
= ADDR_ERROR
;
3787 ************************************************************************************************************************
3788 * Gfx9Lib::HwlComputeSurfaceInfoTiled
3791 * Internal function to calculate alignment for tiled surface
3795 ************************************************************************************************************************
// Computes the padded pitch/height/slice count, the mip-chain layout, the slice/surface sizes and the
// base alignment of a tiled surface, writing the results into *pOut.
// NOTE(review): this listing does not show every original line (the numeric prefixes skip), so the
// comments added here describe only the statements that are visible.
3797 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSurfaceInfoTiled(
3798 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
3799 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
// Block dimensions are computed first; their result code gates the pitch computation below.
3802 ADDR_E_RETURNCODE returnCode
= ComputeBlockDimensionForSurf(&pOut
->blockWidth
,
3810 if (returnCode
== ADDR_OK
)
// The pitch alignment (in elements) starts out as the block width.
3812 UINT_32 pitchAlignInElement
= pOut
->blockWidth
;
// 2D display/rotated surfaces with a single mip, sample and fragment get a stricter pitch alignment.
3814 if ((IsTex2d(pIn
->resourceType
) == TRUE
) &&
3815 (pIn
->flags
.display
|| pIn
->flags
.rotated
) &&
3816 (pIn
->numMipLevels
<= 1) &&
3817 (pIn
->numSamples
<= 1) &&
3818 (pIn
->numFrags
<= 1))
3820 // Display engine needs pitch align to be at least 32 pixels.
3821 pitchAlignInElement
= PowTwoAlign(pitchAlignInElement
, 32);
3824 pOut
->pitch
= PowTwoAlign(pIn
->width
, pitchAlignInElement
);
// A caller-supplied pitch is only considered for single-mip surfaces. It is rejected with
// ADDR_INVALIDPARAMS when it is not a multiple of the alignment or is smaller than the computed pitch.
3826 if ((pIn
->numMipLevels
<= 1) && (pIn
->pitchInElement
> 0))
3828 if ((pIn
->pitchInElement
% pitchAlignInElement
) != 0)
3830 returnCode
= ADDR_INVALIDPARAMS
;
3832 else if (pIn
->pitchInElement
< pOut
->pitch
)
3834 returnCode
= ADDR_INVALIDPARAMS
;
// The caller-supplied pitch replaces the computed one.
// NOTE(review): presumably only when both checks above pass; the branch lines are not visible here.
3838 pOut
->pitch
= pIn
->pitchInElement
;
3842 UINT_32 heightAlign
= 0;
// Surfaces flagged qbStereo obtain an extra height alignment from ComputeStereoInfo.
3844 if (pIn
->flags
.qbStereo
)
3846 returnCode
= ComputeStereoInfo(pIn
, pOut
, &heightAlign
);
3849 if (returnCode
== ADDR_OK
)
// Height is aligned to the block height first, then to heightAlign when that is larger than 1.
3851 pOut
->height
= PowTwoAlign(pIn
->height
, pOut
->blockHeight
);
3853 if (heightAlign
> 1)
3855 pOut
->height
= PowTwoAlign(pOut
->height
, heightAlign
);
3858 pOut
->numSlices
= PowTwoAlign(pIn
->numSlices
, pOut
->blockSlices
);
// Initial values for the mip-chain outputs; the numMipLevels > 1 handling below may overwrite them.
3860 pOut
->epitchIsHeight
= FALSE
;
3861 pOut
->mipChainInTail
= FALSE
;
3862 pOut
->firstMipIdInTail
= pIn
->numMipLevels
;
3864 pOut
->mipChainPitch
= pOut
->pitch
;
3865 pOut
->mipChainHeight
= pOut
->height
;
3866 pOut
->mipChainSlice
= pOut
->numSlices
;
3868 if (pIn
->numMipLevels
> 1)
3870 pOut
->firstMipIdInTail
= GetMipChainInfo(pIn
->resourceType
,
// Clamp the first-in-tail mip id to the last existing mip level.
3882 const UINT_32 endingMipId
= Min(pOut
->firstMipIdInTail
, pIn
->numMipLevels
- 1);
// endingMipId == 0: the surface is sized from the mip tail dimensions and the chain is flagged as
// being in the tail.
3884 if (endingMipId
== 0)
3886 const Dim3d tailMaxDim
= GetMipTailDim(pIn
->resourceType
,
3892 pOut
->epitchIsHeight
= TRUE
;
3893 pOut
->pitch
= tailMaxDim
.w
;
3894 pOut
->height
= tailMaxDim
.h
;
3895 pOut
->numSlices
= IsThick(pIn
->resourceType
, pIn
->swizzleMode
) ?
3896 tailMaxDim
.d
: pIn
->numSlices
;
3897 pOut
->mipChainInTail
= TRUE
;
// Mip 0 dimensions expressed in blocks; they feed the major-mode query and the chain sizing below.
3901 UINT_32 mip0WidthInBlk
= pOut
->pitch
/ pOut
->blockWidth
;
3902 UINT_32 mip0HeightInBlk
= pOut
->height
/ pOut
->blockHeight
;
3904 AddrMajorMode majorMode
= GetMajorMode(pIn
->resourceType
,
3908 pOut
->numSlices
/ pOut
->blockSlices
);
// ADDR_MAJOR_Y: the chain pitch is extended by mip 1's width and epitchIsHeight is cleared.
3909 if (majorMode
== ADDR_MAJOR_Y
)
3911 UINT_32 mip1WidthInBlk
= RoundHalf(mip0WidthInBlk
);
3913 if ((mip1WidthInBlk
== 1) && (endingMipId
> 2))
3918 pOut
->mipChainPitch
+= (mip1WidthInBlk
* pOut
->blockWidth
);
3920 pOut
->epitchIsHeight
= FALSE
;
// Height counterpart: the chain height is extended by mip 1's height and epitchIsHeight is set.
// NOTE(review): presumably the else branch of the major-mode test above; confirm against the full file.
3924 UINT_32 mip1HeightInBlk
= RoundHalf(mip0HeightInBlk
);
3926 if ((mip1HeightInBlk
== 1) && (endingMipId
> 2))
3931 pOut
->mipChainHeight
+= (mip1HeightInBlk
* pOut
->blockHeight
);
3933 pOut
->epitchIsHeight
= TRUE
;
// Optional per-mip output: macro block offset and mip tail offset for every level.
3937 if (pOut
->pMipInfo
!= NULL
)
3939 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
3941 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
3943 Dim3d mipStartPos
= {0};
3944 UINT_32 mipTailOffsetInBytes
= 0;
3946 mipStartPos
= GetMipStartPos(pIn
->resourceType
,
3956 &mipTailOffsetInBytes
);
3958 UINT_32 pitchInBlock
=
3959 pOut
->mipChainPitch
/ pOut
->blockWidth
;
3960 UINT_32 sliceInBlock
=
3961 (pOut
->mipChainHeight
/ pOut
->blockHeight
) * pitchInBlock
;
// Linear block index of the mip start position, turned into a byte offset by shifting left by the
// block size log2.
3962 UINT_64 blockIndex
=
3963 mipStartPos
.d
* sliceInBlock
+ mipStartPos
.h
* pitchInBlock
+ mipStartPos
.w
;
3964 UINT_64 macroBlockOffset
=
3965 blockIndex
<< GetBlockSizeLog2(pIn
->swizzleMode
);
3967 pOut
->pMipInfo
[i
].macroBlockOffset
= macroBlockOffset
;
3968 pOut
->pMipInfo
[i
].mipTailOffset
= mipTailOffsetInBytes
;
// Alternative branch: only entry 0 of pMipInfo is filled in, with offset 0.
3972 else if (pOut
->pMipInfo
!= NULL
)
3974 pOut
->pMipInfo
[0].pitch
= pOut
->pitch
;
3975 pOut
->pMipInfo
[0].height
= pOut
->height
;
3976 pOut
->pMipInfo
[0].depth
= IsTex3d(pIn
->resourceType
)? pOut
->numSlices
: 1;
3977 pOut
->pMipInfo
[0].offset
= 0;
// Slice size is computed in 64 bits from the mip-chain pitch/height, bytes per element and fragment count.
3980 pOut
->sliceSize
= static_cast<UINT_64
>(pOut
->mipChainPitch
) * pOut
->mipChainHeight
*
3981 (pIn
->bpp
>> 3) * pIn
->numFrags
;
3982 pOut
->surfSize
= pOut
->sliceSize
* pOut
->mipChainSlice
;
3983 pOut
->baseAlign
= ComputeSurfaceBaseAlignTiled(pIn
->swizzleMode
);
// Non-256B-block color/depth/stencil/fmask surfaces that are also textures, with metadata allowed and
// metaPipeUnaligned clear, get their base alignment raised (see the original comment below).
3985 if ((IsBlock256b(pIn
->swizzleMode
) == FALSE
) &&
3986 (pIn
->flags
.color
|| pIn
->flags
.depth
|| pIn
->flags
.stencil
|| pIn
->flags
.fmask
) &&
3987 (pIn
->flags
.texture
== TRUE
) &&
3988 (pIn
->flags
.noMetadata
== FALSE
) &&
3989 (pIn
->flags
.metaPipeUnaligned
== FALSE
))
3991 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
3992 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
3993 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
3994 // them, which may cause invalid metadata to be fetched.
3995 pOut
->baseAlign
= Max(pOut
->baseAlign
, m_pipeInterleaveBytes
* m_pipes
);
// NOTE(review): the condition guarding this PrtAlignment bump is not visible in this listing.
4000 pOut
->baseAlign
= Max(pOut
->baseAlign
, PrtAlignment
);
4009 ************************************************************************************************************************
4010 * Gfx9Lib::HwlComputeSurfaceInfoLinear
4013 * Internal function to calculate alignment for linear surface
4017 ************************************************************************************************************************
// Computes pitch, height, sizes, alignment and (optionally) per-mip info for a linear surface and
// writes them into *pOut.
// NOTE(review): this listing does not show every original line (for example the declaration of
// `pitch` and the brace/else lines), so the comments added here describe only the visible statements.
4019 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSurfaceInfoLinear(
4020 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
4021 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
4024 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
4026 UINT_32 actualHeight
= 0;
4027 UINT_32 elementBytes
= pIn
->bpp
>> 3;
// Alignment in bytes: PrtAlignment when the prt flag is set, 256 otherwise.
4028 const UINT_32 alignment
= pIn
->flags
.prt
? PrtAlignment
: 256;
// 1D textures: a height above 1 is rejected; the mip levels are stacked as rows, so the
// "height" of the chain is the number of mip levels.
4030 if (IsTex1d(pIn
->resourceType
))
4032 if (pIn
->height
> 1)
4034 returnCode
= ADDR_INVALIDPARAMS
;
4038 const UINT_32 pitchAlignInElement
= alignment
/ elementBytes
;
4040 pitch
= PowTwoAlign(pIn
->width
, pitchAlignInElement
);
4041 actualHeight
= pIn
->numMipLevels
;
// Caller-customized pitch/height is applied only when the prt flag is clear.
4043 if (pIn
->flags
.prt
== FALSE
)
4045 returnCode
= ApplyCustomizedPitchHeight(pIn
, elementBytes
, pitchAlignInElement
,
4046 &pitch
, &actualHeight
);
4049 if (returnCode
== ADDR_OK
)
4051 if (pOut
->pMipInfo
!= NULL
)
// Mip i starts i rows into the chain: offset = pitch * elementBytes * i, height 1, depth 1.
4053 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
4055 pOut
->pMipInfo
[i
].offset
= pitch
* elementBytes
* i
;
4056 pOut
->pMipInfo
[i
].pitch
= pitch
;
4057 pOut
->pMipInfo
[i
].height
= 1;
4058 pOut
->pMipInfo
[i
].depth
= 1;
// NOTE(review): presumably the non-1D path (the else line is not visible); padding and per-mip
// info come from ComputeSurfaceLinearPadding.
4066 returnCode
= ComputeSurfaceLinearPadding(pIn
, &pitch
, &actualHeight
, pOut
->pMipInfo
);
// A zero pitch or zero height is treated as invalid input.
4069 if ((pitch
== 0) || (actualHeight
== 0))
4071 returnCode
= ADDR_INVALIDPARAMS
;
4074 if (returnCode
== ADDR_OK
)
// Publish the results: height and slice count are passed through unpadded, the mip-chain height
// carries actualHeight.
4076 pOut
->pitch
= pitch
;
4077 pOut
->height
= pIn
->height
;
4078 pOut
->numSlices
= pIn
->numSlices
;
4079 pOut
->mipChainPitch
= pitch
;
4080 pOut
->mipChainHeight
= actualHeight
;
4081 pOut
->mipChainSlice
= pOut
->numSlices
;
4082 pOut
->epitchIsHeight
= (pIn
->numMipLevels
> 1) ? TRUE
: FALSE
;
// Slice size is computed in 64 bits.
4083 pOut
->sliceSize
= static_cast<UINT_64
>(pOut
->pitch
) * actualHeight
* elementBytes
;
4084 pOut
->surfSize
= pOut
->sliceSize
* pOut
->numSlices
;
// ADDR_SW_LINEAR_GENERAL uses element-size base alignment and a block width of 1; other linear
// modes use `alignment` and a 256-byte-wide block.
4085 pOut
->baseAlign
= (pIn
->swizzleMode
== ADDR_SW_LINEAR_GENERAL
) ? (pIn
->bpp
/ 8) : alignment
;
4086 pOut
->blockWidth
= (pIn
->swizzleMode
== ADDR_SW_LINEAR_GENERAL
) ? 1 : (256 / elementBytes
);
4087 pOut
->blockHeight
= 1;
4088 pOut
->blockSlices
= 1;
4091 // Post calculation validate
4092 ADDR_ASSERT(pOut
->sliceSize
> 0);
4098 ************************************************************************************************************************
4099 * Gfx9Lib::GetMipChainInfo
4102 * Internal function to get out information about mip chain
4105 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4106 ************************************************************************************************************************
// Walks the mip levels of a surface, optionally recording each level's pitch/height/depth/offset in
// pMipInfo, and returns firstMipIdInTail (initialized to numMipLevel, set to a mip id inside the loop).
// NOTE(review): several parameter lines and the branch/brace lines are not visible in this listing,
// so the comments added here describe only the visible statements.
4108 UINT_32
Gfx9Lib::GetMipChainInfo(
4109 AddrResourceType resourceType
,
4110 AddrSwizzleMode swizzleMode
,
4116 UINT_32 blockHeight
,
4118 UINT_32 numMipLevel
,
4119 ADDR2_MIP_INFO
* pMipInfo
) const
// Largest dimensions a mip may have and still be placed in the mip tail.
4121 const Dim3d tailMaxDim
=
4122 GetMipTailDim(resourceType
, swizzleMode
, blockWidth
, blockHeight
, blockDepth
);
// Running dimensions of the current mip level, starting from mip 0; depth is 1 unless the resource is 3D.
4124 UINT_32 mipPitch
= mip0Width
;
4125 UINT_32 mipHeight
= mip0Height
;
4126 UINT_32 mipDepth
= IsTex3d(resourceType
) ? mip0Depth
: 1;
4128 UINT_32 firstMipIdInTail
= numMipLevel
;
4129 BOOL_32 inTail
= FALSE
;
4130 BOOL_32 finalDim
= FALSE
;
4131 BOOL_32 is3dThick
= IsThick(resourceType
, swizzleMode
);
4132 BOOL_32 is3dThin
= IsTex3d(resourceType
) && (is3dThick
== FALSE
);
4134 for (UINT_32 mipId
= 0; mipId
< numMipLevel
; mipId
++)
4138 if (finalDim
== FALSE
)
// Byte size of the current mip, with and without the depth factor.
// NOTE(review): the condition selecting between the two forms is not visible here.
4144 mipSize
= mipPitch
* mipHeight
* mipDepth
* (bpp
>> 3);
4148 mipSize
= mipPitch
* mipHeight
* (bpp
>> 3);
// Dimensions are replaced by the 256-byte block dimensions for this element size
// (3D table vs. 2D table).
// NOTE(review): the guarding condition (presumably a mipSize threshold) is not visible here.
4153 UINT_32 index
= Log2(bpp
>> 3);
4157 mipPitch
= Block256_3dZ
[index
].w
;
4158 mipHeight
= Block256_3dZ
[index
].h
;
4159 mipDepth
= Block256_3dZ
[index
].d
;
4163 mipPitch
= Block256_2d
[index
].w
;
4164 mipHeight
= Block256_2d
[index
].h
;
4173 inTail
= IsInMipTail(resourceType
, swizzleMode
, tailMaxDim
,
4174 mipPitch
, mipHeight
, mipDepth
);
// Record this mip id as the first one in the tail and take the tail's maximum dimensions.
4178 firstMipIdInTail
= mipId
;
4179 mipPitch
= tailMaxDim
.w
;
4180 mipHeight
= tailMaxDim
.h
;
4184 mipDepth
= tailMaxDim
.d
;
// Otherwise the mip dimensions are padded up to whole blocks.
4189 mipPitch
= PowTwoAlign(mipPitch
, blockWidth
);
4190 mipHeight
= PowTwoAlign(mipHeight
, blockHeight
);
4194 mipDepth
= PowTwoAlign(mipDepth
, blockDepth
);
// Optional per-mip output.
4199 if (pMipInfo
!= NULL
)
4201 pMipInfo
[mipId
].pitch
= mipPitch
;
4202 pMipInfo
[mipId
].height
= mipHeight
;
4203 pMipInfo
[mipId
].depth
= mipDepth
;
4204 pMipInfo
[mipId
].offset
= offset
;
// Advance the running byte offset by the (padded) size of this mip.
4207 offset
+= (mipPitch
* mipHeight
* mipDepth
* (bpp
>> 3));
// Step to the next mip level: each dimension is halved but never drops below 1.
4213 mipDepth
= Max(mipDepth
>> 1, 1u);
4218 mipPitch
= Max(mipPitch
>> 1, 1u);
4219 mipHeight
= Max(mipHeight
>> 1, 1u);
4221 if (is3dThick
|| is3dThin
)
4223 mipDepth
= Max(mipDepth
>> 1, 1u);
4228 return firstMipIdInTail
;
4232 ************************************************************************************************************************
4233 * Gfx9Lib::GetMetaMiptailInfo
4236 * Get mip tail coordinate information.
4240 ************************************************************************************************************************
// Fills pInfo[0 .. numMipInTail-1] with the start coordinate and dimensions of each mip that lives in
// the metadata mip tail, advancing mipCoord from one mip to the next.
// NOTE(review): the branch/brace lines, the case labels of the switch and some assignments are not
// visible in this listing, so the comments added here describe only the visible statements.
4242 VOID
Gfx9Lib::GetMetaMiptailInfo(
4243 ADDR2_META_MIP_INFO
* pInfo
, ///< [out] output structure to store per mip coord
4244 Dim3d mipCoord
, ///< [in] mip tail base coord
4245 UINT_32 numMipInTail
, ///< [in] number of mips in tail
4246 Dim3d
* pMetaBlkDim
///< [in] meta block width/height/depth
// A meta block deeper than 1 is treated as thick; the first tail mip starts at full block width,
// half block height and full block depth.
4249 BOOL_32 isThick
= (pMetaBlkDim
->d
> 1);
4250 UINT_32 mipWidth
= pMetaBlkDim
->w
;
4251 UINT_32 mipHeight
= pMetaBlkDim
->h
>> 1;
4252 UINT_32 mipDepth
= pMetaBlkDim
->d
;
// minInc is chosen from the meta block height.
// NOTE(review): the values assigned in the two else-if branches are not visible here.
4257 minInc
= (pMetaBlkDim
->h
>= 512) ? 128 : ((pMetaBlkDim
->h
== 256) ? 64 : 32);
4259 else if (pMetaBlkDim
->h
>= 1024)
4263 else if (pMetaBlkDim
->h
== 512)
// 0xFFFFFFFF serves as the "not set yet" marker for blk32MipId.
4272 UINT_32 blk32MipId
= 0xFFFFFFFF;
4274 for (UINT_32 mip
= 0; mip
< numMipInTail
; mip
++)
// Record the current mip's placement and size.
4276 pInfo
[mip
].inMiptail
= TRUE
;
4277 pInfo
[mip
].startX
= mipCoord
.w
;
4278 pInfo
[mip
].startY
= mipCoord
.h
;
4279 pInfo
[mip
].startZ
= mipCoord
.d
;
4280 pInfo
[mip
].width
= mipWidth
;
4281 pInfo
[mip
].height
= mipHeight
;
4282 pInfo
[mip
].depth
= mipDepth
;
4286 if (blk32MipId
== 0xFFFFFFFF)
// The next coordinate is derived from the start coordinate recorded for mip blk32MipId, plus a fixed
// step selected by the distance (mip - blk32MipId).
4291 mipCoord
.w
= pInfo
[blk32MipId
].startX
;
4292 mipCoord
.h
= pInfo
[blk32MipId
].startY
;
4293 mipCoord
.d
= pInfo
[blk32MipId
].startZ
;
4295 switch (mip
- blk32MipId
)
4298 mipCoord
.w
+= 32; // 16x16
4301 mipCoord
.h
+= 32; // 8x8
4304 mipCoord
.h
+= 32; // 4x4
4308 mipCoord
.h
+= 32; // 2x2
4312 mipCoord
.h
+= 32; // 1x1
4315 // The following are for BC/ASTC formats
4317 mipCoord
.h
+= 48; // 1/2 x 1/2
4320 mipCoord
.h
+= 48; // 1/4 x 1/4
4324 mipCoord
.h
+= 48; // 1/8 x 1/8
4328 mipCoord
.h
+= 48; // 1/16 x 1/16
4332 ADDR_ASSERT_ALWAYS();
// The next mip is 16 wide when mip == blk32MipId and 8 wide afterwards; height (and, on a line
// whose condition is not visible, depth) follow the width.
4336 mipWidth
= ((mip
- blk32MipId
) == 0) ? 16 : 8;
4337 mipHeight
= mipWidth
;
4341 mipDepth
= mipWidth
;
4346 if (mipWidth
<= minInc
)
4348 // if we're below the minimal increment...
4351 // For 3d, just go in z direction
4352 mipCoord
.d
+= mipDepth
;
4356 // For 2d, first go across, then down
4357 if ((mipWidth
* 2) == minInc
)
4359 // if we're 2 mips below, that's when we go back in x, and down in y
4360 mipCoord
.w
-= minInc
;
4361 mipCoord
.h
+= minInc
;
4365 // otherwise, just go across in x
4366 mipCoord
.w
+= minInc
;
4372 // On even mip, go down, otherwise, go across
4375 mipCoord
.w
+= mipWidth
;
4379 mipCoord
.h
+= mipHeight
;
4382 // Divide the width by 2
4384 // After the first mip in tail, the mip is always a square
4385 mipHeight
= mipWidth
;
4386 // ...or for 3d, a cube
4389 mipDepth
= mipWidth
;
4396 ************************************************************************************************************************
4397 * Gfx9Lib::GetMipStartPos
4400 * Internal function to get out information about mip logical start position
4403 * logical start position in macro block width/height/depth of one mip level within one slice
4404 ************************************************************************************************************************
// Computes the start position (in blocks) of a mip level inside one slice and, through
// pMipTailBytesOffset, a byte offset looked up from MipTailOffset256B.
// NOTE(review): several parameter lines and the branch/brace lines are not visible in this listing,
// so the comments added here describe only the visible statements.
4406 Dim3d
Gfx9Lib::GetMipStartPos(
4407 AddrResourceType resourceType
,
4408 AddrSwizzleMode swizzleMode
,
4413 UINT_32 blockHeight
,
4416 UINT_32 log2ElementBytes
,
4417 UINT_32
* pMipTailBytesOffset
) const
4419 Dim3d mipStartPos
= {0};
4420 const Dim3d tailMaxDim
= GetMipTailDim(resourceType
, swizzleMode
, blockWidth
, blockHeight
, blockDepth
);
4422 // Report mip in tail if Mip0 is already in mip tail
4423 BOOL_32 inMipTail
= IsInMipTail(resourceType
, swizzleMode
, tailMaxDim
, width
, height
, depth
);
4424 UINT_32 log2blkSize
= GetBlockSizeLog2(swizzleMode
);
// Defaults to mipId; replaced by (mipId - endingMip) near the end of the walk below.
4425 UINT_32 mipIndexInTail
= mipId
;
4427 if (inMipTail
== FALSE
)
4429 // Mip 0 dimension, unit in block
4430 UINT_32 mipWidthInBlk
= width
/ blockWidth
;
4431 UINT_32 mipHeightInBlk
= height
/ blockHeight
;
4432 UINT_32 mipDepthInBlk
= depth
/ blockDepth
;
4433 AddrMajorMode majorMode
= GetMajorMode(resourceType
,
4439 UINT_32 endingMip
= mipId
+ 1;
// Walk mips 1..mipId, accumulating the start position. Mips 1 and 3 step in the direction selected
// by the ADDR_MAJOR_Y test; other mips step along w, h or d according to the major mode.
4441 for (UINT_32 i
= 1; i
<= mipId
; i
++)
4443 if ((i
== 1) || (i
== 3))
4445 if (majorMode
== ADDR_MAJOR_Y
)
4447 mipStartPos
.w
+= mipWidthInBlk
;
4451 mipStartPos
.h
+= mipHeightInBlk
;
4456 if (majorMode
== ADDR_MAJOR_X
)
4458 mipStartPos
.w
+= mipWidthInBlk
;
4460 else if (majorMode
== ADDR_MAJOR_Y
)
4462 mipStartPos
.h
+= mipHeightInBlk
;
4466 mipStartPos
.d
+= mipDepthInBlk
;
// Decide whether the current mip falls into the mip tail from its size in blocks.
// Thick surfaces compute log2blkSize % 3, thin ones test log2blkSize & 1.
// NOTE(review): the lines that select among the three thick expressions are not visible here.
4470 BOOL_32 inTail
= FALSE
;
4472 if (IsThick(resourceType
, swizzleMode
))
4474 UINT_32 dim
= log2blkSize
% 3;
4479 (mipWidthInBlk
<= 2) && (mipHeightInBlk
== 1) && (mipDepthInBlk
<= 2);
4484 (mipWidthInBlk
== 1) && (mipHeightInBlk
<= 2) && (mipDepthInBlk
<= 2);
4489 (mipWidthInBlk
<= 2) && (mipHeightInBlk
<= 2) && (mipDepthInBlk
== 1);
4494 if (log2blkSize
& 1)
4496 inTail
= (mipWidthInBlk
<= 2) && (mipHeightInBlk
== 1);
4500 inTail
= (mipWidthInBlk
== 1) && (mipHeightInBlk
<= 2);
// Next mip: halve each block dimension with RoundHalf.
4510 mipWidthInBlk
= RoundHalf(mipWidthInBlk
);
4511 mipHeightInBlk
= RoundHalf(mipHeightInBlk
);
4512 mipDepthInBlk
= RoundHalf(mipDepthInBlk
);
// NOTE(review): endingMip is presumably updated inside the loop on a line not shown here; as
// initialized (mipId + 1) this test could never be true.
4515 if (mipId
>= endingMip
)
4518 mipIndexInTail
= mipId
- endingMip
;
// Index into MipTailOffset256B; the table entry is shifted left by 8 to produce the byte offset
// stored through pMipTailBytesOffset.
4524 UINT_32 index
= mipIndexInTail
+ MaxMacroBits
- log2blkSize
;
4525 ADDR_ASSERT(index
< sizeof(MipTailOffset256B
) / sizeof(UINT_32
));
4526 *pMipTailBytesOffset
= MipTailOffset256B
[index
] << 8;
4533 ************************************************************************************************************************
4534 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4537 * Internal function to calculate address from coord for tiled swizzle surface
4541 ************************************************************************************************************************
// Computes the byte address of a coordinate (x, y, slice, sample, mip) in a tiled/swizzled surface
// and stores it in pOut->addr. The surface layout is first re-derived via ComputeSurfaceInfoTiled.
// NOTE(review): the branch/brace lines and some argument lines are not visible in this listing, so
// the comments added here describe only the visible statements.
4543 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4544 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
4545 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
* pOut
///< [out] output structure
// Build a surface-info query from the address query; zero counts/dimensions are raised to 1.
4548 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn
= {0};
4549 localIn
.swizzleMode
= pIn
->swizzleMode
;
4550 localIn
.flags
= pIn
->flags
;
4551 localIn
.resourceType
= pIn
->resourceType
;
4552 localIn
.bpp
= pIn
->bpp
;
4553 localIn
.width
= Max(pIn
->unalignedWidth
, 1u);
4554 localIn
.height
= Max(pIn
->unalignedHeight
, 1u);
4555 localIn
.numSlices
= Max(pIn
->numSlices
, 1u);
4556 localIn
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
4557 localIn
.numSamples
= Max(pIn
->numSamples
, 1u);
4558 localIn
.numFrags
= Max(pIn
->numFrags
, 1u);
// The caller's pitch is forwarded only for single-mip surfaces.
4559 if (localIn
.numMipLevels
<= 1)
4561 localIn
.pitchInElement
= pIn
->pitchInElement
;
4564 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut
= {0};
4565 ADDR_E_RETURNCODE returnCode
= ComputeSurfaceInfoTiled(&localIn
, &localOut
);
// The request is valid when the surface info succeeded, the swizzle mode is thin or thick for this
// resource type, and a non-zero pipeBankXor is only used with an xor swizzle mode.
4567 BOOL_32 valid
= (returnCode
== ADDR_OK
) &&
4568 (IsThin(pIn
->resourceType
, pIn
->swizzleMode
) ||
4569 IsThick(pIn
->resourceType
, pIn
->swizzleMode
)) &&
4570 ((pIn
->pipeBankXor
== 0) || (IsXor(pIn
->swizzleMode
)));
4574 UINT_32 log2ElementBytes
= Log2(pIn
->bpp
>> 3);
4575 Dim3d mipStartPos
= {0};
4576 UINT_32 mipTailBytesOffset
= 0;
4578 if (pIn
->numMipLevels
> 1)
4580 // Mip-map chain cannot be MSAA surface
4581 ADDR_ASSERT((pIn
->numSamples
<= 1) && (pIn
->numFrags
<= 1));
4583 mipStartPos
= GetMipStartPos(pIn
->resourceType
,
4588 localOut
.blockWidth
,
4589 localOut
.blockHeight
,
4590 localOut
.blockSlices
,
4593 &mipTailBytesOffset
);
4596 UINT_32 interleaveOffset
= 0;
4597 UINT_32 pipeBits
= 0;
4598 UINT_32 pipeXor
= 0;
4599 UINT_32 bankBits
= 0;
4600 UINT_32 bankXor
= 0;
// ---- Thin swizzle modes ----
4602 if (IsThin(pIn
->resourceType
, pIn
->swizzleMode
))
4604 UINT_32 blockOffset
= 0;
4605 UINT_32 log2blkSize
= GetBlockSizeLog2(pIn
->swizzleMode
);
4607 if (IsZOrderSwizzle(pIn
->swizzleMode
))
4609 // Morton generation
// For 1-byte and 4-byte elements the low bits interleave (y, x) and the high bits interleave
// (x, y); the assert checks that the two parts do not overlap.
4610 if ((log2ElementBytes
== 0) || (log2ElementBytes
== 2))
4612 UINT_32 totalLowBits
= 6 - log2ElementBytes
;
4613 UINT_32 mortBits
= totalLowBits
/ 2;
4614 UINT_32 lowBitsValue
= MortonGen2d(pIn
->y
, pIn
->x
, mortBits
);
4615 // Are 9 bits enough?
4616 UINT_32 highBitsValue
=
4617 MortonGen2d(pIn
->x
>> mortBits
, pIn
->y
>> mortBits
, 9) << totalLowBits
;
4618 blockOffset
= lowBitsValue
| highBitsValue
;
4619 ADDR_ASSERT(blockOffset
== lowBitsValue
+ highBitsValue
);
4623 blockOffset
= MortonGen2d(pIn
->y
, pIn
->x
, 13);
4626 // Fill LSBs with sample bits
4627 if (pIn
->numSamples
> 1)
4629 blockOffset
*= pIn
->numSamples
;
4630 blockOffset
|= pIn
->sample
;
4633 // Shift according to BytesPP
4634 blockOffset
<<= log2ElementBytes
;
4638 // Micro block offset
4639 UINT_32 microBlockOffset
= ComputeSurface2DMicroBlockOffset(pIn
);
4640 blockOffset
= microBlockOffset
;
4642 // Micro block dimension
4643 ADDR_ASSERT(log2ElementBytes
< MaxNumOfBpp
);
4644 Dim2d microBlockDim
= Block256_2d
[log2ElementBytes
];
4645 // Morton generation; are 12 bits enough?
4647 MortonGen2d((pIn
->x
/ microBlockDim
.w
), (pIn
->y
/ microBlockDim
.h
), 12) << 8;
4649 // Sample bits start location
4650 UINT_32 sampleStart
= log2blkSize
- Log2(pIn
->numSamples
);
4651 // Join sample bits information to the highest Macro block bits
4652 if (IsNonPrtXor(pIn
->swizzleMode
))
4654 // Non-prt-Xor : xor highest Macro block bits with sample bits
4655 blockOffset
= blockOffset
^ (pIn
->sample
<< sampleStart
);
4659 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4660 // after this op, the blockOffset only contains log2 Macro block size bits
4661 blockOffset
%= (1 << sampleStart
);
4662 blockOffset
|= (pIn
->sample
<< sampleStart
);
4663 ADDR_ASSERT((blockOffset
>> log2blkSize
) == 0);
// Xor swizzle modes: split the offset into interleave / pipe / bank fields, fold the upper bits
// into the pipe and bank fields with FoldXor2d, then reassemble in the same field order.
4667 if (IsXor(pIn
->swizzleMode
))
4669 // Mask off bits above Macro block bits to keep page synonyms working for prt
4670 if (IsPrt(pIn
->swizzleMode
))
4672 blockOffset
&= ((1 << log2blkSize
) - 1);
4675 // Preserve offset inside pipe interleave
4676 interleaveOffset
= blockOffset
& ((1 << m_pipeInterleaveLog2
) - 1);
4677 blockOffset
>>= m_pipeInterleaveLog2
;
4680 pipeBits
= GetPipeXorBits(log2blkSize
);
4682 pipeXor
= FoldXor2d(blockOffset
, pipeBits
);
4683 blockOffset
>>= pipeBits
;
4686 bankBits
= GetBankXorBits(log2blkSize
);
4688 bankXor
= FoldXor2d(blockOffset
, bankBits
);
4689 blockOffset
>>= bankBits
;
4691 // Put all the part back together
4692 blockOffset
<<= bankBits
;
4693 blockOffset
|= bankXor
;
4694 blockOffset
<<= pipeBits
;
4695 blockOffset
|= pipeXor
;
4696 blockOffset
<<= m_pipeInterleaveLog2
;
4697 blockOffset
|= interleaveOffset
;
// The mip tail offset must not overlap bits already used by the block offset (OR equals ADD), and a
// non-zero tail offset requires the block offset to stay inside one block.
4700 ADDR_ASSERT((blockOffset
| mipTailBytesOffset
) == (blockOffset
+ mipTailBytesOffset
));
4701 ADDR_ASSERT((mipTailBytesOffset
== 0u) || (blockOffset
< (1u << log2blkSize
)));
4703 blockOffset
|= mipTailBytesOffset
;
4705 if (IsNonPrtXor(pIn
->swizzleMode
) && (pIn
->numSamples
<= 1))
4707 // Apply slice xor if not MSAA/PRT
4708 blockOffset
^= (ReverseBitVector(pIn
->slice
, pipeBits
) << m_pipeInterleaveLog2
);
4709 blockOffset
^= (ReverseBitVector(pIn
->slice
>> pipeBits
, bankBits
) <<
4710 (m_pipeInterleaveLog2
+ pipeBits
));
4713 returnCode
= ApplyCustomerPipeBankXor(pIn
->swizzleMode
, pIn
->pipeBankXor
,
4714 bankBits
, pipeBits
, &blockOffset
);
// Keep only the bits inside one macro block.
4716 blockOffset
%= (1 << log2blkSize
);
// Linear index of the macro block that contains the coordinate, including the mip start position;
// the final address is that index shifted by the block size log2, OR'ed with the in-block offset.
4718 UINT_32 pitchInMacroBlock
= localOut
.mipChainPitch
/ localOut
.blockWidth
;
4719 UINT_32 paddedHeightInMacroBlock
= localOut
.mipChainHeight
/ localOut
.blockHeight
;
4720 UINT_32 sliceSizeInMacroBlock
= pitchInMacroBlock
* paddedHeightInMacroBlock
;
4721 UINT_64 macroBlockIndex
=
4722 (pIn
->slice
+ mipStartPos
.d
) * sliceSizeInMacroBlock
+
4723 ((pIn
->y
/ localOut
.blockHeight
) + mipStartPos
.h
) * pitchInMacroBlock
+
4724 ((pIn
->x
/ localOut
.blockWidth
) + mipStartPos
.w
);
4726 pOut
->addr
= blockOffset
| (macroBlockIndex
<< log2blkSize
);
// ---- Thick swizzle modes (NOTE(review): presumably the else branch of the IsThin test above) ----
4730 UINT_32 log2blkSize
= GetBlockSizeLog2(pIn
->swizzleMode
);
4732 Dim3d microBlockDim
= Block1K_3d
[log2ElementBytes
];
4734 UINT_32 blockOffset
= MortonGen3d((pIn
->x
/ microBlockDim
.w
),
4735 (pIn
->y
/ microBlockDim
.h
),
4736 (pIn
->slice
/ microBlockDim
.d
),
4740 blockOffset
|= ComputeSurface3DMicroBlockOffset(pIn
);
// Same interleave / pipe / bank split as the thin path, using FoldXor3d.
4742 if (IsXor(pIn
->swizzleMode
))
4744 // Mask off bits above Macro block bits to keep page synonyms working for prt
4745 if (IsPrt(pIn
->swizzleMode
))
4747 blockOffset
&= ((1 << log2blkSize
) - 1);
4750 // Preserve offset inside pipe interleave
4751 interleaveOffset
= blockOffset
& ((1 << m_pipeInterleaveLog2
) - 1);
4752 blockOffset
>>= m_pipeInterleaveLog2
;
4755 pipeBits
= GetPipeXorBits(log2blkSize
);
4757 pipeXor
= FoldXor3d(blockOffset
, pipeBits
);
4758 blockOffset
>>= pipeBits
;
4761 bankBits
= GetBankXorBits(log2blkSize
);
4763 bankXor
= FoldXor3d(blockOffset
, bankBits
);
4764 blockOffset
>>= bankBits
;
4766 // Put all the part back together
4767 blockOffset
<<= bankBits
;
4768 blockOffset
|= bankXor
;
4769 blockOffset
<<= pipeBits
;
4770 blockOffset
|= pipeXor
;
4771 blockOffset
<<= m_pipeInterleaveLog2
;
4772 blockOffset
|= interleaveOffset
;
4775 ADDR_ASSERT((blockOffset
| mipTailBytesOffset
) == (blockOffset
+ mipTailBytesOffset
));
4776 ADDR_ASSERT((mipTailBytesOffset
== 0u) || (blockOffset
< (1u << log2blkSize
)));
4777 blockOffset
|= mipTailBytesOffset
;
4779 returnCode
= ApplyCustomerPipeBankXor(pIn
->swizzleMode
, pIn
->pipeBankXor
,
4780 bankBits
, pipeBits
, &blockOffset
);
4782 blockOffset
%= (1 << log2blkSize
);
// Block coordinates of the requested texel, offset by the mip start position.
// NOTE(review): the doubled '+' in the zb expression parses as a binary plus followed by a unary
// plus, so the value is unaffected; it looks like a typo.
4784 UINT_32 xb
= pIn
->x
/ localOut
.blockWidth
+ mipStartPos
.w
;
4785 UINT_32 yb
= pIn
->y
/ localOut
.blockHeight
+ mipStartPos
.h
;
4786 UINT_32 zb
= pIn
->slice
/ localOut
.blockSlices
+ + mipStartPos
.d
;
4788 UINT_32 pitchInBlock
= localOut
.mipChainPitch
/ localOut
.blockWidth
;
4789 UINT_32 sliceSizeInBlock
=
4790 (localOut
.mipChainHeight
/ localOut
.blockHeight
) * pitchInBlock
;
4791 UINT_64 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
4793 pOut
->addr
= blockOffset
| (blockIndex
<< log2blkSize
);
// NOTE(review): presumably reached when `valid` is FALSE; the guarding else line is not visible here.
4798 returnCode
= ADDR_INVALIDPARAMS
;
4805 ************************************************************************************************************************
4806 * Gfx9Lib::ComputeSurfaceLinearPadding
4809 * Internal function to calculate padding for linear swizzle 2D/3D surface
4813 ************************************************************************************************************************
4815 ADDR_E_RETURNCODE
Gfx9Lib::ComputeSurfaceLinearPadding(
4816 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input srtucture
4817 UINT_32
* pMipmap0PaddedWidth
, ///< [out] padded width in element
4818 UINT_32
* pSlice0PaddedHeight
, ///< [out] padded height for HW
4819 ADDR2_MIP_INFO
* pMipInfo
///< [out] per mip information
4822 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
4824 UINT_32 elementBytes
= pIn
->bpp
>> 3;
4825 UINT_32 pitchAlignInElement
= 0;
4827 if (pIn
->swizzleMode
== ADDR_SW_LINEAR_GENERAL
)
4829 ADDR_ASSERT(pIn
->numMipLevels
<= 1);
4830 ADDR_ASSERT(pIn
->numSlices
<= 1);
4831 pitchAlignInElement
= 1;
4835 pitchAlignInElement
= (256 / elementBytes
);
4838 UINT_32 mipChainWidth
= PowTwoAlign(pIn
->width
, pitchAlignInElement
);
4839 UINT_32 slice0PaddedHeight
= pIn
->height
;
4841 returnCode
= ApplyCustomizedPitchHeight(pIn
, elementBytes
, pitchAlignInElement
,
4842 &mipChainWidth
, &slice0PaddedHeight
);
4844 if (returnCode
== ADDR_OK
)
4846 UINT_32 mipChainHeight
= 0;
4847 UINT_32 mipHeight
= pIn
->height
;
4848 UINT_32 mipDepth
= (pIn
->resourceType
== ADDR_RSRC_TEX_3D
) ? pIn
->numSlices
: 1;
4850 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
4852 if (pMipInfo
!= NULL
)
4854 pMipInfo
[i
].offset
= mipChainWidth
* mipChainHeight
* elementBytes
;
4855 pMipInfo
[i
].pitch
= mipChainWidth
;
4856 pMipInfo
[i
].height
= mipHeight
;
4857 pMipInfo
[i
].depth
= mipDepth
;
4860 mipChainHeight
+= mipHeight
;
4861 mipHeight
= RoundHalf(mipHeight
);
4862 mipHeight
= Max(mipHeight
, 1u);
4865 *pMipmap0PaddedWidth
= mipChainWidth
;
4866 *pSlice0PaddedHeight
= (pIn
->numMipLevels
> 1) ? mipChainHeight
: slice0PaddedHeight
;