2 * Copyright © 2017 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
34 #include "gfx9addrlib.h"
36 #include "gfx9_gb_reg.h"
37 #include "gfx9_enum.h"
40 #include "amdgpu_id.h"
46 ////////////////////////////////////////////////////////////////////////////////////////////////////
47 ////////////////////////////////////////////////////////////////////////////////////////////////////
53 ************************************************************************************************************************
57 * Creates a Gfx9Lib object.
60 * Returns a Gfx9Lib object pointer.
61 ************************************************************************************************************************
63 Addr::Lib
* Gfx9HwlInit(const Client
* pClient
)
65 return V2::Gfx9Lib::CreateObj(pClient
);
71 ////////////////////////////////////////////////////////////////////////////////////////////////////
72 // Static Const Member
73 ////////////////////////////////////////////////////////////////////////////////////////////////////
// Static per-swizzle-mode flag table, declared with ADDR_SW_MAX_TYPE entries.
// Each row is the flag set for the swizzle mode named in its trailing comment; the columns follow
// the header row below (Linear, 256B, 4KB, 64KB, Var, Z, Std, Disp, Rot, XOR, T, RtOpt).
// In the rows shown, XOR is set for the *_T and *_X modes, T only for the *_T modes, and RtOpt
// is 0 everywhere. The Gfx9Lib constructor memcpy()s this table into m_swizzleModeTable.
75 const SwizzleModeFlags
Gfx9Lib::SwizzleModeTable
[ADDR_SW_MAX_TYPE
] =
76 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt
77 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
78 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
79 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D
80 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R
82 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
83 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
84 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D
85 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R
87 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
88 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
89 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D
90 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_R
92 {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z
93 {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S
94 {0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D
95 {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R
97 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T
98 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T
99 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T
100 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T
102 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_X
103 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_X
104 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_X
105 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_X
107 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X
108 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X
109 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X
110 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X
112 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X
113 {0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X
114 {0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X
115 {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X
116 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
119 const UINT_32
Gfx9Lib::MipTailOffset256B
[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
120 8, 6, 5, 4, 3, 2, 1, 0};
122 const Dim3d
Gfx9Lib::Block256_3dS
[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
124 const Dim3d
Gfx9Lib::Block256_3dZ
[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
127 ************************************************************************************************************************
133 ************************************************************************************************************************
// Constructor taking the client handle.
// Visible body: tags the instance with the AI_ADDRLIB class id, zero-fills m_settings, and copies
// the static SwizzleModeTable into the per-instance m_swizzleModeTable.
// NOTE(review): listing lines 136-139 (between the signature and the first statement) are not
// visible in this view.
135 Gfx9Lib::Gfx9Lib(const Client
* pClient
)
140 m_class
= AI_ADDRLIB
;
// Start with every chip-specific setting cleared.
141 memset(&m_settings
, 0, sizeof(m_settings
));
// The copy size is sizeof(SwizzleModeTable); m_swizzleModeTable must be at least that large.
142 memcpy(m_swizzleModeTable
, SwizzleModeTable
, sizeof(SwizzleModeTable
));
146 ************************************************************************************************************************
151 ************************************************************************************************************************
158 ************************************************************************************************************************
159 * Gfx9Lib::HwlComputeHtileInfo
162 * Interface function stub of AddrComputeHtileInfo
166 ************************************************************************************************************************
// Fills *pOut with the HTILE layout derived from *pIn.
// What the visible statements do:
//   * numRbTotal is m_se * m_rbPerSe when hTileFlags.rbAligned is set, otherwise 1.
//   * The meta block starts as {8, 8, 1}; its width and height are shifted left by widthAmp and
//     heightAmp, which together add up to numCompressBlkPerMetaBlkLog2.
//   * GetMetaMipInfo() supplies the meta block counts used for pitch, height and sliceSize.
//   * htileBytes is sliceSize * numMetaBlkZ passed through PowTwoAlign() with sizeAlign.
// NOTE(review): the embedded listing numbers skip lines (e.g. 171-172, 174-175, 213-218, 251+),
// so braces, else/preprocessor lines, some declarations, the tail of one call and the return
// statement are not visible in this view.
168 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeHtileInfo(
169 const ADDR2_COMPUTE_HTILE_INFO_INPUT
* pIn
, ///< [in] input structure
170 ADDR2_COMPUTE_HTILE_INFO_OUTPUT
* pOut
///< [out] output structure
173 UINT_32 numPipeTotal
= GetPipeNumForMetaAddressing(pIn
->hTileFlags
.pipeAligned
,
176 UINT_32 numRbTotal
= pIn
->hTileFlags
.rbAligned
? m_se
* m_rbPerSe
: 1;
178 UINT_32 numCompressBlkPerMetaBlk
, numCompressBlkPerMetaBlkLog2
;
// Single pipe and single RB: 2^10 compress blocks per meta block. The second assignment below
// (listing line 186) is presumably the else branch; the else line itself is not visible.
180 if ((numPipeTotal
== 1) && (numRbTotal
== 1))
182 numCompressBlkPerMetaBlkLog2
= 10;
186 numCompressBlkPerMetaBlkLog2
= m_seLog2
+ m_rbPerSeLog2
+ 10;
189 numCompressBlkPerMetaBlk
= 1 << numCompressBlkPerMetaBlkLog2
;
// Split the amplification bits between width and height; with mip maps width gets the
// floor half, otherwise RoundHalf() decides the width share.
191 Dim3d metaBlkDim
= {8, 8, 1};
192 UINT_32 totalAmpBits
= numCompressBlkPerMetaBlkLog2
;
193 UINT_32 widthAmp
= (pIn
->numMipLevels
> 1) ? (totalAmpBits
>> 1) : RoundHalf(totalAmpBits
);
194 UINT_32 heightAmp
= totalAmpBits
- widthAmp
;
195 metaBlkDim
.w
<<= widthAmp
;
196 metaBlkDim
.h
<<= heightAmp
;
// Cross-check: rebuild the same dimensions one doubling at a time and assert they match.
199 Dim3d metaBlkDimDbg
= {8, 8, 1};
200 for (UINT_32 index
= 0; index
< numCompressBlkPerMetaBlkLog2
; index
++)
202 if ((metaBlkDimDbg
.h
< metaBlkDimDbg
.w
) ||
203 ((pIn
->numMipLevels
> 1) && (metaBlkDimDbg
.h
== metaBlkDimDbg
.w
)))
205 metaBlkDimDbg
.h
<<= 1;
209 metaBlkDimDbg
.w
<<= 1;
212 ADDR_ASSERT((metaBlkDimDbg
.w
== metaBlkDim
.w
) && (metaBlkDimDbg
.h
== metaBlkDim
.h
));
// HTILE is requested with dataThick == FALSE.
219 GetMetaMipInfo(pIn
->numMipLevels
, &metaBlkDim
, FALSE
, pOut
->pMipInfo
,
220 pIn
->unalignedWidth
, pIn
->unalignedHeight
, pIn
->numSlices
,
221 &numMetaBlkX
, &numMetaBlkY
, &numMetaBlkZ
);
223 UINT_32 sizeAlign
= numPipeTotal
* numRbTotal
* m_pipeInterleaveBytes
;
225 pOut
->pitch
= numMetaBlkX
* metaBlkDim
.w
;
226 pOut
->height
= numMetaBlkY
* metaBlkDim
.h
;
// The factor 4 is the per-compress-block size used by this formula.
227 pOut
->sliceSize
= numMetaBlkX
* numMetaBlkY
* numCompressBlkPerMetaBlk
* 4;
229 pOut
->metaBlkWidth
= metaBlkDim
.w
;
230 pOut
->metaBlkHeight
= metaBlkDim
.h
;
231 pOut
->metaBlkNumPerSlice
= numMetaBlkX
* numMetaBlkY
;
233 pOut
->baseAlign
= Max(numCompressBlkPerMetaBlk
* 4, sizeAlign
);
// With metaBaseAlignFix the base alignment is raised to at least the swizzle block size.
235 if (m_settings
.metaBaseAlignFix
)
237 pOut
->baseAlign
= Max(pOut
->baseAlign
, GetBlockSize(pIn
->swizzleMode
));
// Non-xor swizzle modes with more than two pipes may need a larger size alignment.
240 if ((IsXor(pIn
->swizzleMode
) == FALSE
) && (numPipeTotal
> 2))
242 UINT_32 additionalAlign
= numPipeTotal
* numCompressBlkPerMetaBlk
* 2;
244 if (additionalAlign
> sizeAlign
)
246 sizeAlign
= additionalAlign
;
250 pOut
->htileBytes
= PowTwoAlign(pOut
->sliceSize
* numMetaBlkZ
, sizeAlign
);
256 ************************************************************************************************************************
257 * Gfx9Lib::HwlComputeCmaskInfo
260 * Interface function stub of AddrComputeCmaskInfo
264 ************************************************************************************************************************
// Fills *pOut with the CMASK layout derived from *pIn.
// What the visible statements do:
//   * numRbTotal is m_se * m_rbPerSe when cMaskFlags.rbAligned is set, otherwise 1.
//   * numCompressBlkPerMetaBlkLog2 is 13 for single pipe + single RB; the other visible path
//     computes m_seLog2 + m_rbPerSeLog2 + 10 and clamps it to at least 13.
//   * The 8x8 meta block is widened by widthAmp and heightened by heightAmp bits
//     (height gets the floor half of the total).
//   * Meta block counts are ceil(unaligned size / meta block size); slices are at least 1.
//   * sliceSize is (X * Y * numCompressBlkPerMetaBlk) >> 1.
// NOTE(review): the embedded listing numbers skip lines (e.g. 269-270, 275-276, 338+), so
// braces, else/preprocessor lines, the tail of one call and the return statement are not visible.
266 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeCmaskInfo(
267 const ADDR2_COMPUTE_CMASK_INFO_INPUT
* pIn
, ///< [in] input structure
268 ADDR2_COMPUTE_CMASK_INFO_OUTPUT
* pOut
///< [out] output structure
271 // TODO: Clarify with AddrLib team
272 // ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
274 UINT_32 numPipeTotal
= GetPipeNumForMetaAddressing(pIn
->cMaskFlags
.pipeAligned
,
277 UINT_32 numRbTotal
= pIn
->cMaskFlags
.rbAligned
? m_se
* m_rbPerSe
: 1;
279 UINT_32 numCompressBlkPerMetaBlkLog2
, numCompressBlkPerMetaBlk
;
281 if ((numPipeTotal
== 1) && (numRbTotal
== 1))
283 numCompressBlkPerMetaBlkLog2
= 13;
287 numCompressBlkPerMetaBlkLog2
= m_seLog2
+ m_rbPerSeLog2
+ 10;
// Never go below 2^13 compress blocks per meta block.
289 numCompressBlkPerMetaBlkLog2
= Max(numCompressBlkPerMetaBlkLog2
, 13u);
292 numCompressBlkPerMetaBlk
= 1 << numCompressBlkPerMetaBlkLog2
;
294 Dim2d metaBlkDim
= {8, 8};
295 UINT_32 totalAmpBits
= numCompressBlkPerMetaBlkLog2
;
296 UINT_32 heightAmp
= totalAmpBits
>> 1;
297 UINT_32 widthAmp
= totalAmpBits
- heightAmp
;
298 metaBlkDim
.w
<<= widthAmp
;
299 metaBlkDim
.h
<<= heightAmp
;
// Cross-check: rebuild the same dimensions one doubling at a time and assert they match.
302 Dim2d metaBlkDimDbg
= {8, 8};
303 for (UINT_32 index
= 0; index
< numCompressBlkPerMetaBlkLog2
; index
++)
305 if (metaBlkDimDbg
.h
< metaBlkDimDbg
.w
)
307 metaBlkDimDbg
.h
<<= 1;
311 metaBlkDimDbg
.w
<<= 1;
314 ADDR_ASSERT((metaBlkDimDbg
.w
== metaBlkDim
.w
) && (metaBlkDimDbg
.h
== metaBlkDim
.h
));
// Round the surface up to whole meta blocks.
317 UINT_32 numMetaBlkX
= (pIn
->unalignedWidth
+ metaBlkDim
.w
- 1) / metaBlkDim
.w
;
318 UINT_32 numMetaBlkY
= (pIn
->unalignedHeight
+ metaBlkDim
.h
- 1) / metaBlkDim
.h
;
319 UINT_32 numMetaBlkZ
= Max(pIn
->numSlices
, 1u);
321 UINT_32 sizeAlign
= numPipeTotal
* numRbTotal
* m_pipeInterleaveBytes
;
323 pOut
->pitch
= numMetaBlkX
* metaBlkDim
.w
;
324 pOut
->height
= numMetaBlkY
* metaBlkDim
.h
;
325 pOut
->sliceSize
= (numMetaBlkX
* numMetaBlkY
* numCompressBlkPerMetaBlk
) >> 1;
326 pOut
->cmaskBytes
= PowTwoAlign(pOut
->sliceSize
* numMetaBlkZ
, sizeAlign
);
327 pOut
->baseAlign
= Max(numCompressBlkPerMetaBlk
>> 1, sizeAlign
);
// With metaBaseAlignFix the base alignment is raised to at least the swizzle block size.
329 if (m_settings
.metaBaseAlignFix
)
331 pOut
->baseAlign
= Max(pOut
->baseAlign
, GetBlockSize(pIn
->swizzleMode
));
334 pOut
->metaBlkWidth
= metaBlkDim
.w
;
335 pOut
->metaBlkHeight
= metaBlkDim
.h
;
337 pOut
->metaBlkNumPerSlice
= numMetaBlkX
* numMetaBlkY
;
343 ************************************************************************************************************************
344 * Gfx9Lib::GetMetaMipInfo
351 ************************************************************************************************************************
// Computes how many meta blocks a mip chain needs in X/Y/Z and records per-mip placement
// (start coordinates, aligned size, in-tail flag) in pInfo[].
// What the visible statements do:
//   * block counts start as ceil(mip0 dimension / meta block dimension) on each axis;
//   * the tail thresholds are the meta block width, half the meta block height, and the
//     meta block depth;
//   * when numMipLevels > 1 a major axis (Z, X or Y) is selected from the block counts, and
//     *pMipDim may grow by ceil(*pMipDim / 2) if it is < 3, *pOrderDim > orderLimit and
//     numMipLevels > 3;
//   * per mip level the size is aligned to the meta block, stored in pInfo[mip], and then halved
//     (never below 1) before inTail is re-evaluated for the next level;
//   * the final block counts are written through pNumMetaBlkX/Y/Z.
// NOTE(review): many listing lines are missing from this view (braces, else lines, the
// declarations of pMipDim/pOrderDim/orderLimit, the conditions that choose which mipCoord
// component advances, and part of the GetMetaMiptailInfo call), so the exact nesting cannot be
// read off this text.
353 VOID
Gfx9Lib::GetMetaMipInfo(
354 UINT_32 numMipLevels
, ///< [in] number of mip levels
355 Dim3d
* pMetaBlkDim
, ///< [in] meta block dimension
356 BOOL_32 dataThick
, ///< [in] data surface is thick
357 ADDR2_META_MIP_INFO
* pInfo
, ///< [out] meta mip info
358 UINT_32 mip0Width
, ///< [in] mip0 width
359 UINT_32 mip0Height
, ///< [in] mip0 height
360 UINT_32 mip0Depth
, ///< [in] mip0 depth
361 UINT_32
* pNumMetaBlkX
, ///< [out] number of metablock X in mipchain
362 UINT_32
* pNumMetaBlkY
, ///< [out] number of metablock Y in mipchain
363 UINT_32
* pNumMetaBlkZ
) ///< [out] number of metablock Z in mipchain
// Ceiling division of each mip0 dimension by the meta block dimension.
366 UINT_32 numMetaBlkX
= (mip0Width
+ pMetaBlkDim
->w
- 1) / pMetaBlkDim
->w
;
367 UINT_32 numMetaBlkY
= (mip0Height
+ pMetaBlkDim
->h
- 1) / pMetaBlkDim
->h
;
368 UINT_32 numMetaBlkZ
= (mip0Depth
+ pMetaBlkDim
->d
- 1) / pMetaBlkDim
->d
;
// Tail thresholds: full block width and depth, half the block height.
369 UINT_32 tailWidth
= pMetaBlkDim
->w
;
370 UINT_32 tailHeight
= pMetaBlkDim
->h
>> 1;
371 UINT_32 tailDepth
= pMetaBlkDim
->d
;
372 BOOL_32 inTail
= FALSE
;
373 AddrMajorMode major
= ADDR_MAJOR_MAX_TYPE
;
375 if (numMipLevels
> 1)
// Z-major is only considered for thick data with strictly more Z blocks than X and Y blocks.
377 if (dataThick
&& (numMetaBlkZ
> numMetaBlkX
) && (numMetaBlkZ
> numMetaBlkY
))
380 major
= ADDR_MAJOR_Z
;
382 else if (numMetaBlkX
>= numMetaBlkY
)
385 major
= ADDR_MAJOR_X
;
390 major
= ADDR_MAJOR_Y
;
// Depth only takes part in the tail test for thick data.
393 inTail
= ((mip0Width
<= tailWidth
) &&
394 (mip0Height
<= tailHeight
) &&
395 ((dataThick
== FALSE
) || (mip0Depth
<= tailDepth
)));
403 if (major
== ADDR_MAJOR_Z
)
406 pMipDim
= &numMetaBlkY
;
407 pOrderDim
= &numMetaBlkZ
;
410 else if (major
== ADDR_MAJOR_X
)
413 pMipDim
= &numMetaBlkY
;
414 pOrderDim
= &numMetaBlkX
;
420 pMipDim
= &numMetaBlkX
;
421 pOrderDim
= &numMetaBlkY
;
425 if ((*pMipDim
< 3) && (*pOrderDim
> orderLimit
) && (numMipLevels
> 3))
// Adds ceil(*pMipDim / 2) to *pMipDim.
431 *pMipDim
+= ((*pMipDim
/ 2) + (*pMipDim
& 1));
438 UINT_32 mipWidth
= mip0Width
;
439 UINT_32 mipHeight
= mip0Height
;
440 UINT_32 mipDepth
= mip0Depth
;
441 Dim3d mipCoord
= {0};
443 for (UINT_32 mip
= 0; mip
< numMipLevels
; mip
++)
447 GetMetaMiptailInfo(&pInfo
[mip
], mipCoord
, numMipLevels
- mip
,
// Align the current mip to whole meta blocks before recording it.
453 mipWidth
= PowTwoAlign(mipWidth
, pMetaBlkDim
->w
);
454 mipHeight
= PowTwoAlign(mipHeight
, pMetaBlkDim
->h
);
455 mipDepth
= PowTwoAlign(mipDepth
, pMetaBlkDim
->d
);
457 pInfo
[mip
].inMiptail
= FALSE
;
458 pInfo
[mip
].startX
= mipCoord
.w
;
459 pInfo
[mip
].startY
= mipCoord
.h
;
460 pInfo
[mip
].startZ
= mipCoord
.d
;
461 pInfo
[mip
].width
= mipWidth
;
462 pInfo
[mip
].height
= mipHeight
;
// Thin data always reports a depth of 1.
463 pInfo
[mip
].depth
= dataThick
? mipDepth
: 1;
465 if ((mip
>= 3) || (mip
& 1))
470 mipCoord
.w
+= mipWidth
;
473 mipCoord
.h
+= mipHeight
;
476 mipCoord
.d
+= mipDepth
;
487 mipCoord
.h
+= mipHeight
;
490 mipCoord
.w
+= mipWidth
;
493 mipCoord
.h
+= mipHeight
;
// Next mip level: halve each dimension, clamped to 1.
500 mipWidth
= Max(mipWidth
>> 1, 1u);
501 mipHeight
= Max(mipHeight
>> 1, 1u);
502 mipDepth
= Max(mipDepth
>> 1, 1u);
504 inTail
= ((mipWidth
<= tailWidth
) &&
505 (mipHeight
<= tailHeight
) &&
506 ((dataThick
== FALSE
) || (mipDepth
<= tailDepth
)));
511 *pNumMetaBlkX
= numMetaBlkX
;
512 *pNumMetaBlkY
= numMetaBlkY
;
513 *pNumMetaBlkZ
= numMetaBlkZ
;
517 ************************************************************************************************************************
518 * Gfx9Lib::HwlComputeDccInfo
521 * Interface function to compute DCC key info
525 ************************************************************************************************************************
// Fills *pOut with the DCC key layout derived from *pIn.
// What the visible statements do:
//   * The metaLinear path asserts unconditionally (linear metadata is not supported on GFX9) and
//     then sets dccRamBaseAlign / dccRamSize from numPipeTotal and dataSurfaceSize / 256.
//   * Otherwise minMetaBlkSize is 65536 for thick data or 4096 for thin, divided by the fragment
//     count; with more than one pipe or RB, numCompressBlkPerMetaBlk is raised to at least
//     m_se * m_rbPerSe * (262144 or 1024) and capped at 65536 * bpp.
//   * The meta block starts as the compress block and is grown inside a loop that runs
//     log2(numCompressBlkPerMetaBlk) times (index doubles each pass).
//   * GetMetaMipInfo() supplies the block counts; sizes and alignments are then derived.
// NOTE(review): the embedded listing numbers skip lines (e.g. 535-539, 590-614), so the first
// branch of the if/else chain, the statements that grow metaBlkDim and several braces/else lines
// are not visible in this view.
527 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeDccInfo(
528 const ADDR2_COMPUTE_DCCINFO_INPUT
* pIn
, ///< [in] input structure
529 ADDR2_COMPUTE_DCCINFO_OUTPUT
* pOut
///< [out] output structure
532 BOOL_32 dataLinear
= IsLinear(pIn
->swizzleMode
);
533 BOOL_32 metaLinear
= pIn
->dccKeyFlags
.linear
;
534 BOOL_32 pipeAligned
= pIn
->dccKeyFlags
.pipeAligned
;
540 else if (metaLinear
== TRUE
)
545 UINT_32 numPipeTotal
= GetPipeNumForMetaAddressing(pipeAligned
, pIn
->swizzleMode
);
549 // Linear metadata support was removed for GFX9! No one can use this feature on GFX9.
550 ADDR_ASSERT_ALWAYS();
552 pOut
->dccRamBaseAlign
= numPipeTotal
* m_pipeInterleaveBytes
;
553 pOut
->dccRamSize
= PowTwoAlign((pIn
->dataSurfaceSize
/ 256), pOut
->dccRamBaseAlign
);
557 BOOL_32 dataThick
= IsThick(pIn
->resourceType
, pIn
->swizzleMode
);
559 UINT_32 minMetaBlkSize
= dataThick
? 65536 : 4096;
// Treat zero fragment / slice counts as one.
561 UINT_32 numFrags
= Max(pIn
->numFrags
, 1u);
562 UINT_32 numSlices
= Max(pIn
->numSlices
, 1u);
564 minMetaBlkSize
/= numFrags
;
566 UINT_32 numCompressBlkPerMetaBlk
= minMetaBlkSize
;
568 UINT_32 numRbTotal
= pIn
->dccKeyFlags
.rbAligned
? m_se
* m_rbPerSe
: 1;
570 if ((numPipeTotal
> 1) || (numRbTotal
> 1))
572 numCompressBlkPerMetaBlk
=
573 Max(numCompressBlkPerMetaBlk
, m_se
* m_rbPerSe
* (dataThick
? 262144 : 1024));
// Upper bound on the meta block size: 65536 * bpp.
575 if (numCompressBlkPerMetaBlk
> 65536 * pIn
->bpp
)
577 numCompressBlkPerMetaBlk
= 65536 * pIn
->bpp
;
581 Dim3d compressBlkDim
= GetDccCompressBlk(pIn
->resourceType
, pIn
->swizzleMode
, pIn
->bpp
);
582 Dim3d metaBlkDim
= compressBlkDim
;
// index doubles every iteration, so the loop body runs once per power of two below
// numCompressBlkPerMetaBlk.
584 for (UINT_32 index
= 1; index
< numCompressBlkPerMetaBlk
; index
<<= 1)
586 if ((metaBlkDim
.h
< metaBlkDim
.w
) ||
587 ((pIn
->numMipLevels
> 1) && (metaBlkDim
.h
== metaBlkDim
.w
)))
589 if ((dataThick
== FALSE
) || (metaBlkDim
.h
<= metaBlkDim
.d
))
600 if ((dataThick
== FALSE
) || (metaBlkDim
.w
<= metaBlkDim
.d
))
615 GetMetaMipInfo(pIn
->numMipLevels
, &metaBlkDim
, dataThick
, pOut
->pMipInfo
,
616 pIn
->unalignedWidth
, pIn
->unalignedHeight
, numSlices
,
617 &numMetaBlkX
, &numMetaBlkY
, &numMetaBlkZ
);
619 UINT_32 sizeAlign
= numPipeTotal
* numRbTotal
* m_pipeInterleaveBytes
;
// More fragments than can be compressed scale the size alignment by the ratio.
621 if (numFrags
> m_maxCompFrag
)
623 sizeAlign
*= (numFrags
/ m_maxCompFrag
);
626 pOut
->dccRamSize
= numMetaBlkX
* numMetaBlkY
* numMetaBlkZ
*
627 numCompressBlkPerMetaBlk
* numFrags
;
628 pOut
->dccRamSize
= PowTwoAlign(pOut
->dccRamSize
, sizeAlign
);
629 pOut
->dccRamBaseAlign
= Max(numCompressBlkPerMetaBlk
, sizeAlign
);
// With metaBaseAlignFix the base alignment is raised to at least the swizzle block size.
631 if (m_settings
.metaBaseAlignFix
)
633 pOut
->dccRamBaseAlign
= Max(pOut
->dccRamBaseAlign
, GetBlockSize(pIn
->swizzleMode
));
636 pOut
->pitch
= numMetaBlkX
* metaBlkDim
.w
;
637 pOut
->height
= numMetaBlkY
* metaBlkDim
.h
;
638 pOut
->depth
= numMetaBlkZ
* metaBlkDim
.d
;
640 pOut
->compressBlkWidth
= compressBlkDim
.w
;
641 pOut
->compressBlkHeight
= compressBlkDim
.h
;
642 pOut
->compressBlkDepth
= compressBlkDim
.d
;
644 pOut
->metaBlkWidth
= metaBlkDim
.w
;
645 pOut
->metaBlkHeight
= metaBlkDim
.h
;
646 pOut
->metaBlkDepth
= metaBlkDim
.d
;
648 pOut
->metaBlkNumPerSlice
= numMetaBlkX
* numMetaBlkY
;
// Fast-clear size counts at most m_maxCompFrag fragments.
649 pOut
->fastClearSizePerSlice
=
650 pOut
->metaBlkNumPerSlice
* numCompressBlkPerMetaBlk
* Min(numFrags
, m_maxCompFrag
);
657 ************************************************************************************************************************
658 * Gfx9Lib::HwlGetMaxAlignments
661 * Gets maximum alignments
664 ************************************************************************************************************************
// Reports the maximum base alignment: pOut->baseAlign is set to the surface base alignment of
// the ADDR_SW_64KB swizzle mode, as returned by HwlComputeSurfaceBaseAlign().
// NOTE(review): listing lines 668-669 and 671+ (closing of the signature, braces, return) are
// not visible in this view.
666 ADDR_E_RETURNCODE
Gfx9Lib::HwlGetMaxAlignments(
667 ADDR_GET_MAX_ALIGNMENTS_OUTPUT
* pOut
///< [out] output structure
670 pOut
->baseAlign
= HwlComputeSurfaceBaseAlign(ADDR_SW_64KB
);
676 ************************************************************************************************************************
677 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
680 * Interface function stub of AddrComputeCmaskAddrFromCoord
684 ************************************************************************************************************************
// Translates an (x, y, slice) coordinate into a CMASK address and bit position.
// What the visible statements do:
//   * build an ADDR2_COMPUTE_CMASK_INFO_INPUT from *pIn (zero sizes are clamped to 1) and call
//     ComputeCmaskInfo();
//   * on ADDR_OK, obtain a meta equation via GetMetaEquation() using the FMASK element size,
//     compute the linear meta block index, and solve the equation;
//   * the solved value is split into pOut->addr (value >> 1) and pOut->bitPosition
//     (0 or 4, from the lowest bit);
//   * the low numPipeBits of pIn->pipeXor are XORed into the address at m_pipeInterleaveLog2.
// NOTE(review): the embedded listing numbers skip lines (e.g. 689-690, 712-714, 734-735, 739+),
// so the metaEq declaration, the tail of one call, braces and the return are not visible.
686 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeCmaskAddrFromCoord(
687 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
688 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT
* pOut
///< [out] output structure
691 ADDR2_COMPUTE_CMASK_INFO_INPUT input
= {0};
692 input
.size
= sizeof(input
);
693 input
.cMaskFlags
= pIn
->cMaskFlags
;
694 input
.colorFlags
= pIn
->colorFlags
;
695 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
696 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
697 input
.numSlices
= Max(pIn
->numSlices
, 1u);
698 input
.swizzleMode
= pIn
->swizzleMode
;
699 input
.resourceType
= pIn
->resourceType
;
701 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output
= {0};
702 output
.size
= sizeof(output
);
704 ADDR_E_RETURNCODE returnCode
= ComputeCmaskInfo(&input
, &output
);
706 if (returnCode
== ADDR_OK
)
// fmaskBpp is in bits; >> 3 converts it to bytes before taking the log2.
708 UINT_32 fmaskBpp
= GetFmaskBpp(pIn
->numSamples
, pIn
->numFrags
);
709 UINT_32 fmaskElementBytesLog2
= Log2(fmaskBpp
>> 3);
710 UINT_32 metaBlkWidthLog2
= Log2(output
.metaBlkWidth
);
711 UINT_32 metaBlkHeightLog2
= Log2(output
.metaBlkHeight
);
715 GetMetaEquation(&metaEq
, 0, fmaskElementBytesLog2
, 0, pIn
->cMaskFlags
,
716 Gfx9DataFmask
, pIn
->swizzleMode
, pIn
->resourceType
,
717 metaBlkWidthLog2
, metaBlkHeightLog2
, 0, 3, 3, 0);
// Meta block coordinates of the requested texel.
719 UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
720 UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
721 UINT_32 zb
= pIn
->slice
;
// Linear index of that meta block within the whole surface.
723 UINT_32 pitchInBlock
= output
.pitch
/ output
.metaBlkWidth
;
724 UINT_32 sliceSizeInBlock
= (output
.height
/ output
.metaBlkHeight
) * pitchInBlock
;
725 UINT_32 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
727 UINT_64 address
= metaEq
.solve(pIn
->x
, pIn
->y
, pIn
->slice
, 0, blockIndex
);
// The lowest bit of the solved value selects bit position 0 or 4; the rest is the address.
729 pOut
->addr
= address
>> 1;
730 pOut
->bitPosition
= static_cast<UINT_32
>((address
& 1) << 2);
733 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->cMaskFlags
.pipeAligned
,
// Keep only the low numPipeBits of the caller's pipe xor.
736 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
738 pOut
->addr
^= (pipeXor
<< m_pipeInterleaveLog2
);
745 ************************************************************************************************************************
746 * Gfx9Lib::HwlComputeHtileAddrFromCoord
749 * Interface function stub of AddrComputeHtileAddrFromCoord
753 ************************************************************************************************************************
// Translates an (x, y, slice) coordinate into an HTILE address.
// What the visible statements do:
//   * numMipLevels > 1 yields ADDR_NOTIMPLEMENTED;
//   * otherwise an ADDR2_COMPUTE_HTILE_INFO_INPUT is built from *pIn (zero sizes are clamped
//     to 1) and ComputeHtileInfo() is called;
//   * on ADDR_OK, a meta equation is obtained via GetMetaEquation() for depth/stencil data with
//     ADDR_RSRC_TEX_2D, the linear meta block index is computed, and the equation is solved;
//   * pOut->addr is the solved value >> 1, XORed with the low numPipeBits of pIn->pipeXor
//     shifted to m_pipeInterleaveLog2.
// NOTE(review): the embedded listing numbers skip lines (e.g. 758-759, 765-767, 789-791,
// 809-810, 814+), so else lines, the metaEq declaration, the tail of one call, braces and the
// return are not visible in this view.
755 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeHtileAddrFromCoord(
756 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
757 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT
* pOut
///< [out] output structure
760 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
// Mip-mapped HTILE addressing is not implemented here.
762 if (pIn
->numMipLevels
> 1)
764 returnCode
= ADDR_NOTIMPLEMENTED
;
768 ADDR2_COMPUTE_HTILE_INFO_INPUT input
= {0};
769 input
.size
= sizeof(input
);
770 input
.hTileFlags
= pIn
->hTileFlags
;
771 input
.depthFlags
= pIn
->depthflags
;
772 input
.swizzleMode
= pIn
->swizzleMode
;
773 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
774 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
775 input
.numSlices
= Max(pIn
->numSlices
, 1u);
776 input
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
778 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output
= {0};
779 output
.size
= sizeof(output
);
781 returnCode
= ComputeHtileInfo(&input
, &output
);
783 if (returnCode
== ADDR_OK
)
// bpp is in bits; >> 3 converts it to bytes before taking the log2.
785 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
786 UINT_32 metaBlkWidthLog2
= Log2(output
.metaBlkWidth
);
787 UINT_32 metaBlkHeightLog2
= Log2(output
.metaBlkHeight
);
788 UINT_32 numSamplesLog2
= Log2(pIn
->numSamples
);
792 GetMetaEquation(&metaEq
, 0, elementBytesLog2
, numSamplesLog2
, pIn
->hTileFlags
,
793 Gfx9DataDepthStencil
, pIn
->swizzleMode
, ADDR_RSRC_TEX_2D
,
794 metaBlkWidthLog2
, metaBlkHeightLog2
, 0, 3, 3, 0);
// Meta block coordinates of the requested texel.
796 UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
797 UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
798 UINT_32 zb
= pIn
->slice
;
// Linear index of that meta block within the whole surface.
800 UINT_32 pitchInBlock
= output
.pitch
/ output
.metaBlkWidth
;
801 UINT_32 sliceSizeInBlock
= (output
.height
/ output
.metaBlkHeight
) * pitchInBlock
;
802 UINT_32 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
804 UINT_64 address
= metaEq
.solve(pIn
->x
, pIn
->y
, pIn
->slice
, 0, blockIndex
);
// The solved value is halved to form the output address.
806 pOut
->addr
= address
>> 1;
808 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->hTileFlags
.pipeAligned
,
// Keep only the low numPipeBits of the caller's pipe xor.
811 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
813 pOut
->addr
^= (pipeXor
<< m_pipeInterleaveLog2
);
821 ************************************************************************************************************************
822 * Gfx9Lib::HwlComputeHtileCoordFromAddr
825 * Interface function stub of AddrComputeHtileCoordFromAddr
829 ************************************************************************************************************************
// Inverse of the HTILE address computation: recovers (x, y, slice) from an HTILE address.
// What the visible statements do:
//   * numMipLevels > 1 yields ADDR_NOTIMPLEMENTED;
//   * otherwise an ADDR2_COMPUTE_HTILE_INFO_INPUT is built from *pIn and ComputeHtileInfo() is
//     called;
//   * on ADDR_OK, the pipe xor is removed from pIn->addr and the result is doubled to form
//     nibbleAddress, which metaEq.solveAddr() turns into in-block x/y/z/s and a block index m;
//   * m is decomposed into slice / block row / block column and combined with x and y.
// NOTE(review): unlike HwlComputeHtileAddrFromCoord, input.depthFlags is not forwarded here
// (listing lines 845-851 are contiguous) — confirm this asymmetry is intended.
// NOTE(review): other listing lines are missing (e.g. 834-835, 864-866, 872-873, 887+), so the
// metaEq declaration, the tail of one call, braces, else lines and the return are not visible.
831 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeHtileCoordFromAddr(
832 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT
* pIn
, ///< [in] input structure
833 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT
* pOut
///< [out] output structure
836 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
// Mip-mapped HTILE addressing is not implemented here.
838 if (pIn
->numMipLevels
> 1)
840 returnCode
= ADDR_NOTIMPLEMENTED
;
844 ADDR2_COMPUTE_HTILE_INFO_INPUT input
= {0};
845 input
.size
= sizeof(input
);
846 input
.hTileFlags
= pIn
->hTileFlags
;
847 input
.swizzleMode
= pIn
->swizzleMode
;
848 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
849 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
850 input
.numSlices
= Max(pIn
->numSlices
, 1u);
851 input
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
853 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output
= {0};
854 output
.size
= sizeof(output
);
856 returnCode
= ComputeHtileInfo(&input
, &output
);
858 if (returnCode
== ADDR_OK
)
// bpp is in bits; >> 3 converts it to bytes before taking the log2.
860 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
861 UINT_32 metaBlkWidthLog2
= Log2(output
.metaBlkWidth
);
862 UINT_32 metaBlkHeightLog2
= Log2(output
.metaBlkHeight
);
863 UINT_32 numSamplesLog2
= Log2(pIn
->numSamples
);
867 GetMetaEquation(&metaEq
, 0, elementBytesLog2
, numSamplesLog2
, pIn
->hTileFlags
,
868 Gfx9DataDepthStencil
, pIn
->swizzleMode
, ADDR_RSRC_TEX_2D
,
869 metaBlkWidthLog2
, metaBlkHeightLog2
, 0, 3, 3, 0);
871 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->hTileFlags
.pipeAligned
,
// Keep only the low numPipeBits of the caller's pipe xor.
874 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
// Undo the pipe xor, then double the address to get the nibble address the equation expects.
876 UINT_64 nibbleAddress
= (pIn
->addr
^ (pipeXor
<< m_pipeInterleaveLog2
)) << 1;
878 UINT_32 pitchInBlock
= output
.pitch
/ output
.metaBlkWidth
;
879 UINT_32 sliceSizeInBlock
= (output
.height
/ output
.metaBlkHeight
) * pitchInBlock
;
881 UINT_32 x
, y
, z
, s
, m
;
882 metaEq
.solveAddr(nibbleAddress
, sliceSizeInBlock
, x
, y
, z
, s
, m
);
// m is the linear meta block index: split it into slice, block row and block column.
884 pOut
->slice
= m
/ sliceSizeInBlock
;
885 pOut
->y
= ((m
% sliceSizeInBlock
) / pitchInBlock
) * output
.metaBlkHeight
+ y
;
886 pOut
->x
= (m
% pitchInBlock
) * output
.metaBlkWidth
+ x
;
894 ************************************************************************************************************************
895 * Gfx9Lib::HwlComputeDccAddrFromCoord
898 * Interface function stub of AddrComputeDccAddrFromCoord
902 ************************************************************************************************************************
// Translates an (x, y, slice, sample) coordinate into a DCC key address.
// What the visible statements do:
//   * numMipLevels > 1, mipId > 1 or a linear DCC key yields ADDR_NOTIMPLEMENTED;
//   * otherwise an ADDR2_COMPUTE_DCCINFO_INPUT is built from *pIn (zero counts are clamped to 1)
//     and ComputeDccInfo() is called;
//   * on ADDR_OK, a meta equation is obtained via GetMetaEquation() for colour data using the
//     log2 of the meta block and compress block dimensions, the linear meta block index is
//     computed, and the equation is solved;
//   * pOut->addr is the solved value >> 1, XORed with the low numPipeBits of pIn->pipeXor
//     shifted to m_pipeInterleaveLog2.
// NOTE(review): the guard tests mipId > 1, so mipId == 1 is accepted even though multi-level
// surfaces are rejected — confirm whether mipId > 0 was intended.
// NOTE(review): listing lines are missing (e.g. 913-915, 944-946, 965-966, 970+), so else lines,
// the metaEq declaration, the tail of one call, braces and the return are not visible.
904 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeDccAddrFromCoord(
905 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT
* pIn
,
906 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT
* pOut
) const
908 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
910 if ((pIn
->numMipLevels
> 1) || (pIn
->mipId
> 1) || pIn
->dccKeyFlags
.linear
)
912 returnCode
= ADDR_NOTIMPLEMENTED
;
916 ADDR2_COMPUTE_DCCINFO_INPUT input
= {0};
917 input
.size
= sizeof(input
);
918 input
.dccKeyFlags
= pIn
->dccKeyFlags
;
919 input
.colorFlags
= pIn
->colorFlags
;
920 input
.swizzleMode
= pIn
->swizzleMode
;
921 input
.resourceType
= pIn
->resourceType
;
922 input
.bpp
= pIn
->bpp
;
923 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
924 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
925 input
.numSlices
= Max(pIn
->numSlices
, 1u);
926 input
.numFrags
= Max(pIn
->numFrags
, 1u);
927 input
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
929 ADDR2_COMPUTE_DCCINFO_OUTPUT output
= {0};
930 output
.size
= sizeof(output
);
932 returnCode
= ComputeDccInfo(&input
, &output
);
934 if (returnCode
== ADDR_OK
)
// bpp is in bits; >> 3 converts it to bytes before taking the log2.
936 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
// The sample count fed to the equation comes from numFrags.
937 UINT_32 numSamplesLog2
= Log2(pIn
->numFrags
);
938 UINT_32 metaBlkWidthLog2
= Log2(output
.metaBlkWidth
);
939 UINT_32 metaBlkHeightLog2
= Log2(output
.metaBlkHeight
);
940 UINT_32 metaBlkDepthLog2
= Log2(output
.metaBlkDepth
);
941 UINT_32 compBlkWidthLog2
= Log2(output
.compressBlkWidth
);
942 UINT_32 compBlkHeightLog2
= Log2(output
.compressBlkHeight
);
943 UINT_32 compBlkDepthLog2
= Log2(output
.compressBlkDepth
);
947 GetMetaEquation(&metaEq
, pIn
->mipId
, elementBytesLog2
, numSamplesLog2
, pIn
->dccKeyFlags
,
948 Gfx9DataColor
, pIn
->swizzleMode
, pIn
->resourceType
,
949 metaBlkWidthLog2
, metaBlkHeightLog2
, metaBlkDepthLog2
,
950 compBlkWidthLog2
, compBlkHeightLog2
, compBlkDepthLog2
);
// Meta block coordinates of the requested texel; the slice is divided by the block depth.
952 UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
953 UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
954 UINT_32 zb
= pIn
->slice
/ output
.metaBlkDepth
;
// Linear index of that meta block within the whole surface.
956 UINT_32 pitchInBlock
= output
.pitch
/ output
.metaBlkWidth
;
957 UINT_32 sliceSizeInBlock
= (output
.height
/ output
.metaBlkHeight
) * pitchInBlock
;
958 UINT_32 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
960 UINT_64 address
= metaEq
.solve(pIn
->x
, pIn
->y
, pIn
->slice
, pIn
->sample
, blockIndex
);
// The solved value is halved to form the output address.
962 pOut
->addr
= address
>> 1;
964 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->dccKeyFlags
.pipeAligned
,
// Keep only the low numPipeBits of the caller's pipe xor.
967 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
969 pOut
->addr
^= (pipeXor
<< m_pipeInterleaveLog2
);
977 ************************************************************************************************************************
978 * Gfx9Lib::HwlInitGlobalParams
981 * Initializes global parameters
984 * TRUE if all settings are valid
986 ************************************************************************************************************************
// Initializes chip-wide parameters from the creation input.
// What the visible statements do:
//   * when m_settings.isArcticIsland is set, pCreateIn->regValue.gbAddrConfig is decoded through
//     a GB_ADDR_CONFIG union and each field (NUM_PIPES, PIPE_INTERLEAVE_SIZE, NUM_BANKS,
//     NUM_SHADER_ENGINES, NUM_RB_PER_SE, MAX_COMPRESSED_FRAGS) is handled by its own switch;
//   * PIPE_INTERLEAVE_SIZE sets m_pipeInterleaveBytes and m_pipeInterleaveLog2 (8..11);
//   * MAX_COMPRESSED_FRAGS sets m_maxCompFragLog2 (0..3);
//   * m_blockVarSizeLog2 is taken from the create input, asserted to be 0 or within [17, 20],
//     and then clamped into [17, 20];
//   * InitEquationTable() is called afterwards.
// NOTE(review): most case bodies, every break/default line, braces, the else line before
// ADDR_NOT_IMPLEMENTED() and the return are missing from this listing, so the values assigned
// for pipes, banks, shader engines and RBs cannot be read off this view.
988 BOOL_32
Gfx9Lib::HwlInitGlobalParams(
989 const ADDR_CREATE_INPUT
* pCreateIn
) ///< [in] create input
991 BOOL_32 valid
= TRUE
;
993 if (m_settings
.isArcticIsland
)
995 GB_ADDR_CONFIG gbAddrConfig
;
997 gbAddrConfig
.u32All
= pCreateIn
->regValue
.gbAddrConfig
;
999 // These values are copied from CModel code
1000 switch (gbAddrConfig
.bits
.NUM_PIPES
)
1002 case ADDR_CONFIG_1_PIPE
:
1006 case ADDR_CONFIG_2_PIPE
:
1010 case ADDR_CONFIG_4_PIPE
:
1014 case ADDR_CONFIG_8_PIPE
:
1018 case ADDR_CONFIG_16_PIPE
:
1022 case ADDR_CONFIG_32_PIPE
:
1027 ADDR_ASSERT_ALWAYS();
// Pipe interleave size: byte count plus its log2.
1031 switch (gbAddrConfig
.bits
.PIPE_INTERLEAVE_SIZE
)
1033 case ADDR_CONFIG_PIPE_INTERLEAVE_256B
:
1034 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_256B
;
1035 m_pipeInterleaveLog2
= 8;
1037 case ADDR_CONFIG_PIPE_INTERLEAVE_512B
:
1038 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_512B
;
1039 m_pipeInterleaveLog2
= 9;
1041 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB
:
1042 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_1KB
;
1043 m_pipeInterleaveLog2
= 10;
1045 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB
:
1046 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_2KB
;
1047 m_pipeInterleaveLog2
= 11;
1050 ADDR_ASSERT_ALWAYS();
1054 switch (gbAddrConfig
.bits
.NUM_BANKS
)
1056 case ADDR_CONFIG_1_BANK
:
1060 case ADDR_CONFIG_2_BANK
:
1064 case ADDR_CONFIG_4_BANK
:
1068 case ADDR_CONFIG_8_BANK
:
1072 case ADDR_CONFIG_16_BANK
:
1077 ADDR_ASSERT_ALWAYS();
1081 switch (gbAddrConfig
.bits
.NUM_SHADER_ENGINES
)
1083 case ADDR_CONFIG_1_SHADER_ENGINE
:
1087 case ADDR_CONFIG_2_SHADER_ENGINE
:
1091 case ADDR_CONFIG_4_SHADER_ENGINE
:
1095 case ADDR_CONFIG_8_SHADER_ENGINE
:
1100 ADDR_ASSERT_ALWAYS();
1104 switch (gbAddrConfig
.bits
.NUM_RB_PER_SE
)
1106 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE
:
1110 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE
:
1114 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE
:
1119 ADDR_ASSERT_ALWAYS();
// Maximum compressed fragments: only the log2 assignment is visible for each case.
1123 switch (gbAddrConfig
.bits
.MAX_COMPRESSED_FRAGS
)
1125 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS
:
1127 m_maxCompFragLog2
= 0;
1129 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS
:
1131 m_maxCompFragLog2
= 1;
1133 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS
:
1135 m_maxCompFragLog2
= 2;
1137 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS
:
1139 m_maxCompFragLog2
= 3;
1142 ADDR_ASSERT_ALWAYS();
// Variable block size: 0 is tolerated by the assert but is clamped up to 17 right after.
1146 m_blockVarSizeLog2
= pCreateIn
->regValue
.blockVarSizeLog2
;
1147 ADDR_ASSERT((m_blockVarSizeLog2
== 0) ||
1148 ((m_blockVarSizeLog2
>= 17u) && (m_blockVarSizeLog2
<= 20u)));
1149 m_blockVarSizeLog2
= Min(Max(17u, m_blockVarSizeLog2
), 20u);
1154 ADDR_NOT_IMPLEMENTED();
1159 InitEquationTable();
1166 ************************************************************************************************************************
1167 * Gfx9Lib::HwlConvertChipFamily
1170 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1173 ************************************************************************************************************************
// Records per-ASIC feature bits in m_settings based on the incoming family/revision pair.
// The local result `family` starts as ADDR_CHIP_FAMILY_AI. The switch case labels and the
// return statement are not visible in this listing.
1175 ChipFamily
Gfx9Lib::HwlConvertChipFamily(
1176 UINT_32 uChipFamily
, ///< [in] chip family defined in atiid.h
1177 UINT_32 uChipRevision
) ///< [in] chip revision defined in "asic_family"_id.h
1179 ChipFamily family
= ADDR_CHIP_FAMILY_AI
;
1181 switch (uChipFamily
)
// First visible branch: flags the part as Arctic Islands and derives isVega10 from the revision.
1184 m_settings
.isArcticIsland
= 1;
1185 m_settings
.isVega10
= ASICREV_IS_VEGA10_P(uChipRevision
);
// isDce12 is written right after the isVega10 test.
1187 if (m_settings
.isVega10
)
1189 m_settings
.isDce12
= 1;
// Further settings written in this branch.
1192 m_settings
.metaBaseAlignFix
= 1;
1194 m_settings
.depthPipeXorDisable
= 1;
// Second visible branch: also Arctic Islands, with isRaven derived from the revision.
1198 m_settings
.isArcticIsland
= 1;
1199 m_settings
.isRaven
= ASICREV_IS_RAVEN(uChipRevision
);
// isDcn1 is written right after the isRaven test.
1201 if (m_settings
.isRaven
)
1203 m_settings
.isDcn1
= 1;
1206 m_settings
.metaBaseAlignFix
= 1;
1208 m_settings
.depthPipeXorDisable
= 1;
// Remaining branch: always-failing assert with a fixed message.
1212 ADDR_ASSERT(!"This should be a Fusion");
1220 ************************************************************************************************************************
1221 * Gfx9Lib::GetRbEquation
1227 ************************************************************************************************************************
// Fills *pRbEq with the RB equation. The equation is resized to
// (numRbPerSeLog2 + numSeLog2) bits and populated with x/y coordinates
// whose first ordinal is rbRegion (5 when there is one RB per SE, otherwise 4).
1229 VOID
Gfx9Lib::GetRbEquation(
1230 CoordEq
* pRbEq
, ///< [out] rb equation
1231 UINT_32 numRbPerSeLog2
, ///< [in] number of rb per shader engine (log2)
1232 UINT_32 numSeLog2
) ///< [in] number of shader engine (log2)
1234 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1235 UINT_32 rbRegion
= (numRbPerSeLog2
== 0) ? 5 : 4;
1236 Coordinate
cx('x', rbRegion
);
1237 Coordinate
cy('y', rbRegion
);
// Total number of RB id bits across all shader engines.
1240 UINT_32 numRbTotalLog2
= numRbPerSeLog2
+ numSeLog2
;
1242 // Clear the rb equation
1244 pRbEq
->resize(numRbTotalLog2
);
1246 if ((numSeLog2
> 0) && (numRbPerSeLog2
== 1))
1248 // Special case when more than 1 SE, and 2 RB per SE
1249 (*pRbEq
)[0].add(cx
);
1250 (*pRbEq
)[0].add(cy
);
1253 (*pRbEq
)[0].add(cy
);
// NOTE(review): `start` and `idx` are declared on lines missing from this listing.
// Two passes over the bits from `start` up to numRbTotalLog2 are made (numBits = 2 * remaining bits).
1257 UINT_32 numBits
= 2 * (numRbTotalLog2
- start
);
1259 for (UINT_32 i
= 0; i
< numBits
; i
++)
// Index counts upward from `start`; once start + i reaches numRbTotalLog2 it counts back down.
1262 start
+ (((start
+ i
) >= numRbTotalLog2
) ? (2 * (numRbTotalLog2
- start
) - i
- 1) : i
);
1266 (*pRbEq
)[idx
].add(cx
);
1271 (*pRbEq
)[idx
].add(cy
);
1278 ************************************************************************************************************************
1279 * Gfx9Lib::GetDataEquation
1282 * Get data equation for fmask and Z
1285 ************************************************************************************************************************
// Builds the data-surface address equation into *pDataEq.
// Color surfaces are handled per swizzle class (linear, thick, thin).
// The remaining path interleaves sample bits with x-major and y-major pixel bits.
1287 VOID
Gfx9Lib::GetDataEquation(
1288 CoordEq
* pDataEq
, ///< [out] data surface equation
1289 Gfx9DataType dataSurfaceType
, ///< [in] data surface type
1290 AddrSwizzleMode swizzleMode
, ///< [in] data surface swizzle mode
1291 AddrResourceType resourceType
, ///< [in] data surface resource type
1292 UINT_32 elementBytesLog2
, ///< [in] data surface element bytes (log2)
1293 UINT_32 numSamplesLog2
) ///< [in] data surface sample count (log2)
// Base coordinates (ordinal 0) for x, y, z and sample.
1296 Coordinate
cx('x', 0);
1297 Coordinate
cy('y', 0);
1298 Coordinate
cz('z', 0);
1299 Coordinate
cs('s', 0);
1301 // Clear the equation
1303 pDataEq
->resize(27);
1305 if (dataSurfaceType
== Gfx9DataColor
)
1307 if (IsLinear(swizzleMode
))
// Linear color: a 49-bit equation where every bit receives the 'm' coordinate.
1309 Coordinate
cm('m', 0);
1311 pDataEq
->resize(49);
1313 for (UINT_32 i
= 0; i
< 49; i
++)
1315 (*pDataEq
)[i
].add(cm
);
1319 else if (IsThick(resourceType
, swizzleMode
))
1321 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1323 if (IsStandardSwizzle(resourceType
, swizzleMode
))
1325 // Standard 3d swizzle
1326 // Fill in bottom x bits
1327 for (i
= elementBytesLog2
; i
< 4; i
++)
1329 (*pDataEq
)[i
].add(cx
);
1332 // Fill in 2 bits of y and then z
1333 for (i
= 4; i
< 6; i
++)
1335 (*pDataEq
)[i
].add(cy
);
1338 for (i
= 6; i
< 8; i
++)
1340 (*pDataEq
)[i
].add(cz
);
// Bits 8 and 9 depend on the element size.
1343 if (elementBytesLog2
< 2)
1345 // fill in z & y bit
1346 (*pDataEq
)[8].add(cz
);
1347 (*pDataEq
)[9].add(cy
);
1351 else if (elementBytesLog2
== 2)
1353 // fill in y and x bit
1354 (*pDataEq
)[8].add(cy
);
1355 (*pDataEq
)[9].add(cx
);
1362 (*pDataEq
)[8].add(cx
);
1364 (*pDataEq
)[9].add(cx
);
// Non-standard thick path: 2D morton for the low bits, then numZs z bits above m2dEnd.
1371 UINT_32 m2dEnd
= (elementBytesLog2
==0) ? 3 : ((elementBytesLog2
< 4) ? 4 : 5);
1372 UINT_32 numZs
= (elementBytesLog2
== 0 || elementBytesLog2
== 4) ?
1373 2 : ((elementBytesLog2
== 1) ? 3 : 1);
1374 pDataEq
->mort2d(cx
, cy
, elementBytesLog2
, m2dEnd
);
1375 for (i
= m2dEnd
+ 1; i
<= m2dEnd
+ numZs
; i
++)
1377 (*pDataEq
)[i
].add(cz
);
// Bits 6 and 7 are filled per element size.
1380 if ((elementBytesLog2
== 0) || (elementBytesLog2
== 3))
1383 (*pDataEq
)[6].add(cx
);
1384 (*pDataEq
)[7].add(cz
);
1388 else if (elementBytesLog2
== 2)
1391 (*pDataEq
)[6].add(cy
);
1392 (*pDataEq
)[7].add(cz
);
1397 (*pDataEq
)[8].add(cy
);
1398 (*pDataEq
)[9].add(cx
);
1402 // Fill in bit 10 and up
1403 pDataEq
->mort3d( cz
, cy
, cx
, 10 );
1405 else if (IsThin(resourceType
, swizzleMode
))
1407 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swizzleMode
);
// Number of y bits in the micro tile; sample bits start at tileSplitStart.
1409 UINT_32 microYBits
= (8 - elementBytesLog2
) / 2;
1410 UINT_32 tileSplitStart
= blockSizeLog2
- numSamplesLog2
;
1412 // Fill in bottom x bits
1413 for (i
= elementBytesLog2
; i
< 4; i
++)
1415 (*pDataEq
)[i
].add(cx
);
1418 // Fill in bottom y bits
1419 for (i
= 4; i
< 4 + microYBits
; i
++)
1421 (*pDataEq
)[i
].add(cy
);
1424 // Fill in last of the micro_x bits
1425 for (i
= 4 + microYBits
; i
< 8; i
++)
1427 (*pDataEq
)[i
].add(cx
);
1430 // Fill in x/y bits below sample split
1431 pDataEq
->mort2d(cy
, cx
, 8, tileSplitStart
- 1);
1432 // Fill in sample bits
1433 for (i
= 0; i
< numSamplesLog2
; i
++)
1436 (*pDataEq
)[tileSplitStart
+ i
].add(cs
);
1438 // Fill in x/y bits above sample split
// The x/y order above the split depends on the parity of numSamplesLog2 versus blockSizeLog2.
1439 if ((numSamplesLog2
& 1) ^ (blockSizeLog2
& 1))
1441 pDataEq
->mort2d(cx
, cy
, blockSizeLog2
);
1445 pDataEq
->mort2d(cy
, cx
, blockSizeLog2
);
1450 ADDR_ASSERT_ALWAYS();
// Remaining path: sample bits start at elementBytesLog2, pixel bits follow,
// and y-major ordering starts at bit 6 + numSamplesLog2.
1456 UINT_32 sampleStart
= elementBytesLog2
;
1457 UINT_32 pixelStart
= elementBytesLog2
+ numSamplesLog2
;
1458 UINT_32 ymajStart
= 6 + numSamplesLog2
;
1460 for (UINT_32 s
= 0; s
< numSamplesLog2
; s
++)
1463 (*pDataEq
)[sampleStart
+ s
].add(cs
);
1466 // Put in the x-major order pixel bits
1467 pDataEq
->mort2d(cx
, cy
, pixelStart
, ymajStart
- 1);
1468 // Put in the y-major order pixel bits
1469 pDataEq
->mort2d(cy
, cx
, ymajStart
);
1474 ************************************************************************************************************************
1475 * Gfx9Lib::GetPipeEquation
1481 ************************************************************************************************************************
// Derives the pipe equation (*pPipeEq) from a copy of the data equation.
// numPipeLog2 bits are taken starting at pipeInterleaveLog2 (plus pipeStart for non-color surfaces).
// For xor swizzle modes, higher address bits are xor-ed into the result.
1483 VOID
Gfx9Lib::GetPipeEquation(
1484 CoordEq
* pPipeEq
, ///< [out] pipe equation
1485 CoordEq
* pDataEq
, ///< [in] data equation
1486 UINT_32 pipeInterleaveLog2
, ///< [in] pipe interleave (log2)
1487 UINT_32 numPipeLog2
, ///< [in] number of pipes (log2)
1488 UINT_32 numSamplesLog2
, ///< [in] data surface sample count (log2)
1489 Gfx9DataType dataSurfaceType
, ///< [in] data surface type
1490 AddrSwizzleMode swizzleMode
, ///< [in] data surface swizzle mode
1491 AddrResourceType resourceType
///< [in] data surface resource type
1494 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swizzleMode
);
// Work on a local copy (dataEq, declared on a line missing from this listing).
1497 pDataEq
->copy(dataEq
);
1499 if (dataSurfaceType
== Gfx9DataColor
)
// Color: shift the local equation down by numSamplesLog2, starting at (blockSizeLog2 - numSamplesLog2).
1501 INT_32 shift
= static_cast<INT_32
>(numSamplesLog2
);
1502 dataEq
.shift(-shift
, blockSizeLog2
- numSamplesLog2
);
// Initial pipe equation: numPipeLog2 bits beginning at pipeInterleaveLog2.
1505 dataEq
.copy(*pPipeEq
, pipeInterleaveLog2
, numPipeLog2
);
1507 // This section should only apply to z/stencil, maybe fmask
1508 // If the pipe bit is below the comp block size,
1509 // then keep moving up the address until we find a bit that is above
1510 UINT_32 pipeStart
= 0;
1512 if (dataSurfaceType
!= Gfx9DataColor
)
1514 Coordinate
tileMin('x', 3);
1516 while (dataEq
[pipeInterleaveLog2
+ pipeStart
][0] < tileMin
)
1521 // if pipe is 0, then the first pipe bit is above the comp block size,
1522 // so we don't need to do anything
1523 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1524 // we will get the same pipe equation
1527 for (UINT_32 i
= 0; i
< numPipeLog2
; i
++)
1529 // Copy the jth bit above pipe interleave to the current pipe equation bit
1530 dataEq
[pipeInterleaveLog2
+ pipeStart
+ i
].copyto((*pPipeEq
)[i
]);
1535 if (IsPrt(swizzleMode
))
1537 // Clear out bits above the block size if prt's are enabled
1538 dataEq
.resize(blockSizeLog2
);
1542 if (IsXor(swizzleMode
))
// NOTE(review): xorMask, xorMask2 and co are declared on lines missing from this listing.
1546 if (IsThick(resourceType
, swizzleMode
))
// Thick: take 2 * numPipeLog2 bits above the pipe bits and fold each adjacent pair into one mask bit.
1550 dataEq
.copy(xorMask2
, pipeInterleaveLog2
+ numPipeLog2
, 2 * numPipeLog2
);
1552 xorMask
.resize(numPipeLog2
);
1554 for (UINT_32 pipeIdx
= 0; pipeIdx
< numPipeLog2
; pipeIdx
++)
1556 xorMask
[pipeIdx
].add(xorMask2
[2 * pipeIdx
]);
1557 xorMask
[pipeIdx
].add(xorMask2
[2 * pipeIdx
+ 1]);
1562 // Xor in the bits above the pipe+gpu bits
1563 dataEq
.copy(xorMask
, pipeInterleaveLog2
+ pipeStart
+ numPipeLog2
, numPipeLog2
);
1565 if ((numSamplesLog2
== 0) && (IsPrt(swizzleMode
) == FALSE
))
1569 // if 1xaa and not prt, then xor in the z bits
1571 xorMask2
.resize(numPipeLog2
);
// z ordinals are assigned in descending order across the pipe bits.
1572 for (UINT_32 pipeIdx
= 0; pipeIdx
< numPipeLog2
; pipeIdx
++)
1574 co
.set('z', numPipeLog2
- 1 - pipeIdx
);
1575 xorMask2
[pipeIdx
].add(co
);
1578 pPipeEq
->xorin(xorMask2
);
1583 pPipeEq
->xorin(xorMask
);
1588 ************************************************************************************************************************
1589 * Gfx9Lib::GetMetaEquation
1592 * Get meta equation for cmask/htile/DCC
1595 ************************************************************************************************************************
// Builds the metadata address equation (*pMetaEq) for the given data surface description.
// Steps visible here: compute the data and pipe equations, build a morton-ordered meta
// equation, filter it to the range between the compressed block and the meta block,
// strip coordinates already implied by the pipe/rb equations, widen to a 49-bit nibble
// address, then re-insert the pipe, rb and uncompressed-fragment bits above the pipe interleave.
1597 VOID
Gfx9Lib::GetMetaEquation(
1598 CoordEq
* pMetaEq
, ///< [out] meta equation
1599 UINT_32 maxMip
, ///< [in] max mip Id
1600 UINT_32 elementBytesLog2
, ///< [in] data surface element bytes
1601 UINT_32 numSamplesLog2
, ///< [in] data surface sample count
1602 ADDR2_META_FLAGS metaFlag
, ///< [in] meta flag
1603 Gfx9DataType dataSurfaceType
, ///< [in] data surface type
1604 AddrSwizzleMode swizzleMode
, ///< [in] data surface swizzle mode
1605 AddrResourceType resourceType
, ///< [in] data surface resource type
1606 UINT_32 metaBlkWidthLog2
, ///< [in] meta block width
1607 UINT_32 metaBlkHeightLog2
, ///< [in] meta block height
1608 UINT_32 metaBlkDepthLog2
, ///< [in] meta block depth
1609 UINT_32 compBlkWidthLog2
, ///< [in] compress block width
1610 UINT_32 compBlkHeightLog2
, ///< [in] compress block height
1611 UINT_32 compBlkDepthLog2
) ///< [in] compress block depth
1614 UINT_32 numPipeTotalLog2
= GetPipeLog2ForMetaAddressing(metaFlag
.pipeAligned
, swizzleMode
);
1615 UINT_32 pipeInterleaveLog2
= m_pipeInterleaveLog2
;
1616 //UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1618 // Get the correct data address and rb equation
1620 GetDataEquation(&dataEq
, dataSurfaceType
, swizzleMode
, resourceType
,
1621 elementBytesLog2
, numSamplesLog2
);
1623 // Get pipe and rb equations
1624 CoordEq pipeEquation
;
1625 GetPipeEquation(&pipeEquation
, &dataEq
, pipeInterleaveLog2
, numPipeTotalLog2
,
1626 numSamplesLog2
, dataSurfaceType
, swizzleMode
, resourceType
);
// From here on the pipe bit count is the size of the pipe equation actually produced.
1627 numPipeTotalLog2
= pipeEquation
.getsize();
1629 if (metaFlag
.linear
)
1631 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1632 ADDR_ASSERT_ALWAYS();
1634 ADDR_ASSERT(dataSurfaceType
== Gfx9DataColor
);
1636 dataEq
.copy(*pMetaEq
);
1638 if (IsLinear(swizzleMode
))
1640 if (metaFlag
.pipeAligned
)
1642 // Remove the pipe bits
1643 INT_32 shift
= static_cast<INT_32
>(numPipeTotalLog2
);
1644 pMetaEq
->shift(-shift
, pipeInterleaveLog2
);
1646 // Divide by comp block size, which for linear (which is always color) is 256 B
1649 if (metaFlag
.pipeAligned
)
1651 // Put pipe bits back in
1652 pMetaEq
->shift(numPipeTotalLog2
, pipeInterleaveLog2
);
1654 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
1656 pipeEquation
[i
].copyto((*pMetaEq
)[pipeInterleaveLog2
+ i
]);
// Non-linear metadata path.
// For color, compressed fragments are capped at m_maxCompFragLog2; the rest are uncompressed fragments.
1665 UINT_32 maxCompFragLog2
= static_cast<INT_32
>(m_maxCompFragLog2
);
1666 UINT_32 compFragLog2
=
1667 ((dataSurfaceType
== Gfx9DataColor
) && (numSamplesLog2
> maxCompFragLog2
)) ?
1668 maxCompFragLog2
: numSamplesLog2
;
1670 UINT_32 uncompFragLog2
= numSamplesLog2
- compFragLog2
;
1672 // Make sure the metaaddr is cleared
1674 pMetaEq
->resize(27);
1676 if (IsThick(resourceType
, swizzleMode
))
// Thick: 3D morton order. The condition choosing between the two orderings is not visible here.
1678 Coordinate
cx('x', 0);
1679 Coordinate
cy('y', 0);
1680 Coordinate
cz('z', 0);
1684 pMetaEq
->mort3d(cy
, cx
, cz
);
1688 pMetaEq
->mort3d(cx
, cy
, cz
);
// Otherwise: 2D morton order starting at bit compFragLog2, leaving the low bits for sample coordinates.
1693 Coordinate
cx('x', 0);
1694 Coordinate
cy('y', 0);
1699 pMetaEq
->mort2d(cy
, cx
, compFragLog2
);
1703 pMetaEq
->mort2d(cx
, cy
, compFragLog2
);
1706 //------------------------------------------------------------------------------------------------------------------------
1707 // Put the compressible fragments at the lsb
1708 // the uncompressible frags will be at the msb of the micro address
1709 //------------------------------------------------------------------------------------------------------------------------
1710 for (UINT_32 s
= 0; s
< compFragLog2
; s
++)
1713 (*pMetaEq
)[s
].add(cs
);
1717 // Keep a copy of the pipe equations
1718 CoordEq origPipeEquation
;
1719 pipeEquation
.copy(origPipeEquation
);
1722 // filter out everything under the compressed block size
1723 co
.set('x', compBlkWidthLog2
);
1724 pMetaEq
->Filter('<', co
, 0, 'x');
1725 co
.set('y', compBlkHeightLog2
);
1726 pMetaEq
->Filter('<', co
, 0, 'y');
1727 co
.set('z', compBlkDepthLog2
);
1728 pMetaEq
->Filter('<', co
, 0, 'z');
1730 // For non-color, filter out sample bits
1731 if (dataSurfaceType
!= Gfx9DataColor
)
1734 pMetaEq
->Filter('<', co
, 0, 's');
1737 // filter out everything above the metablock size
1738 co
.set('x', metaBlkWidthLog2
- 1);
1739 pMetaEq
->Filter('>', co
, 0, 'x');
1740 co
.set('y', metaBlkHeightLog2
- 1);
1741 pMetaEq
->Filter('>', co
, 0, 'y');
1742 co
.set('z', metaBlkDepthLog2
- 1);
1743 pMetaEq
->Filter('>', co
, 0, 'z');
1745 // filter out everything above the metablock size for the channel bits
1746 co
.set('x', metaBlkWidthLog2
- 1);
1747 pipeEquation
.Filter('>', co
, 0, 'x');
1748 co
.set('y', metaBlkHeightLog2
- 1);
1749 pipeEquation
.Filter('>', co
, 0, 'y');
1750 co
.set('z', metaBlkDepthLog2
- 1);
1751 pipeEquation
.Filter('>', co
, 0, 'z');
1753 // Make sure we still have the same number of channel bits
1754 if (pipeEquation
.getsize() != numPipeTotalLog2
)
1756 ADDR_ASSERT_ALWAYS();
1759 // Loop through all channel and rb bits,
1760 // and make sure these components exist in the metadata address
1761 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
1763 for (UINT_32 j
= pipeEquation
[i
].getsize(); j
> 0; j
--)
1765 if (pMetaEq
->Exists(pipeEquation
[i
][j
- 1]) == FALSE
)
1767 ADDR_ASSERT_ALWAYS();
// The RB equation is only sized from m_seLog2 / m_rbPerSeLog2 when the metadata is rb-aligned.
1772 UINT_32 numSeLog2
= metaFlag
.rbAligned
? m_seLog2
: 0;
1773 UINT_32 numRbPeSeLog2
= metaFlag
.rbAligned
? m_rbPerSeLog2
: 0;
1774 CoordEq origRbEquation
;
1776 GetRbEquation(&origRbEquation
, numRbPeSeLog2
, numSeLog2
);
// Working copy; origRbEquation is kept to re-insert the rb bits later.
1778 CoordEq rbEquation
= origRbEquation
;
1780 UINT_32 numRbTotalLog2
= numRbPeSeLog2
+ numSeLog2
;
1782 for (UINT_32 i
= 0; i
< numRbTotalLog2
; i
++)
1784 for (UINT_32 j
= rbEquation
[i
].getsize(); j
> 0; j
--)
1786 if (pMetaEq
->Exists(rbEquation
[i
][j
- 1]) == FALSE
)
1788 ADDR_ASSERT_ALWAYS();
1793 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1794 for (UINT_32 i
= 0; i
< numRbTotalLog2
; i
++)
1796 for (UINT_32 j
= 0; j
< numPipeTotalLog2
; j
++)
1798 if (rbEquation
[i
] == pipeEquation
[j
])
1800 rbEquation
[i
].Clear();
1805 // Loop through each bit of the channel, get the smallest coordinate,
1806 // and remove it from the metaaddr, and rb_equation
1807 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
1809 pipeEquation
[i
].getsmallest(co
);
// Filtering on '=' is expected to remove exactly one bit from the meta equation.
1811 UINT_32 old_size
= pMetaEq
->getsize();
1812 pMetaEq
->Filter('=', co
);
1813 UINT_32 new_size
= pMetaEq
->getsize();
1814 if (new_size
!= old_size
-1)
1816 ADDR_ASSERT_ALWAYS();
1818 pipeEquation
.remove(co
);
1819 for (UINT_32 j
= 0; j
< numRbTotalLog2
; j
++)
1821 if (rbEquation
[j
].remove(co
))
1823 // if we actually removed something from this bit, then add the remaining
1824 // channel bits, as these can be removed for this bit
1825 for (UINT_32 k
= 0; k
< pipeEquation
[i
].getsize(); k
++)
1827 if (pipeEquation
[i
][k
] != co
)
1829 rbEquation
[j
].add(pipeEquation
[i
][k
]);
1836 // Loop through the rb bits and see what remain;
1837 // filter out the smallest coordinate if it remains
1838 UINT_32 rbBitsLeft
= 0;
1839 for (UINT_32 i
= 0; i
< numRbTotalLog2
; i
++)
1841 if (rbEquation
[i
].getsize() > 0)
// NOTE(review): the update of rbBitsLeft is on a line missing from this listing.
1844 rbEquation
[i
].getsmallest(co
);
1845 UINT_32 old_size
= pMetaEq
->getsize();
1846 pMetaEq
->Filter('=', co
);
1847 UINT_32 new_size
= pMetaEq
->getsize();
1848 if (new_size
!= old_size
- 1)
1852 for (UINT_32 j
= i
+ 1; j
< numRbTotalLog2
; j
++)
1854 if (rbEquation
[j
].remove(co
))
1856 // if we actually removed something from this bit, then add the remaining
1857 // rb bits, as these can be removed for this bit
1858 for (UINT_32 k
= 0; k
< rbEquation
[i
].getsize(); k
++)
1860 if (rbEquation
[i
][k
] != co
)
1862 rbEquation
[j
].add(rbEquation
[i
][k
]);
1870 // capture the size of the metaaddr
1871 UINT_32 metaSize
= pMetaEq
->getsize();
1872 // resize to 49 bits...make this a nibble address
1873 pMetaEq
->resize(49);
1874 // Concatenate the macro address above the current address
1875 for (UINT_32 i
= metaSize
, j
= 0; i
< 49; i
++, j
++)
1878 (*pMetaEq
)[i
].add(co
);
1881 // Multiply by meta element size (in nibbles)
1882 if (dataSurfaceType
== Gfx9DataColor
)
1886 else if (dataSurfaceType
== Gfx9DataDepthStencil
)
1891 //------------------------------------------------------------------------------------------
1892 // Note the pipeInterleaveLog2+1 is because address is a nibble address
1893 // Shift up from pipe interleave number of channel
1894 // and rb bits left, and uncompressed fragments
1895 //------------------------------------------------------------------------------------------
1897 pMetaEq
->shift(numPipeTotalLog2
+ rbBitsLeft
+ uncompFragLog2
, pipeInterleaveLog2
+ 1);
1899 // Put in the channel bits
1900 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
1902 origPipeEquation
[i
].copyto((*pMetaEq
)[pipeInterleaveLog2
+1 + i
]);
1905 // Put in remaining rb bits
// i cycles over the rb bits (modulo numRbTotalLog2) until rbBitsLeft bits have been placed.
// The increment of j is on a line missing from this listing.
1906 for (UINT_32 i
= 0, j
= 0; j
< rbBitsLeft
; i
= (i
+ 1) % numRbTotalLog2
)
1908 if (rbEquation
[i
].getsize() > 0)
1910 origRbEquation
[i
].copyto((*pMetaEq
)[pipeInterleaveLog2
+ 1 + numPipeTotalLog2
+ j
]);
1911 // Mark any rb bit we add in to the rb mask
1916 //------------------------------------------------------------------------------------------
1917 // Put in the uncompressed fragment bits
1918 //------------------------------------------------------------------------------------------
1919 for (UINT_32 i
= 0; i
< uncompFragLog2
; i
++)
1921 co
.set('s', compFragLog2
+ i
);
1922 (*pMetaEq
)[pipeInterleaveLog2
+ 1 + numPipeTotalLog2
+ rbBitsLeft
+ i
].add(co
);
1928 ************************************************************************************************************************
1929 * Gfx9Lib::IsEquationSupported
1932 * Check if equation is supported for given swizzle mode and resource type.
1936 ************************************************************************************************************************
// Reports whether an equation can be generated for this resource type / swizzle mode / element size.
// Requirements visible in the expression:
//   - elementBytesLog2 is below MaxElementBytesLog2 and the swizzle mode is not linear, and
//   - either the resource is 2D and (elementBytesLog2 < 4, or the mode is neither rotate nor Z-order),
//   - or the resource is 3D and the mode is neither rotate nor a 256B block mode.
1938 BOOL_32
Gfx9Lib::IsEquationSupported(
1939 AddrResourceType rsrcType
,
1940 AddrSwizzleMode swMode
,
1941 UINT_32 elementBytesLog2
) const
1943 BOOL_32 supported
= (elementBytesLog2
< MaxElementBytesLog2
) &&
1944 (IsLinear(swMode
) == FALSE
) &&
1945 (((IsTex2d(rsrcType
) == TRUE
) &&
1946 ((elementBytesLog2
< 4) ||
1947 ((IsRotateSwizzle(swMode
) == FALSE
) &&
1948 (IsZOrderSwizzle(swMode
) == FALSE
)))) ||
1949 ((IsTex3d(rsrcType
) == TRUE
) &&
1950 (IsRotateSwizzle(swMode
) == FALSE
) &&
1951 (IsBlock256b(swMode
) == FALSE
)));
1957 ************************************************************************************************************************
1958 * Gfx9Lib::InitEquationTable
1961 * Initialize Equation table.
1965 ************************************************************************************************************************
// Zeroes m_equationTable, then walks every (resource type, swizzle mode, element size) combination.
// Supported combinations get an equation generated and stored; m_equationLookupTable records the
// resulting index, or ADDR_INVALID_EQUATION_INDEX when no equation was stored.
1967 VOID
Gfx9Lib::InitEquationTable()
1969 memset(m_equationTable
, 0, sizeof(m_equationTable
));
1971 // Loop all possible resource type (2D/3D)
1972 for (UINT_32 rsrcTypeIdx
= 0; rsrcTypeIdx
< MaxRsrcType
; rsrcTypeIdx
++)
// Table index 0 corresponds to ADDR_RSRC_TEX_2D.
1974 AddrResourceType rsrcType
= static_cast<AddrResourceType
>(rsrcTypeIdx
+ ADDR_RSRC_TEX_2D
);
1976 // Loop all possible swizzle mode
1977 for (UINT_32 swModeIdx
= 0; swModeIdx
< MaxSwMode
; swModeIdx
++)
1979 AddrSwizzleMode swMode
= static_cast<AddrSwizzleMode
>(swModeIdx
);
1981 // Loop all possible bpp
1982 for (UINT_32 bppIdx
= 0; bppIdx
< MaxElementBytesLog2
; bppIdx
++)
1984 UINT_32 equationIndex
= ADDR_INVALID_EQUATION_INDEX
;
1986 // Check if the input is supported
1987 if (IsEquationSupported(rsrcType
, swMode
, bppIdx
))
1989 ADDR_EQUATION equation
;
1990 ADDR_E_RETURNCODE retCode
;
1992 memset(&equation
, 0, sizeof(ADDR_EQUATION
));
1994 // Generate the equation
// Generator choice: 256B 2D blocks first, then thin, otherwise thick.
1995 if (IsBlock256b(swMode
) && IsTex2d(rsrcType
))
1997 retCode
= ComputeBlock256Equation(rsrcType
, swMode
, bppIdx
, &equation
);
1999 else if (IsThin(rsrcType
, swMode
))
2001 retCode
= ComputeThinEquation(rsrcType
, swMode
, bppIdx
, &equation
);
2005 retCode
= ComputeThickEquation(rsrcType
, swMode
, bppIdx
, &equation
);
2008 // Only fill the equation into the table if the return code is ADDR_OK,
2009 // otherwise if the return code is not ADDR_OK, it indicates this is not
2010 // a valid input, we do nothing but just fill invalid equation index
2011 // into the lookup table.
2012 if (retCode
== ADDR_OK
)
// NOTE(review): the increment of m_numEquations is not visible in this listing.
2014 equationIndex
= m_numEquations
;
2015 ADDR_ASSERT(equationIndex
< EquationTableSize
);
2017 m_equationTable
[equationIndex
] = equation
;
2023 ADDR_ASSERT_ALWAYS();
2027 // Fill the index into the lookup table, if the combination is not supported
2028 // fill the invalid equation index
2029 m_equationLookupTable
[rsrcTypeIdx
][swModeIdx
][bppIdx
] = equationIndex
;
2036 ************************************************************************************************************************
2037 * Gfx9Lib::HwlGetEquationIndex
2040 * Interface function stub of GetEquationIndex
2044 ************************************************************************************************************************
// Looks up the equation index for the surface described by pIn in m_equationLookupTable.
// The index stays ADDR_INVALID_EQUATION_INDEX when the combination is not supported.
// When pOut->pMipInfo is non-NULL the index is written to every mip level entry.
2046 UINT_32
Gfx9Lib::HwlGetEquationIndex(
2047 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
,
2048 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
2051 AddrResourceType rsrcType
= pIn
->resourceType
;
2052 AddrSwizzleMode swMode
= pIn
->swizzleMode
;
// bpp is in bits; shifting right by 3 gives bytes before taking Log2.
2053 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
2054 UINT_32 index
= ADDR_INVALID_EQUATION_INDEX
;
2056 if (IsEquationSupported(rsrcType
, swMode
, elementBytesLog2
))
// NOTE(review): the "- 1" should mirror the "rsrcTypeIdx + ADDR_RSRC_TEX_2D" mapping used in
// InitEquationTable; this assumes ADDR_RSRC_TEX_2D == 1 -- TODO confirm.
2058 UINT_32 rsrcTypeIdx
= static_cast<UINT_32
>(rsrcType
) - 1;
2059 UINT_32 swModeIdx
= static_cast<UINT_32
>(swMode
);
2061 index
= m_equationLookupTable
[rsrcTypeIdx
][swModeIdx
][elementBytesLog2
];
2064 if (pOut
->pMipInfo
!= NULL
)
2066 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
2068 pOut
->pMipInfo
[i
].equationIndex
= index
;
2076 ************************************************************************************************************************
2077 * Gfx9Lib::HwlComputeBlock256Equation
2080 * Interface function stub of ComputeBlock256Equation
2084 ************************************************************************************************************************
// Fills *pEquation with the 8-bit address equation of a 256B block.
// The low elementBytesLog2 bits are set up by the first loop; the remaining bits are chosen by
// swizzle class (standard / display / rotate) and element size in switch statements whose case
// bodies are not visible in this listing. Unhandled inputs yield ADDR_INVALIDPARAMS.
2086 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeBlock256Equation(
2087 AddrResourceType rsrcType
,
2088 AddrSwizzleMode swMode
,
2089 UINT_32 elementBytesLog2
,
2090 ADDR_EQUATION
* pEquation
) const
2092 ADDR_E_RETURNCODE ret
= ADDR_OK
;
2094 pEquation
->numBits
= 8;
// NOTE(review): `i` is declared on a line missing from this listing.
2097 for (; i
< elementBytesLog2
; i
++)
2099 InitChannel(1, 0 , i
, &pEquation
->addr
[i
]);
// pixelBit points at the first address bit above the low elementBytesLog2 bits.
2102 ADDR_CHANNEL_SETTING
* pixelBit
= &pEquation
->addr
[elementBytesLog2
];
// Pre-built channel settings: x[i] uses index elementBytesLog2 + i, y[i] uses index i.
2104 const UINT_32 maxBitsUsed
= 4;
2105 ADDR_CHANNEL_SETTING x
[maxBitsUsed
] = {};
2106 ADDR_CHANNEL_SETTING y
[maxBitsUsed
] = {};
2108 for (i
= 0; i
< maxBitsUsed
; i
++)
2110 InitChannel(1, 0, elementBytesLog2
+ i
, &x
[i
]);
2111 InitChannel(1, 1, i
, &y
[i
]);
2114 if (IsStandardSwizzle(rsrcType
, swMode
))
2116 switch (elementBytesLog2
)
2159 ADDR_ASSERT_ALWAYS();
2160 ret
= ADDR_INVALIDPARAMS
;
2164 else if (IsDisplaySwizzle(rsrcType
, swMode
))
2166 switch (elementBytesLog2
)
2209 ADDR_ASSERT_ALWAYS();
2210 ret
= ADDR_INVALIDPARAMS
;
2214 else if (IsRotateSwizzle(swMode
))
2216 switch (elementBytesLog2
)
2253 ADDR_ASSERT_ALWAYS();
2255 ret
= ADDR_INVALIDPARAMS
;
2261 ADDR_ASSERT_ALWAYS();
2262 ret
= ADDR_INVALIDPARAMS
;
// Sanity checks: the highest valid channel index in each direction must match the micro block
// dimensions from Block256_2d (width is scaled by the element size in bytes).
2268 Dim2d microBlockDim
= Block256_2d
[elementBytesLog2
];
2269 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation
->addr
, 8, 0)) ==
2270 (microBlockDim
.w
* (1 << elementBytesLog2
)));
2271 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation
->addr
, 8, 1)) == microBlockDim
.h
);
2278 ************************************************************************************************************************
2279 * Gfx9Lib::HwlComputeThinEquation
2282 * Interface function stub of ComputeThinEquation
2286 ************************************************************************************************************************
// Fills *pEquation with the address equation for a thin swizzle mode.
// The low bits come either from a Z-order interleave or from HwlComputeBlock256Equation.
// Bits up to blockSizeLog2 then alternate between y and x, and pipe/bank xor sources are
// written into xor1 / xor2. numBits is set to blockSizeLog2.
2288 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeThinEquation(
2289 AddrResourceType rsrcType
,
2290 AddrSwizzleMode swMode
,
2291 UINT_32 elementBytesLog2
,
2292 ADDR_EQUATION
* pEquation
) const
2294 ADDR_E_RETURNCODE ret
= ADDR_OK
;
2296 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swMode
);
2298 UINT_32 maxXorBits
= blockSizeLog2
;
2299 if (IsNonPrtXor(swMode
))
2301 // For non-prt-xor, maybe need to initialize some more bits for xor
2302 // The highest xor bit used in equation will be max the following 3 items:
2303 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2304 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2307 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+ 2 * GetPipeXorBits(blockSizeLog2
));
2308 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+
2309 GetPipeXorBits(blockSizeLog2
) +
2310 2 * GetBankXorBits(blockSizeLog2
));
// Pre-built x/y channel settings. x and y together must cover maxXorBits bits.
2313 const UINT_32 maxBitsUsed
= 14;
2314 ADDR_ASSERT((2 * maxBitsUsed
) >= maxXorBits
);
2315 ADDR_CHANNEL_SETTING x
[maxBitsUsed
] = {};
2316 ADDR_CHANNEL_SETTING y
[maxBitsUsed
] = {};
// Scratch storage for xor source bits at positions at or above blockSizeLog2.
2318 const UINT_32 extraXorBits
= 16;
2319 ADDR_ASSERT(extraXorBits
>= maxXorBits
- blockSizeLog2
);
2320 ADDR_CHANNEL_SETTING xorExtra
[extraXorBits
] = {};
2322 for (UINT_32 i
= 0; i
< maxBitsUsed
; i
++)
2324 InitChannel(1, 0, elementBytesLog2
+ i
, &x
[i
]);
2325 InitChannel(1, 1, i
, &y
[i
]);
2328 ADDR_CHANNEL_SETTING
* pixelBit
= pEquation
->addr
;
// Low elementBytesLog2 address bits.
2330 for (UINT_32 i
= 0; i
< elementBytesLog2
; i
++)
2332 InitChannel(1, 0 , i
, &pixelBit
[i
]);
// NOTE(review): xIdx and yIdx are declared on lines missing from this listing.
2337 UINT_32 lowBits
= 0;
2339 if (IsZOrderSwizzle(swMode
))
2341 if (elementBytesLog2
<= 3)
// Z-order: bits up to 6 alternate x, y starting with x.
2343 for (UINT_32 i
= elementBytesLog2
; i
< 6; i
++)
2345 pixelBit
[i
] = (((i
- elementBytesLog2
) & 1) == 0) ? x
[xIdx
++] : y
[yIdx
++];
2352 ret
= ADDR_INVALIDPARAMS
;
// Non-Z-order: reuse the 256B block equation for the low bits and resume x/y above the micro block.
2357 ret
= HwlComputeBlock256Equation(rsrcType
, swMode
, elementBytesLog2
, pEquation
);
2361 Dim2d microBlockDim
= Block256_2d
[elementBytesLog2
];
2362 xIdx
= Log2(microBlockDim
.w
);
2363 yIdx
= Log2(microBlockDim
.h
);
// Bits from lowBits up to the block size: even bit positions take y, odd take x.
2370 for (UINT_32 i
= lowBits
; i
< blockSizeLog2
; i
++)
2372 pixelBit
[i
] = ((i
& 1) == 0) ? y
[yIdx
++] : x
[xIdx
++];
// Same alternation continues into xorExtra for positions at or above the block size.
2375 for (UINT_32 i
= blockSizeLog2
; i
< maxXorBits
; i
++)
2377 xorExtra
[i
- blockSizeLog2
] = ((i
& 1) == 0) ? y
[yIdx
++] : x
[xIdx
++];
// Pipe bits start at the pipe interleave; bank bits follow directly after the pipe bits.
2383 UINT_32 pipeStart
= m_pipeInterleaveLog2
;
2384 UINT_32 pipeXorBits
= GetPipeXorBits(blockSizeLog2
);
2386 UINT_32 bankStart
= pipeStart
+ pipeXorBits
;
2387 UINT_32 bankXorBits
= GetBankXorBits(blockSizeLog2
);
// xor1 for pipe bit i is sourced from position pipeStart + 2 * pipeXorBits - 1 - i,
// read from addr[] when below the block size and from xorExtra[] otherwise.
2389 for (UINT_32 i
= 0; i
< pipeXorBits
; i
++)
2391 UINT_32 xor1BitPos
= pipeStart
+ 2 * pipeXorBits
- 1 - i
;
2392 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2393 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2395 InitChannel(&pEquation
->xor1
[pipeStart
+ i
], pXor1Src
);
// Same scheme for the bank bits.
2398 for (UINT_32 i
= 0; i
< bankXorBits
; i
++)
2400 UINT_32 xor1BitPos
= bankStart
+ 2 * bankXorBits
- 1 - i
;
2401 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2402 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2404 InitChannel(&pEquation
->xor1
[bankStart
+ i
], pXor1Src
);
// Non-PRT modes additionally fill xor2 with channel-2 bits, in descending index order.
2407 if (IsPrt(swMode
) == FALSE
)
2409 for (UINT_32 i
= 0; i
< pipeXorBits
; i
++)
2411 InitChannel(1, 2, pipeXorBits
- i
- 1, &pEquation
->xor2
[pipeStart
+ i
]);
2414 for (UINT_32 i
= 0; i
< bankXorBits
; i
++)
2416 InitChannel(1, 2, bankXorBits
- i
- 1 + pipeXorBits
, &pEquation
->xor2
[bankStart
+ i
]);
2421 pEquation
->numBits
= blockSizeLog2
;
2428 ************************************************************************************************************************
2429 * Gfx9Lib::HwlComputeThickEquation
2432 * Interface function stub of ComputeThickEquation
2436 ************************************************************************************************************************
// Fills *pEquation with the address equation for a thick (3D) swizzle mode.
// The low 10 bits come from per-element-size switch statements whose case bodies are not visible
// in this listing. Higher bits cycle through x / z / y based on (i % 3), and pipe/bank xor
// sources are written into xor1 and xor2. numBits is set to blockSizeLog2.
2438 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeThickEquation(
2439 AddrResourceType rsrcType
,
2440 AddrSwizzleMode swMode
,
2441 UINT_32 elementBytesLog2
,
2442 ADDR_EQUATION
* pEquation
) const
2444 ADDR_E_RETURNCODE ret
= ADDR_OK
;
2446 ADDR_ASSERT(IsTex3d(rsrcType
));
2448 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swMode
);
2450 UINT_32 maxXorBits
= blockSizeLog2
;
2451 if (IsNonPrtXor(swMode
))
2453 // For non-prt-xor, maybe need to initialize some more bits for xor
2454 // The highest xor bit used in equation will be max the following 3:
2455 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2456 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2459 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+ 3 * GetPipeXorBits(blockSizeLog2
));
2460 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+
2461 GetPipeXorBits(blockSizeLog2
) +
2462 3 * GetBankXorBits(blockSizeLog2
));
// Low elementBytesLog2 address bits.
2465 for (UINT_32 i
= 0; i
< elementBytesLog2
; i
++)
2467 InitChannel(1, 0 , i
, &pEquation
->addr
[i
]);
2470 ADDR_CHANNEL_SETTING
* pixelBit
= &pEquation
->addr
[elementBytesLog2
];
// Pre-built x/y/z channel settings. The three arrays together must cover maxXorBits bits.
2472 const UINT_32 maxBitsUsed
= 12;
2473 ADDR_ASSERT((3 * maxBitsUsed
) >= maxXorBits
);
2474 ADDR_CHANNEL_SETTING x
[maxBitsUsed
] = {};
2475 ADDR_CHANNEL_SETTING y
[maxBitsUsed
] = {};
2476 ADDR_CHANNEL_SETTING z
[maxBitsUsed
] = {};
// Scratch storage for xor source bits at positions at or above blockSizeLog2.
2478 const UINT_32 extraXorBits
= 24;
2479 ADDR_ASSERT(extraXorBits
>= maxXorBits
- blockSizeLog2
);
2480 ADDR_CHANNEL_SETTING xorExtra
[extraXorBits
] = {};
2482 for (UINT_32 i
= 0; i
< maxBitsUsed
; i
++)
2484 InitChannel(1, 0, elementBytesLog2
+ i
, &x
[i
]);
2485 InitChannel(1, 1, i
, &y
[i
]);
2486 InitChannel(1, 2, i
, &z
[i
]);
2489 if (IsZOrderSwizzle(swMode
))
2491 switch (elementBytesLog2
)
2544 ADDR_ASSERT_ALWAYS();
2545 ret
= ADDR_INVALIDPARAMS
;
2549 else if (IsStandardSwizzle(rsrcType
, swMode
))
2551 switch (elementBytesLog2
)
2604 ADDR_ASSERT_ALWAYS();
2605 ret
= ADDR_INVALIDPARAMS
;
2611 ADDR_ASSERT_ALWAYS();
2612 ret
= ADDR_INVALIDPARAMS
;
// Resume x/y/z indices just above the 1KB micro block dimensions.
2617 Dim3d microBlockDim
= Block1K_3d
[elementBytesLog2
];
2618 UINT_32 xIdx
= Log2(microBlockDim
.w
);
2619 UINT_32 yIdx
= Log2(microBlockDim
.h
);
2620 UINT_32 zIdx
= Log2(microBlockDim
.d
);
2622 pixelBit
= pEquation
->addr
;
// Exactly the low 10 address bits are expected to have been filled in above.
2624 const UINT_32 lowBits
= 10;
2625 ADDR_ASSERT(pEquation
->addr
[lowBits
- 1].valid
== 1);
2626 ADDR_ASSERT(pEquation
->addr
[lowBits
].valid
== 0);
// Bits from lowBits up to the block size rotate between x, z and y.
// The first branch condition is on a line missing from this listing.
2628 for (UINT_32 i
= lowBits
; i
< blockSizeLog2
; i
++)
2632 pixelBit
[i
] = x
[xIdx
++];
2634 else if ((i
% 3) == 1)
2636 pixelBit
[i
] = z
[zIdx
++];
2640 pixelBit
[i
] = y
[yIdx
++];
// Same rotation continues into xorExtra for positions at or above the block size.
2644 for (UINT_32 i
= blockSizeLog2
; i
< maxXorBits
; i
++)
2648 xorExtra
[i
- blockSizeLog2
] = x
[xIdx
++];
2650 else if ((i
% 3) == 1)
2652 xorExtra
[i
- blockSizeLog2
] = z
[zIdx
++];
2656 xorExtra
[i
- blockSizeLog2
] = y
[yIdx
++];
// Pipe xor: each pipe bit takes two sources (xor1, xor2) from adjacent positions, stepping
// down by two per pipe bit from pipeStart + 3 * pipeXorBits - 1.
2663 UINT_32 pipeStart
= m_pipeInterleaveLog2
;
2664 UINT_32 pipeXorBits
= GetPipeXorBits(blockSizeLog2
);
2665 for (UINT_32 i
= 0; i
< pipeXorBits
; i
++)
2667 UINT_32 xor1BitPos
= pipeStart
+ (3 * pipeXorBits
) - 1 - (2 * i
);
2668 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2669 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2671 InitChannel(&pEquation
->xor1
[pipeStart
+ i
], pXor1Src
);
2673 UINT_32 xor2BitPos
= pipeStart
+ (3 * pipeXorBits
) - 2 - (2 * i
);
2674 ADDR_CHANNEL_SETTING
* pXor2Src
= (xor2BitPos
< blockSizeLog2
) ?
2675 &pEquation
->addr
[xor2BitPos
] : &xorExtra
[xor2BitPos
- blockSizeLog2
];
2677 InitChannel(&pEquation
->xor2
[pipeStart
+ i
], pXor2Src
);
// Bank xor: same two-source scheme, starting directly above the pipe bits.
2680 UINT_32 bankStart
= pipeStart
+ pipeXorBits
;
2681 UINT_32 bankXorBits
= GetBankXorBits(blockSizeLog2
);
2682 for (UINT_32 i
= 0; i
< bankXorBits
; i
++)
2684 UINT_32 xor1BitPos
= bankStart
+ (3 * bankXorBits
) - 1 - (2 * i
);
2685 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2686 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2688 InitChannel(&pEquation
->xor1
[bankStart
+ i
], pXor1Src
);
2690 UINT_32 xor2BitPos
= bankStart
+ (3 * bankXorBits
) - 2 - (2 * i
);
2691 ADDR_CHANNEL_SETTING
* pXor2Src
= (xor2BitPos
< blockSizeLog2
) ?
2692 &pEquation
->addr
[xor2BitPos
] : &xorExtra
[xor2BitPos
- blockSizeLog2
];
2694 InitChannel(&pEquation
->xor2
[bankStart
+ i
], pXor2Src
);
2698 pEquation
->numBits
= blockSizeLog2
;
2705 ************************************************************************************************************************
2706 * Gfx9Lib::IsValidDisplaySwizzleMode
2709 * Check if a swizzle mode is supported by display engine
2712 * TRUE is swizzle mode is supported by display engine
2713 ************************************************************************************************************************
2715 BOOL_32
Gfx9Lib::IsValidDisplaySwizzleMode(
2716 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
) const
2718 BOOL_32 support
= FALSE
;
2720 //const AddrResourceType resourceType = pIn->resourceType;
2721 const AddrSwizzleMode swizzleMode
= pIn
->swizzleMode
;
2723 if (m_settings
.isDce12
)
2725 switch (swizzleMode
)
2727 case ADDR_SW_256B_D
:
2728 case ADDR_SW_256B_R
:
2729 support
= (pIn
->bpp
== 32);
2732 case ADDR_SW_LINEAR
:
2735 case ADDR_SW_64KB_D
:
2736 case ADDR_SW_64KB_R
:
2739 case ADDR_SW_4KB_D_X
:
2740 case ADDR_SW_4KB_R_X
:
2741 case ADDR_SW_64KB_D_X
:
2742 case ADDR_SW_64KB_R_X
:
2743 case ADDR_SW_VAR_D_X
:
2744 case ADDR_SW_VAR_R_X
:
2745 support
= (pIn
->bpp
<= 64);
2752 else if (m_settings
.isDcn1
)
2754 switch (swizzleMode
)
2757 case ADDR_SW_64KB_D
:
2759 case ADDR_SW_64KB_D_T
:
2760 case ADDR_SW_4KB_D_X
:
2761 case ADDR_SW_64KB_D_X
:
2762 case ADDR_SW_VAR_D_X
:
2763 support
= (pIn
->bpp
== 64);
2766 case ADDR_SW_LINEAR
:
2768 case ADDR_SW_64KB_S
:
2770 case ADDR_SW_64KB_S_T
:
2771 case ADDR_SW_4KB_S_X
:
2772 case ADDR_SW_64KB_S_X
:
2773 case ADDR_SW_VAR_S_X
:
2774 support
= (pIn
->bpp
<= 64);
2783 ADDR_NOT_IMPLEMENTED();
2790 ************************************************************************************************************************
2791 * Gfx9Lib::HwlComputePipeBankXor
2794 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2798 ************************************************************************************************************************
2800 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputePipeBankXor(
2801 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT
* pIn
,
2802 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT
* pOut
) const
2804 UINT_32 macroBlockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
2805 UINT_32 pipeBits
= GetPipeXorBits(macroBlockBits
);
2806 UINT_32 bankBits
= GetBankXorBits(macroBlockBits
);
2808 UINT_32 pipeXor
= 0;
2809 UINT_32 bankXor
= 0;
2811 const UINT_32 bankMask
= (1 << bankBits
) - 1;
2812 const UINT_32 index
= pIn
->surfIndex
& bankMask
;
2814 const UINT_32 bpp
= pIn
->flags
.fmask
?
2815 GetFmaskBpp(pIn
->numSamples
, pIn
->numFrags
) : GetElemLib()->GetBitsPerPixel(pIn
->format
);
2818 static const UINT_32 BankXorSmallBpp
[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
2819 static const UINT_32 BankXorLargeBpp
[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
2821 bankXor
= (bpp
<= 32) ? BankXorSmallBpp
[index
] : BankXorLargeBpp
[index
];
2823 else if (bankBits
> 0)
2825 UINT_32 bankIncrease
= (1 << (bankBits
- 1)) - 1;
2826 bankIncrease
= (bankIncrease
== 0) ? 1 : bankIncrease
;
2827 bankXor
= (index
* bankIncrease
) & bankMask
;
2830 pOut
->pipeBankXor
= (bankXor
<< pipeBits
) | pipeXor
;
2836 ************************************************************************************************************************
2837 * Gfx9Lib::HwlComputeSlicePipeBankXor
2840 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2844 ************************************************************************************************************************
2846 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSlicePipeBankXor(
2847 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT
* pIn
,
2848 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT
* pOut
) const
2850 UINT_32 macroBlockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
2851 UINT_32 pipeBits
= GetPipeXorBits(macroBlockBits
);
2852 UINT_32 bankBits
= GetBankXorBits(macroBlockBits
);
2854 UINT_32 pipeXor
= ReverseBitVector(pIn
->slice
, pipeBits
);
2855 UINT_32 bankXor
= ReverseBitVector(pIn
->slice
>> pipeBits
, bankBits
);
2857 pOut
->pipeBankXor
= pIn
->basePipeBankXor
^ (pipeXor
| (bankXor
<< pipeBits
));
2863 ************************************************************************************************************************
2864 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2867 * Compute sub resource offset to support swizzle pattern
2871 ************************************************************************************************************************
2873 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2874 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT
* pIn
,
2875 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT
* pOut
) const
2877 ADDR_ASSERT(IsThin(pIn
->resourceType
, pIn
->swizzleMode
));
2879 UINT_32 macroBlockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
2880 UINT_32 pipeBits
= GetPipeXorBits(macroBlockBits
);
2881 UINT_32 bankBits
= GetBankXorBits(macroBlockBits
);
2882 UINT_32 pipeXor
= ReverseBitVector(pIn
->slice
, pipeBits
);
2883 UINT_32 bankXor
= ReverseBitVector(pIn
->slice
>> pipeBits
, bankBits
);
2884 UINT_32 pipeBankXor
= ((pipeXor
| (bankXor
<< pipeBits
)) ^ (pIn
->pipeBankXor
)) << m_pipeInterleaveLog2
;
2886 pOut
->offset
= pIn
->slice
* pIn
->sliceSize
+
2887 pIn
->macroBlockOffset
+
2888 (pIn
->mipTailOffset
^ pipeBankXor
) -
2889 static_cast<UINT_64
>(pipeBankXor
);
2894 ************************************************************************************************************************
2895 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
2898 * Compute surface info sanity check
2902 ************************************************************************************************************************
2904 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
2905 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
) const
2907 BOOL_32 invalid
= FALSE
;
2909 if ((pIn
->bpp
> 128) || (pIn
->width
== 0) || (pIn
->numFrags
> 8) || (pIn
->numSamples
> 16))
2913 else if ((pIn
->swizzleMode
>= ADDR_SW_MAX_TYPE
) ||
2914 (pIn
->resourceType
>= ADDR_RSRC_MAX_TYPE
))
2919 BOOL_32 mipmap
= (pIn
->numMipLevels
> 1);
2920 BOOL_32 msaa
= (pIn
->numFrags
> 1);
2922 ADDR2_SURFACE_FLAGS flags
= pIn
->flags
;
2923 BOOL_32 zbuffer
= (flags
.depth
|| flags
.stencil
);
2924 BOOL_32 color
= flags
.color
;
2925 BOOL_32 display
= flags
.display
|| flags
.rotated
;
2927 AddrResourceType rsrcType
= pIn
->resourceType
;
2928 BOOL_32 tex3d
= IsTex3d(rsrcType
);
2929 AddrSwizzleMode swizzle
= pIn
->swizzleMode
;
2930 BOOL_32 linear
= IsLinear(swizzle
);
2931 BOOL_32 blk256B
= IsBlock256b(swizzle
);
2932 BOOL_32 blkVar
= IsBlockVariable(swizzle
);
2933 BOOL_32 isNonPrtXor
= IsNonPrtXor(swizzle
);
2934 BOOL_32 prt
= flags
.prt
;
2935 BOOL_32 stereo
= flags
.qbStereo
;
2937 if (invalid
== FALSE
)
2939 if ((pIn
->numFrags
> 1) &&
2940 (GetBlockSize(swizzle
) < (m_pipeInterleaveBytes
* pIn
->numFrags
)))
2942 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2947 if (invalid
== FALSE
)
2951 case ADDR_RSRC_TEX_1D
:
2952 invalid
= msaa
|| zbuffer
|| display
|| (linear
== FALSE
) || stereo
;
2954 case ADDR_RSRC_TEX_2D
:
2955 invalid
= (msaa
&& mipmap
) || (stereo
&& msaa
) || (stereo
&& mipmap
);
2957 case ADDR_RSRC_TEX_3D
:
2958 invalid
= msaa
|| zbuffer
|| display
|| stereo
;
2966 if (invalid
== FALSE
)
2970 invalid
= (IsValidDisplaySwizzleMode(pIn
) == FALSE
);
2974 if (invalid
== FALSE
)
2978 invalid
= ((ADDR_RSRC_TEX_1D
!= rsrcType
) && prt
) ||
2979 zbuffer
|| msaa
|| (pIn
->bpp
== 0) || ((pIn
->bpp
% 8) != 0);
2983 if (blk256B
|| blkVar
|| isNonPrtXor
)
2988 invalid
= invalid
|| zbuffer
|| tex3d
|| mipmap
|| msaa
;
2992 if (invalid
== FALSE
)
2994 if (IsZOrderSwizzle(swizzle
))
2996 invalid
= color
&& msaa
;
2998 else if (IsStandardSwizzle(rsrcType
, swizzle
))
3002 else if (IsDisplaySwizzle(rsrcType
, swizzle
))
3006 else if (IsRotateSwizzle(swizzle
))
3008 invalid
= zbuffer
|| (pIn
->bpp
> 64) || tex3d
;
3012 ADDR_ASSERT(!"invalid swizzle mode");
3019 ADDR_ASSERT(invalid
== FALSE
);
3021 return invalid
? ADDR_INVALIDPARAMS
: ADDR_OK
;
3025 ************************************************************************************************************************
3026 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3029 * Internal function to get suggested surface information for cliet to use
3033 ************************************************************************************************************************
3035 ADDR_E_RETURNCODE
Gfx9Lib::HwlGetPreferredSurfaceSetting(
3036 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT
* pIn
,
3037 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT
* pOut
) const
3039 // Macro define resource block type
3042 AddrBlockMicro
= 0, // Resource uses 256B block
3043 AddrBlock4KB
= 1, // Resource uses 4KB block
3044 AddrBlock64KB
= 2, // Resource uses 64KB block
3045 AddrBlockVar
= 3, // Resource uses var blcok
3046 AddrBlockLinear
= 4, // Resource uses linear swizzle mode
3048 AddrBlockMaxTiledType
= AddrBlock64KB
+ 1,
3053 AddrBlockSetMicro
= 1 << AddrBlockMicro
,
3054 AddrBlockSetMacro4KB
= 1 << AddrBlock4KB
,
3055 AddrBlockSetMacro64KB
= 1 << AddrBlock64KB
,
3056 AddrBlockSetVar
= 1 << AddrBlockVar
,
3057 AddrBlockSetLinear
= 1 << AddrBlockLinear
,
3059 AddrBlockSetMacro
= AddrBlockSetMacro4KB
| AddrBlockSetMacro64KB
,
3062 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
3063 ElemLib
* pElemLib
= GetElemLib();
3065 // Set format to INVALID will skip this conversion
3066 UINT_32 expandX
= 1;
3067 UINT_32 expandY
= 1;
3068 UINT_32 bpp
= pIn
->bpp
;
3069 UINT_32 width
= pIn
->width
;
3070 UINT_32 height
= pIn
->height
;
3072 if (pIn
->format
!= ADDR_FMT_INVALID
)
3074 // Don't care for this case
3075 ElemMode elemMode
= ADDR_UNCOMPRESSED
;
3077 // Get compression/expansion factors and element mode which indicates compression/expansion
3078 bpp
= pElemLib
->GetBitsPerPixel(pIn
->format
,
3083 UINT_32 basePitch
= 0;
3084 GetElemLib()->AdjustSurfaceInfo(elemMode
,
3093 UINT_32 numSamples
= Max(pIn
->numSamples
, 1u);
3094 UINT_32 numFrags
= (pIn
->numFrags
== 0) ? numSamples
: pIn
->numFrags
;
3095 UINT_32 slice
= Max(pIn
->numSlices
, 1u);
3096 UINT_32 numMipLevels
= Max(pIn
->numMipLevels
, 1u);
3097 UINT_32 minSizeAlign
= NextPow2(pIn
->minSizeAlign
);
3099 if (pIn
->flags
.fmask
)
3101 bpp
= GetFmaskBpp(numSamples
, numFrags
);
3104 pOut
->resourceType
= ADDR_RSRC_TEX_2D
;
3108 // The output may get changed for volume(3D) texture resource in future
3109 pOut
->resourceType
= pIn
->resourceType
;
3112 ADDR_ASSERT(bpp
>= 8u);
3113 UINT_64 minSizeAlignInElement
= Max(minSizeAlign
/ (bpp
>> 3), 1u);
3115 if (IsTex1d(pOut
->resourceType
))
3117 pOut
->swizzleMode
= ADDR_SW_LINEAR
;
3118 pOut
->validBlockSet
.value
= AddrBlockSetLinear
;
3119 pOut
->canXor
= FALSE
;
3123 ADDR2_BLOCK_SET blockSet
;
3126 AddrSwType swType
= ADDR_SW_S
;
3128 // prt Xor and non-xor will have less height align requirement for stereo surface
3129 BOOL_32 prtXor
= (pIn
->flags
.prt
|| pIn
->flags
.qbStereo
) && (pIn
->noXor
== FALSE
);
3130 BOOL_32 displayResource
= FALSE
;
3132 pOut
->canXor
= (pIn
->flags
.prt
== FALSE
) && (pIn
->noXor
== FALSE
);
3134 // Filter out improper swType and blockSet by HW restriction
3135 if (pIn
->flags
.fmask
|| pIn
->flags
.depth
|| pIn
->flags
.stencil
)
3137 ADDR_ASSERT(IsTex2d(pOut
->resourceType
));
3138 blockSet
.value
= AddrBlockSetMacro
;
3141 if (pIn
->flags
.depth
&& pIn
->flags
.texture
)
3143 if (((bpp
== 16) && (numFrags
>= 4)) ||
3144 ((bpp
== 32) && (numFrags
>= 2)))
3146 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3147 // equation from wrong address within memory range a tile covered and use the
3148 // garbage data for compressed Z reading which finally leads to corruption.
3149 pOut
->canXor
= FALSE
;
3154 else if (ElemLib::IsBlockCompressed(pIn
->format
))
3156 // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes. Not sure
3157 // under what circumstances "_D" would be appropriate as these formats are not
3159 blockSet
.value
= AddrBlockSetMacro
;
3161 // This isn't to be used as texture and caller doesn't allow macro tiled.
3162 if ((pIn
->flags
.texture
== FALSE
) &&
3163 (pIn
->forbiddenBlock
.macro4KB
&& pIn
->forbiddenBlock
.macro64KB
))
3165 blockSet
.value
|= AddrBlockSetLinear
;
3169 else if (ElemLib::IsMacroPixelPacked(pIn
->format
))
3171 // macro pixel packed formats (BG_RG, GB_GR) does not support the Z modes. Its not
3172 // clear under what circumstances the D or R modes would be appropriate since
3173 // these formats are not displayable.
3174 blockSet
.value
= AddrBlockSetLinear
| AddrBlockSetMacro
;
3177 else if (IsTex3d(pOut
->resourceType
))
3179 blockSet
.value
= AddrBlockSetLinear
| AddrBlockSetMacro
;
3183 // PRT cannot use SW_D which gives an unexpected block dimension
3186 else if ((numMipLevels
> 1) && (slice
>= width
) && (slice
>= height
))
3188 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3189 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3192 else if (pIn
->flags
.color
)
3203 swType
= ((pIn
->flags
.display
== TRUE
) ||
3204 (pIn
->flags
.overlay
== TRUE
) ||
3205 (pIn
->bpp
== 128)) ? ADDR_SW_D
: ADDR_SW_S
;
3207 if (numMipLevels
> 1)
3209 ADDR_ASSERT(numFrags
== 1);
3210 blockSet
.value
= AddrBlockSetLinear
| AddrBlockSetMacro
;
3212 else if ((numFrags
> 1) || (numSamples
> 1))
3214 ADDR_ASSERT(IsTex2d(pOut
->resourceType
));
3215 blockSet
.value
= AddrBlockSetMacro
;
3219 ADDR_ASSERT(IsTex2d(pOut
->resourceType
));
3220 blockSet
.value
= AddrBlockSetLinear
| AddrBlockSetMicro
| AddrBlockSetMacro
;
3222 displayResource
= pIn
->flags
.rotated
|| pIn
->flags
.display
;
3224 if (displayResource
)
3226 swType
= pIn
->flags
.rotated
? ADDR_SW_R
: ADDR_SW_D
;
3232 else if (m_settings
.isDce12
)
3236 blockSet
.micro
= FALSE
;
3239 // DCE12 does not support display surface to be _T swizzle mode
3242 else if (m_settings
.isDcn1
)
3244 // _R is not supported by Dcn1
3254 blockSet
.micro
= FALSE
;
3258 ADDR_NOT_IMPLEMENTED();
3259 returnCode
= ADDR_NOTSUPPORTED
;
3265 if ((numFrags
> 1) &&
3266 (GetBlockSize(ADDR_SW_4KB
) < (m_pipeInterleaveBytes
* numFrags
)))
3268 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3269 blockSet
.macro4KB
= FALSE
;
3274 blockSet
.value
&= AddrBlockSetMacro64KB
;
3277 // Apply customized forbidden setting
3278 blockSet
.value
&= ~pIn
->forbiddenBlock
.value
;
3280 if (pIn
->maxAlign
> 0)
3282 if (pIn
->maxAlign
< GetBlockSize(ADDR_SW_64KB
))
3284 blockSet
.macro64KB
= FALSE
;
3287 if (pIn
->maxAlign
< GetBlockSize(ADDR_SW_4KB
))
3289 blockSet
.macro4KB
= FALSE
;
3292 if (pIn
->maxAlign
< GetBlockSize(ADDR_SW_256B
))
3294 blockSet
.micro
= FALSE
;
3298 Dim3d blkAlign
[AddrBlockMaxTiledType
] = {{0}, {0}, {0}};
3299 Dim3d paddedDim
[AddrBlockMaxTiledType
] = {{0}, {0}, {0}};
3300 UINT_64 padSize
[AddrBlockMaxTiledType
] = {0};
3304 returnCode
= ComputeBlockDimensionForSurf(&blkAlign
[AddrBlockMicro
].w
,
3305 &blkAlign
[AddrBlockMicro
].h
,
3306 &blkAlign
[AddrBlockMicro
].d
,
3312 if (returnCode
== ADDR_OK
)
3314 if (displayResource
)
3316 blkAlign
[AddrBlockMicro
].w
= PowTwoAlign(blkAlign
[AddrBlockMicro
].w
, 32);
3318 else if ((blkAlign
[AddrBlockMicro
].w
>= width
) && (blkAlign
[AddrBlockMicro
].h
>= height
) &&
3319 (minSizeAlign
<= GetBlockSize(ADDR_SW_256B
)))
3321 // If one 256B block can contain the surface, don't bother bigger block type
3322 blockSet
.macro4KB
= FALSE
;
3323 blockSet
.macro64KB
= FALSE
;
3324 blockSet
.var
= FALSE
;
3327 padSize
[AddrBlockMicro
] = ComputePadSize(&blkAlign
[AddrBlockMicro
], width
, height
,
3328 slice
, &paddedDim
[AddrBlockMicro
]);
3332 if ((returnCode
== ADDR_OK
) && blockSet
.macro4KB
)
3334 returnCode
= ComputeBlockDimensionForSurf(&blkAlign
[AddrBlock4KB
].w
,
3335 &blkAlign
[AddrBlock4KB
].h
,
3336 &blkAlign
[AddrBlock4KB
].d
,
3342 if (returnCode
== ADDR_OK
)
3344 if (displayResource
)
3346 blkAlign
[AddrBlock4KB
].w
= PowTwoAlign(blkAlign
[AddrBlock4KB
].w
, 32);
3349 padSize
[AddrBlock4KB
] = ComputePadSize(&blkAlign
[AddrBlock4KB
], width
, height
,
3350 slice
, &paddedDim
[AddrBlock4KB
]);
3352 ADDR_ASSERT(padSize
[AddrBlock4KB
] >= padSize
[AddrBlockMicro
]);
3356 if ((returnCode
== ADDR_OK
) && blockSet
.macro64KB
)
3358 returnCode
= ComputeBlockDimensionForSurf(&blkAlign
[AddrBlock64KB
].w
,
3359 &blkAlign
[AddrBlock64KB
].h
,
3360 &blkAlign
[AddrBlock64KB
].d
,
3366 if (returnCode
== ADDR_OK
)
3368 if (displayResource
)
3370 blkAlign
[AddrBlock64KB
].w
= PowTwoAlign(blkAlign
[AddrBlock64KB
].w
, 32);
3373 padSize
[AddrBlock64KB
] = ComputePadSize(&blkAlign
[AddrBlock64KB
], width
, height
,
3374 slice
, &paddedDim
[AddrBlock64KB
]);
3376 ADDR_ASSERT(padSize
[AddrBlock64KB
] >= padSize
[AddrBlock4KB
]);
3377 ADDR_ASSERT(padSize
[AddrBlock64KB
] >= padSize
[AddrBlockMicro
]);
3381 if (returnCode
== ADDR_OK
)
3383 for (UINT_32 i
= AddrBlockMicro
; i
< AddrBlockMaxTiledType
; i
++)
3385 padSize
[i
] = PowTwoAlign(padSize
[i
], minSizeAlignInElement
);
3388 // Use minimum block type which meets all conditions above if flag minimizeAlign was set
3389 if (pIn
->flags
.minimizeAlign
)
3391 // If padded size of 64KB block is larger than padded size of 256B block or 4KB
3392 // block, filter out 64KB block from candidate list
3393 if (blockSet
.macro64KB
&&
3394 ((blockSet
.micro
&& (padSize
[AddrBlockMicro
] < padSize
[AddrBlock64KB
])) ||
3395 (blockSet
.macro4KB
&& (padSize
[AddrBlock4KB
] < padSize
[AddrBlock64KB
]))))
3397 blockSet
.macro64KB
= FALSE
;
3400 // If padded size of 4KB block is larger than padded size of 256B block,
3401 // filter out 4KB block from candidate list
3402 if (blockSet
.macro4KB
&&
3404 (padSize
[AddrBlockMicro
] < padSize
[AddrBlock4KB
]))
3406 blockSet
.macro4KB
= FALSE
;
3409 // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint
3410 else if (pIn
->flags
.opt4space
)
3412 UINT_64 threshold
= blockSet
.micro
? padSize
[AddrBlockMicro
] :
3413 (blockSet
.macro4KB
? padSize
[AddrBlock4KB
] : padSize
[AddrBlock64KB
]);
3415 threshold
+= threshold
>> 1;
3417 if (blockSet
.macro64KB
&& (padSize
[AddrBlock64KB
] > threshold
))
3419 blockSet
.macro64KB
= FALSE
;
3422 if (blockSet
.macro4KB
&& (padSize
[AddrBlock4KB
] > threshold
))
3424 blockSet
.macro4KB
= FALSE
;
3429 if (blockSet
.macro64KB
&&
3430 (padSize
[AddrBlock64KB
] >= static_cast<UINT_64
>(width
) * height
* slice
* 2) &&
3431 ((blockSet
.value
& ~AddrBlockSetMacro64KB
) != 0))
3433 // If 64KB block waste more than half memory on padding, filter it out from
3434 // candidate list when it is not the only choice left
3435 blockSet
.macro64KB
= FALSE
;
3439 if (blockSet
.value
== 0)
3441 // Bad things happen, client will not get any useful information from AddrLib.
3442 // Maybe we should fill in some output earlier instead of outputing nothing?
3443 ADDR_ASSERT_ALWAYS();
3444 returnCode
= ADDR_INVALIDPARAMS
;
3448 pOut
->validBlockSet
= blockSet
;
3449 pOut
->canXor
= pOut
->canXor
&&
3450 (blockSet
.macro4KB
|| blockSet
.macro64KB
|| blockSet
.var
);
3452 if (blockSet
.macro64KB
|| blockSet
.macro4KB
)
3454 if (swType
== ADDR_SW_Z
)
3456 pOut
->swizzleMode
= blockSet
.macro64KB
? ADDR_SW_64KB_Z
: ADDR_SW_4KB_Z
;
3458 else if (swType
== ADDR_SW_S
)
3460 pOut
->swizzleMode
= blockSet
.macro64KB
? ADDR_SW_64KB_S
: ADDR_SW_4KB_S
;
3462 else if (swType
== ADDR_SW_D
)
3464 pOut
->swizzleMode
= blockSet
.macro64KB
? ADDR_SW_64KB_D
: ADDR_SW_4KB_D
;
3468 ADDR_ASSERT(swType
== ADDR_SW_R
);
3469 pOut
->swizzleMode
= blockSet
.macro64KB
? ADDR_SW_64KB_R
: ADDR_SW_4KB_R
;
3472 if (prtXor
&& blockSet
.macro64KB
)
3474 // Client wants PRTXOR, give back _T swizzle mode if 64KB is available
3475 const UINT_32 prtGap
= ADDR_SW_64KB_Z_T
- ADDR_SW_64KB_Z
;
3476 pOut
->swizzleMode
= static_cast<AddrSwizzleMode
>(pOut
->swizzleMode
+ prtGap
);
3478 else if (pOut
->canXor
)
3480 // Client wants XOR and this is allowed, return XOR version swizzle mode
3481 const UINT_32 xorGap
= ADDR_SW_4KB_Z_X
- ADDR_SW_4KB_Z
;
3482 pOut
->swizzleMode
= static_cast<AddrSwizzleMode
>(pOut
->swizzleMode
+ xorGap
);
3485 else if (blockSet
.micro
)
3487 if (swType
== ADDR_SW_S
)
3489 pOut
->swizzleMode
= ADDR_SW_256B_S
;
3491 else if (swType
== ADDR_SW_D
)
3493 pOut
->swizzleMode
= ADDR_SW_256B_D
;
3497 ADDR_ASSERT(swType
== ADDR_SW_R
);
3498 pOut
->swizzleMode
= ADDR_SW_256B_R
;
3501 else if (blockSet
.linear
)
3503 // Fall into this branch doesn't mean linear is suitable, only no other choices!
3504 pOut
->swizzleMode
= ADDR_SW_LINEAR
;
3508 ADDR_ASSERT(blockSet
.var
);
3510 // Designer consider VAR swizzle mode is usless for most cases
3511 ADDR_UNHANDLED_CASE();
3513 returnCode
= ADDR_NOTSUPPORTED
;
3517 // Post sanity check, at least AddrLib should accept the output generated by its own
3518 if (pOut
->swizzleMode
!= ADDR_SW_LINEAR
)
3520 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn
= {0};
3521 localIn
.flags
= pIn
->flags
;
3522 localIn
.swizzleMode
= pOut
->swizzleMode
;
3523 localIn
.resourceType
= pOut
->resourceType
;
3524 localIn
.format
= pIn
->format
;
3526 localIn
.width
= width
;
3527 localIn
.height
= height
;
3528 localIn
.numSlices
= slice
;
3529 localIn
.numMipLevels
= numMipLevels
;
3530 localIn
.numSamples
= numSamples
;
3531 localIn
.numFrags
= numFrags
;
3533 HwlComputeSurfaceInfoSanityCheck(&localIn
);
3535 // TODO : check all valid block type available in validBlockSet?
3546 ************************************************************************************************************************
3547 * Gfx9Lib::ComputeStereoInfo
3550 * Compute height alignment and right eye pipeBankXor for stereo surface
3555 ************************************************************************************************************************
3557 ADDR_E_RETURNCODE
Gfx9Lib::ComputeStereoInfo(
3558 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
,
3559 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
,
3560 UINT_32
* pHeightAlign
3563 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
3565 UINT_32 eqIndex
= HwlGetEquationIndex(pIn
, pOut
);
3567 if (eqIndex
< m_numEquations
)
3569 if (IsXor(pIn
->swizzleMode
))
3571 const UINT_32 blkSizeLog2
= GetBlockSizeLog2(pIn
->swizzleMode
);
3572 const UINT_32 numPipeBits
= GetPipeXorBits(blkSizeLog2
);
3573 const UINT_32 numBankBits
= GetBankXorBits(blkSizeLog2
);
3574 const UINT_32 bppLog2
= Log2(pIn
->bpp
>> 3);
3575 const UINT_32 maxYCoordBlock256
= Log2(Block256_2d
[bppLog2
].h
) - 1;
3576 const ADDR_EQUATION
*pEqToCheck
= &m_equationTable
[eqIndex
];
3578 ADDR_ASSERT(maxYCoordBlock256
==
3579 GetMaxValidChannelIndex(&pEqToCheck
->addr
[0], GetBlockSizeLog2(ADDR_SW_256B
), 1));
3581 const UINT_32 maxYCoordInBaseEquation
=
3582 (blkSizeLog2
- GetBlockSizeLog2(ADDR_SW_256B
)) / 2 + maxYCoordBlock256
;
3584 ADDR_ASSERT(maxYCoordInBaseEquation
==
3585 GetMaxValidChannelIndex(&pEqToCheck
->addr
[0], blkSizeLog2
, 1));
3587 const UINT_32 maxYCoordInPipeXor
= (numPipeBits
== 0) ? 0 : maxYCoordBlock256
+ numPipeBits
;
3589 ADDR_ASSERT(maxYCoordInPipeXor
==
3590 GetMaxValidChannelIndex(&pEqToCheck
->xor1
[m_pipeInterleaveLog2
], numPipeBits
, 1));
3592 const UINT_32 maxYCoordInBankXor
= (numBankBits
== 0) ?
3593 0 : maxYCoordBlock256
+ (numPipeBits
+ 1) / 2 + numBankBits
;
3595 ADDR_ASSERT(maxYCoordInBankXor
==
3596 GetMaxValidChannelIndex(&pEqToCheck
->xor1
[m_pipeInterleaveLog2
+ numPipeBits
], numBankBits
, 1));
3598 const UINT_32 maxYCoordInPipeBankXor
= Max(maxYCoordInPipeXor
, maxYCoordInBankXor
);
3600 if (maxYCoordInPipeBankXor
> maxYCoordInBaseEquation
)
3602 *pHeightAlign
= 1u << maxYCoordInPipeBankXor
;
3604 if (pOut
->pStereoInfo
!= NULL
)
3606 pOut
->pStereoInfo
->rightSwizzle
= 0;
3608 if ((PowTwoAlign(pIn
->height
, *pHeightAlign
) % (*pHeightAlign
* 2)) != 0)
3610 if (maxYCoordInPipeXor
== maxYCoordInPipeBankXor
)
3612 pOut
->pStereoInfo
->rightSwizzle
|= (1u << 1);
3615 if (maxYCoordInBankXor
== maxYCoordInPipeBankXor
)
3617 pOut
->pStereoInfo
->rightSwizzle
|=
3618 1u << ((numPipeBits
% 2) ? numPipeBits
: numPipeBits
+ 1);
3621 ADDR_ASSERT(pOut
->pStereoInfo
->rightSwizzle
==
3622 GetCoordActiveMask(&pEqToCheck
->xor1
[m_pipeInterleaveLog2
],
3623 numPipeBits
+ numBankBits
, 1, maxYCoordInPipeBankXor
));
3631 ADDR_ASSERT_ALWAYS();
3632 returnCode
= ADDR_ERROR
;
3639 ************************************************************************************************************************
3640 * Gfx9Lib::HwlComputeSurfaceInfoTiled
3643 * Internal function to calculate alignment for tiled surface
3647 ************************************************************************************************************************
3649 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSurfaceInfoTiled(
3650 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
3651 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
3654 ADDR_E_RETURNCODE returnCode
= ComputeBlockDimensionForSurf(&pOut
->blockWidth
,
3662 if (returnCode
== ADDR_OK
)
3664 UINT_32 pitchAlignInElement
= pOut
->blockWidth
;
3666 if ((IsTex2d(pIn
->resourceType
) == TRUE
) &&
3667 (pIn
->flags
.display
|| pIn
->flags
.rotated
) &&
3668 (pIn
->numMipLevels
<= 1) &&
3669 (pIn
->numSamples
<= 1) &&
3670 (pIn
->numFrags
<= 1))
3672 // Display engine needs pitch align to be at least 32 pixels.
3673 pitchAlignInElement
= PowTwoAlign(pitchAlignInElement
, 32);
3676 pOut
->pitch
= PowTwoAlign(pIn
->width
, pitchAlignInElement
);
3678 if ((pIn
->numMipLevels
<= 1) && (pIn
->pitchInElement
> 0))
3680 if ((pIn
->pitchInElement
% pitchAlignInElement
) != 0)
3682 returnCode
= ADDR_INVALIDPARAMS
;
3684 else if (pIn
->pitchInElement
< pOut
->pitch
)
3686 returnCode
= ADDR_INVALIDPARAMS
;
3690 pOut
->pitch
= pIn
->pitchInElement
;
3694 UINT_32 heightAlign
= 0;
3696 if (pIn
->flags
.qbStereo
)
3698 returnCode
= ComputeStereoInfo(pIn
, pOut
, &heightAlign
);
3701 if (returnCode
== ADDR_OK
)
3703 pOut
->height
= PowTwoAlign(pIn
->height
, pOut
->blockHeight
);
3705 if (heightAlign
> 1)
3707 pOut
->height
= PowTwoAlign(pOut
->height
, heightAlign
);
3710 pOut
->numSlices
= PowTwoAlign(pIn
->numSlices
, pOut
->blockSlices
);
3712 pOut
->epitchIsHeight
= FALSE
;
3713 pOut
->mipChainInTail
= FALSE
;
3715 pOut
->mipChainPitch
= pOut
->pitch
;
3716 pOut
->mipChainHeight
= pOut
->height
;
3717 pOut
->mipChainSlice
= pOut
->numSlices
;
3719 if (pIn
->numMipLevels
> 1)
3721 UINT_32 numMipLevel
;
3722 ADDR2_MIP_INFO
*pMipInfo
;
3723 ADDR2_MIP_INFO mipInfo
[4];
3725 if (pOut
->pMipInfo
!= NULL
)
3727 pMipInfo
= pOut
->pMipInfo
;
3728 numMipLevel
= pIn
->numMipLevels
;
3733 numMipLevel
= Min(pIn
->numMipLevels
, 4u);
3736 UINT_32 endingMip
= GetMipChainInfo(pIn
->resourceType
,
3750 pOut
->epitchIsHeight
= TRUE
;
3751 pOut
->pitch
= pMipInfo
[0].pitch
;
3752 pOut
->height
= pMipInfo
[0].height
;
3753 pOut
->numSlices
= pMipInfo
[0].depth
;
3754 pOut
->mipChainInTail
= TRUE
;
3758 UINT_32 mip0WidthInBlk
= pOut
->pitch
/ pOut
->blockWidth
;
3759 UINT_32 mip0HeightInBlk
= pOut
->height
/ pOut
->blockHeight
;
3761 AddrMajorMode majorMode
= GetMajorMode(pIn
->resourceType
,
3765 pOut
->numSlices
/ pOut
->blockSlices
);
3766 if (majorMode
== ADDR_MAJOR_Y
)
3768 UINT_32 mip1WidthInBlk
= RoundHalf(mip0WidthInBlk
);
3770 if ((mip1WidthInBlk
== 1) && (endingMip
> 2))
3775 pOut
->mipChainPitch
+= (mip1WidthInBlk
* pOut
->blockWidth
);
3777 pOut
->epitchIsHeight
= FALSE
;
3781 UINT_32 mip1HeightInBlk
= RoundHalf(mip0HeightInBlk
);
3783 if ((mip1HeightInBlk
== 1) && (endingMip
> 2))
3788 pOut
->mipChainHeight
+= (mip1HeightInBlk
* pOut
->blockHeight
);
3790 pOut
->epitchIsHeight
= TRUE
;
3794 if (pOut
->pMipInfo
!= NULL
)
3796 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
3798 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
3800 Dim3d mipStartPos
= {0};
3801 UINT_32 mipTailOffsetInBytes
= 0;
3803 mipStartPos
= GetMipStartPos(pIn
->resourceType
,
3813 &mipTailOffsetInBytes
);
3815 UINT_32 pitchInBlock
=
3816 pOut
->mipChainPitch
/ pOut
->blockWidth
;
3817 UINT_32 sliceInBlock
=
3818 (pOut
->mipChainHeight
/ pOut
->blockHeight
) * pitchInBlock
;
3819 UINT_64 blockIndex
=
3820 mipStartPos
.d
* sliceInBlock
+ mipStartPos
.h
* pitchInBlock
+ mipStartPos
.w
;
3821 UINT_64 macroBlockOffset
=
3822 blockIndex
<< GetBlockSizeLog2(pIn
->swizzleMode
);
3824 pMipInfo
[i
].macroBlockOffset
= macroBlockOffset
;
3825 pMipInfo
[i
].mipTailOffset
= mipTailOffsetInBytes
;
3829 else if (pOut
->pMipInfo
!= NULL
)
3831 pOut
->pMipInfo
[0].pitch
= pOut
->pitch
;
3832 pOut
->pMipInfo
[0].height
= pOut
->height
;
3833 pOut
->pMipInfo
[0].depth
= IsTex3d(pIn
->resourceType
)? pOut
->numSlices
: 1;
3834 pOut
->pMipInfo
[0].offset
= 0;
3837 pOut
->sliceSize
= static_cast<UINT_64
>(pOut
->mipChainPitch
) * pOut
->mipChainHeight
*
3838 (pIn
->bpp
>> 3) * pIn
->numFrags
;
3839 pOut
->surfSize
= pOut
->sliceSize
* pOut
->mipChainSlice
;
3840 pOut
->baseAlign
= HwlComputeSurfaceBaseAlign(pIn
->swizzleMode
);
3844 pOut
->baseAlign
= Max(pOut
->baseAlign
, PrtAlignment
);
3853 ************************************************************************************************************************
3854 * Gfx9Lib::GetMipChainInfo
3857 * Internal function to get out information about mip chain
3860 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
3861 ************************************************************************************************************************
3863 UINT_32
Gfx9Lib::GetMipChainInfo(
3864 AddrResourceType resourceType
,
3865 AddrSwizzleMode swizzleMode
,
3871 UINT_32 blockHeight
,
3873 UINT_32 numMipLevel
,
3874 ADDR2_MIP_INFO
* pMipInfo
) const
3876 const Dim3d tailMaxDim
=
3877 GetMipTailDim(resourceType
, swizzleMode
, blockWidth
, blockHeight
, blockDepth
);
3879 UINT_32 mipPitch
= mip0Width
;
3880 UINT_32 mipHeight
= mip0Height
;
3881 UINT_32 mipDepth
= IsTex3d(resourceType
) ? mip0Depth
: 1;
3883 UINT_32 endingMip
= numMipLevel
- 1;
3884 BOOL_32 inTail
= FALSE
;
3885 BOOL_32 finalDim
= FALSE
;
3887 BOOL_32 is3dThick
= IsThick(resourceType
, swizzleMode
);
3888 BOOL_32 is3dThin
= IsTex3d(resourceType
) && (is3dThick
== FALSE
);
3890 for (UINT_32 mipId
= 0; mipId
< numMipLevel
; mipId
++)
3894 if (finalDim
== FALSE
)
3900 mipSize
= mipPitch
* mipHeight
* mipDepth
* (bpp
>> 3);
3904 mipSize
= mipPitch
* mipHeight
* (bpp
>> 3);
3909 UINT_32 index
= Log2(bpp
>> 3);
3913 mipPitch
= Block256_3dZ
[index
].w
;
3914 mipHeight
= Block256_3dZ
[index
].h
;
3915 mipDepth
= Block256_3dZ
[index
].d
;
3919 mipPitch
= Block256_2d
[index
].w
;
3920 mipHeight
= Block256_2d
[index
].h
;
3929 inTail
= IsInMipTail(resourceType
, swizzleMode
, tailMaxDim
,
3930 mipPitch
, mipHeight
, mipDepth
);
3936 mipPitch
= tailMaxDim
.w
;
3937 mipHeight
= tailMaxDim
.h
;
3941 mipDepth
= tailMaxDim
.d
;
3946 mipPitch
= PowTwoAlign(mipPitch
, blockWidth
);
3947 mipHeight
= PowTwoAlign(mipHeight
, blockHeight
);
3951 mipDepth
= PowTwoAlign(mipDepth
, blockDepth
);
3956 pMipInfo
[mipId
].pitch
= mipPitch
;
3957 pMipInfo
[mipId
].height
= mipHeight
;
3958 pMipInfo
[mipId
].depth
= mipDepth
;
3959 pMipInfo
[mipId
].offset
= offset
;
3960 offset
+= (mipPitch
* mipHeight
* mipDepth
* (bpp
>> 3));
3966 mipDepth
= Max(mipDepth
>> 1, 1u);
3971 mipPitch
= Max(mipPitch
>> 1, 1u);
3972 mipHeight
= Max(mipHeight
>> 1, 1u);
3974 if (is3dThick
|| is3dThin
)
3976 mipDepth
= Max(mipDepth
>> 1, 1u);
3985 ************************************************************************************************************************
3986 * Gfx9Lib::GetMetaMiptailInfo
3989 * Get mip tail coordinate information.
3993 ************************************************************************************************************************
// Fills pInfo[0 .. numMipInTail - 1] with the placement of every mip inside the meta mip tail.
// Each entry is flagged inMiptail and receives the current start coordinate (startX/Y/Z taken
// from mipCoord) and the current extent (width/height/depth). mipCoord is passed by value and
// is advanced locally from one mip to the next; nothing is returned (VOID).
// NOTE(review): this listing omits some original lines (braces, several branch conditions and
// assignments), so the comments added below describe only the statements that are visible.
3995 VOID
Gfx9Lib::GetMetaMiptailInfo(
3996 ADDR2_META_MIP_INFO
* pInfo
, ///< [out] output structure to store per mip coord
3997 Dim3d mipCoord
, ///< [in] mip tail base coord
3998 UINT_32 numMipInTail
, ///< [in] number of mips in tail
3999 Dim3d
* pMetaBlkDim
///< [in] meta block width/height/depth
// A meta block with depth greater than 1 is treated as thick.
4002 BOOL_32 isThick
= (pMetaBlkDim
->d
> 1);
// Extent of the first mip in the tail: full meta-block width and depth, half the meta-block height.
4003 UINT_32 mipWidth
= pMetaBlkDim
->w
;
4004 UINT_32 mipHeight
= pMetaBlkDim
->h
>> 1;
4005 UINT_32 mipDepth
= pMetaBlkDim
->d
;
// minInc is derived from the meta-block height (128 / 64 / 32 in the visible assignment).
// It is used further down both as the threshold in (mipWidth <= minInc) and as the x/y step.
// NOTE(review): the condition guarding this assignment and the bodies of the two else-if
// branches (h >= 1024, h == 512) are not visible here - confirm against the full source.
4010 minInc
= (pMetaBlkDim
->h
>= 512) ? 128 : ((pMetaBlkDim
->h
== 256) ? 64 : 32);
4012 else if (pMetaBlkDim
->h
>= 1024)
4016 else if (pMetaBlkDim
->h
== 512)
// 0xFFFFFFFF is the value tested below; presumably it means "no mip index recorded yet"
// (the statement that stores a real index is not visible in this listing).
4025 UINT_32 blk32MipId
= 0xFFFFFFFF;
4027 for (UINT_32 mip
= 0; mip
< numMipInTail
; mip
++)
// Record the current coordinate and extent for this mip before moving on.
4029 pInfo
[mip
].inMiptail
= TRUE
;
4030 pInfo
[mip
].startX
= mipCoord
.w
;
4031 pInfo
[mip
].startY
= mipCoord
.h
;
4032 pInfo
[mip
].startZ
= mipCoord
.d
;
4033 pInfo
[mip
].width
= mipWidth
;
4034 pInfo
[mip
].height
= mipHeight
;
4035 pInfo
[mip
].depth
= mipDepth
;
4039 if (blk32MipId
== 0xFFFFFFFF)
// Restart from the recorded start position of mip blk32MipId, then add a fixed offset
// selected by the distance (mip - blk32MipId). The case labels are not visible here;
// the trailing comments name the mip size each offset is meant for.
4044 mipCoord
.w
= pInfo
[blk32MipId
].startX
;
4045 mipCoord
.h
= pInfo
[blk32MipId
].startY
;
4046 mipCoord
.d
= pInfo
[blk32MipId
].startZ
;
4048 switch (mip
- blk32MipId
)
4051 mipCoord
.w
+= 32; // 16x16
4054 mipCoord
.h
+= 32; // 8x8
4057 mipCoord
.h
+= 32; // 4x4
4061 mipCoord
.h
+= 32; // 2x2
4065 mipCoord
.h
+= 32; // 1x1
4068 // The following are for BC/ASTC formats
4070 mipCoord
.h
+= 48; // 1/2 x 1/2
4073 mipCoord
.h
+= 48; // 1/4 x 1/4
4077 mipCoord
.h
+= 48; // 1/8 x 1/8
4081 mipCoord
.h
+= 48; // 1/16 x 1/16
// Presumably the default case: no offset is defined for any other distance.
4085 ADDR_ASSERT_ALWAYS();
// Next extent is 16 when this mip is blk32MipId itself and 8 for every later mip;
// height is set equal to width, and depth as well (the guard on the depth line is not visible).
4089 mipWidth
= ((mip
- blk32MipId
) == 0) ? 16 : 8;
4090 mipHeight
= mipWidth
;
4094 mipDepth
= mipWidth
;
// Otherwise advance mipCoord to where the next mip starts, based on mipWidth versus minInc.
4099 if (mipWidth
<= minInc
)
4101 // if we're below the minimal increment...
4104 // For 3d, just go in z direction
4105 mipCoord
.d
+= mipDepth
;
4109 // For 2d, first go across, then down
4110 if ((mipWidth
* 2) == minInc
)
4112 // if we're 2 mips below, that's when we go back in x, and down in y
4113 mipCoord
.w
-= minInc
;
4114 mipCoord
.h
+= minInc
;
4118 // otherwise, just go across in x
4119 mipCoord
.w
+= minInc
;
4125 // On even mip, go down, otherwise, go across
4128 mipCoord
.w
+= mipWidth
;
4132 mipCoord
.h
+= mipHeight
;
4135 // Divide the width by 2
4137 // After the first mip in tail, the mip is always a square
4138 mipHeight
= mipWidth
;
4139 // ...or for 3d, a cube
4142 mipDepth
= mipWidth
;
4149 ************************************************************************************************************************
4150 * Gfx9Lib::GetMipStartPos
4153 * Internal function to get out information about mip logical start position
4156 * logical start position in macro block width/height/depth of one mip level within one slice
4157 ************************************************************************************************************************
// Computes the start position of mip level mipId, counted in blocks, by walking mips 1..mipId
// and adding the current per-mip block extent to one axis of mipStartPos on each step; the axis
// is chosen from majorMode. The last visible statements write a mip-tail byte offset, looked up
// in MipTailOffset256B, to *pMipTailBytesOffset.
// NOTE(review): this listing omits some original lines (several parameters, the GetMajorMode
// arguments, braces, else keywords, the loop exit and the return statement), so the comments
// added below describe only the statements that are visible.
4159 Dim3d
Gfx9Lib::GetMipStartPos(
4160 AddrResourceType resourceType
,
4161 AddrSwizzleMode swizzleMode
,
4166 UINT_32 blockHeight
,
4169 UINT_32 log2ElementBytes
,
4170 UINT_32
* pMipTailBytesOffset
) const
// Accumulated start position, zero-initialised.
4172 Dim3d mipStartPos
= {0};
4173 const Dim3d tailMaxDim
= GetMipTailDim(resourceType
, swizzleMode
, blockWidth
, blockHeight
, blockDepth
);
4175 // Report mip in tail if Mip0 is already in mip tail
4176 BOOL_32 inMipTail
= IsInMipTail(resourceType
, swizzleMode
, tailMaxDim
, width
, height
, depth
);
4177 UINT_32 log2blkSize
= GetBlockSizeLog2(swizzleMode
);
// If mip 0 is already in the tail, the index inside the tail is simply mipId.
4178 UINT_32 mipIndexInTail
= mipId
;
4180 if (inMipTail
== FALSE
)
4182 // Mip 0 dimension, unit in block
4183 UINT_32 mipWidthInBlk
= width
/ blockWidth
;
4184 UINT_32 mipHeightInBlk
= height
/ blockHeight
;
4185 UINT_32 mipDepthInBlk
= depth
/ blockDepth
;
4186 AddrMajorMode majorMode
= GetMajorMode(resourceType
,
// endingMip starts at mipId + 1, so the (mipId >= endingMip) test after the loop is false
// unless endingMip is lowered inside the loop - that update is not visible in this listing.
4192 UINT_32 endingMip
= mipId
+ 1;
4194 for (UINT_32 i
= 1; i
<= mipId
; i
++)
// Mips 1 and 3 use a different axis rule than the other mips: with ADDR_MAJOR_Y they advance
// in w, and the following statement advances in h (presumably the else branch).
4196 if ((i
== 1) || (i
== 3))
4198 if (majorMode
== ADDR_MAJOR_Y
)
4200 mipStartPos
.w
+= mipWidthInBlk
;
4204 mipStartPos
.h
+= mipHeightInBlk
;
// Other mips: ADDR_MAJOR_X advances in w, ADDR_MAJOR_Y in h, and the last statement advances
// in d (presumably the final else branch).
4209 if (majorMode
== ADDR_MAJOR_X
)
4211 mipStartPos
.w
+= mipWidthInBlk
;
4213 else if (majorMode
== ADDR_MAJOR_Y
)
4215 mipStartPos
.h
+= mipHeightInBlk
;
4219 mipStartPos
.d
+= mipDepthInBlk
;
// Decide from the current size in blocks whether this mip falls into the tail. Thick modes use
// one of three width/height/depth tests (presumably selected by dim; the selecting conditions
// and the assignment targets are not visible), other modes choose by the low bit of log2blkSize.
4223 BOOL_32 inTail
= FALSE
;
4225 if (IsThick(resourceType
, swizzleMode
))
4227 UINT_32 dim
= log2blkSize
% 3;
4232 (mipWidthInBlk
<= 2) && (mipHeightInBlk
== 1) && (mipDepthInBlk
<= 2);
4237 (mipWidthInBlk
== 1) && (mipHeightInBlk
<= 2) && (mipDepthInBlk
<= 2);
4242 (mipWidthInBlk
<= 2) && (mipHeightInBlk
<= 2) && (mipDepthInBlk
== 1);
4247 if (log2blkSize
& 1)
4249 inTail
= (mipWidthInBlk
<= 2) && (mipHeightInBlk
== 1);
4253 inTail
= (mipWidthInBlk
== 1) && (mipHeightInBlk
<= 2);
// Shrink each block extent with RoundHalf before looking at the next mip.
4263 mipWidthInBlk
= RoundHalf(mipWidthInBlk
);
4264 mipHeightInBlk
= RoundHalf(mipHeightInBlk
);
4265 mipDepthInBlk
= RoundHalf(mipDepthInBlk
);
// When the requested mip is at or beyond endingMip, its index inside the tail is the distance.
4268 if (mipId
>= endingMip
)
4271 mipIndexInTail
= mipId
- endingMip
;
// Index the tail-offset table by position in the tail, biased by (MaxMacroBits - log2blkSize);
// the table value is shifted left by 8 (multiplied by 256) before being stored.
// NOTE(review): the condition guarding these three statements is not visible in this listing.
4277 UINT_32 index
= mipIndexInTail
+ MaxMacroBits
- log2blkSize
;
4278 ADDR_ASSERT(index
< sizeof(MipTailOffset256B
) / sizeof(UINT_32
));
4279 *pMipTailBytesOffset
= MipTailOffset256B
[index
] << 8;
4286 ************************************************************************************************************************
4287 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4290 * Internal function to calculate address from coord for tiled swizzle surface
4294 ************************************************************************************************************************
4296 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4297 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
4298 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
* pOut
///< [out] output structure
4301 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn
= {0};
4302 localIn
.swizzleMode
= pIn
->swizzleMode
;
4303 localIn
.flags
= pIn
->flags
;
4304 localIn
.resourceType
= pIn
->resourceType
;
4305 localIn
.bpp
= pIn
->bpp
;
4306 localIn
.width
= Max(pIn
->unalignedWidth
, 1u);
4307 localIn
.height
= Max(pIn
->unalignedHeight
, 1u);
4308 localIn
.numSlices
= Max(pIn
->numSlices
, 1u);
4309 localIn
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
4310 localIn
.numSamples
= Max(pIn
->numSamples
, 1u);
4311 localIn
.numFrags
= Max(pIn
->numFrags
, 1u);
4312 if (localIn
.numMipLevels
<= 1)
4314 localIn
.pitchInElement
= pIn
->pitchInElement
;
4317 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut
= {0};
4318 ADDR_E_RETURNCODE returnCode
= ComputeSurfaceInfoTiled(&localIn
, &localOut
);
4320 BOOL_32 valid
= (returnCode
== ADDR_OK
) &&
4321 (IsThin(pIn
->resourceType
, pIn
->swizzleMode
) ||
4322 IsThick(pIn
->resourceType
, pIn
->swizzleMode
)) &&
4323 ((pIn
->pipeBankXor
== 0) || (IsXor(pIn
->swizzleMode
)));
4327 UINT_32 log2ElementBytes
= Log2(pIn
->bpp
>> 3);
4328 Dim3d mipStartPos
= {0};
4329 UINT_32 mipTailBytesOffset
= 0;
4331 if (pIn
->numMipLevels
> 1)
4333 // Mip-map chain cannot be MSAA surface
4334 ADDR_ASSERT((pIn
->numSamples
<= 1) && (pIn
->numFrags
<= 1));
4336 mipStartPos
= GetMipStartPos(pIn
->resourceType
,
4341 localOut
.blockWidth
,
4342 localOut
.blockHeight
,
4343 localOut
.blockSlices
,
4346 &mipTailBytesOffset
);
4349 UINT_32 interleaveOffset
= 0;
4350 UINT_32 pipeBits
= 0;
4351 UINT_32 pipeXor
= 0;
4352 UINT_32 bankBits
= 0;
4353 UINT_32 bankXor
= 0;
4355 if (IsThin(pIn
->resourceType
, pIn
->swizzleMode
))
4357 UINT_32 blockOffset
= 0;
4358 UINT_32 log2blkSize
= GetBlockSizeLog2(pIn
->swizzleMode
);
4360 if (IsZOrderSwizzle(pIn
->swizzleMode
))
4362 // Morton generation
4363 if ((log2ElementBytes
== 0) || (log2ElementBytes
== 2))
4365 UINT_32 totalLowBits
= 6 - log2ElementBytes
;
4366 UINT_32 mortBits
= totalLowBits
/ 2;
4367 UINT_32 lowBitsValue
= MortonGen2d(pIn
->y
, pIn
->x
, mortBits
);
4368 // Are 9 bits enough?
4369 UINT_32 highBitsValue
=
4370 MortonGen2d(pIn
->x
>> mortBits
, pIn
->y
>> mortBits
, 9) << totalLowBits
;
4371 blockOffset
= lowBitsValue
| highBitsValue
;
4372 ADDR_ASSERT(blockOffset
== lowBitsValue
+ highBitsValue
);
4376 blockOffset
= MortonGen2d(pIn
->y
, pIn
->x
, 13);
4379 // Fill LSBs with sample bits
4380 if (pIn
->numSamples
> 1)
4382 blockOffset
*= pIn
->numSamples
;
4383 blockOffset
|= pIn
->sample
;
4386 // Shift according to BytesPP
4387 blockOffset
<<= log2ElementBytes
;
4391 // Micro block offset
4392 UINT_32 microBlockOffset
= ComputeSurface2DMicroBlockOffset(pIn
);
4393 blockOffset
= microBlockOffset
;
4395 // Micro block dimension
4396 ADDR_ASSERT(log2ElementBytes
< MaxNumOfBpp
);
4397 Dim2d microBlockDim
= Block256_2d
[log2ElementBytes
];
4398 // Morton generation: are 12 bits enough?
4400 MortonGen2d((pIn
->x
/ microBlockDim
.w
), (pIn
->y
/ microBlockDim
.h
), 12) << 8;
4402 // Sample bits start location
4403 UINT_32 sampleStart
= log2blkSize
- Log2(pIn
->numSamples
);
4404 // Join sample bits information to the highest Macro block bits
4405 if (IsNonPrtXor(pIn
->swizzleMode
))
4407 // Non-prt-Xor : xor highest Macro block bits with sample bits
4408 blockOffset
= blockOffset
^ (pIn
->sample
<< sampleStart
);
4412 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4413 // after this op, the blockOffset only contains log2 Macro block size bits
4414 blockOffset
%= (1 << sampleStart
);
4415 blockOffset
|= (pIn
->sample
<< sampleStart
);
4416 ADDR_ASSERT((blockOffset
>> log2blkSize
) == 0);
4420 if (IsXor(pIn
->swizzleMode
))
4422 // Mask off bits above Macro block bits to keep page synonyms working for prt
4423 if (IsPrt(pIn
->swizzleMode
))
4425 blockOffset
&= ((1 << log2blkSize
) - 1);
4428 // Preserve offset inside pipe interleave
4429 interleaveOffset
= blockOffset
& ((1 << m_pipeInterleaveLog2
) - 1);
4430 blockOffset
>>= m_pipeInterleaveLog2
;
4433 pipeBits
= GetPipeXorBits(log2blkSize
);
4435 pipeXor
= FoldXor2d(blockOffset
, pipeBits
);
4436 blockOffset
>>= pipeBits
;
4439 bankBits
= GetBankXorBits(log2blkSize
);
4441 bankXor
= FoldXor2d(blockOffset
, bankBits
);
4442 blockOffset
>>= bankBits
;
4444 // Put all the part back together
4445 blockOffset
<<= bankBits
;
4446 blockOffset
|= bankXor
;
4447 blockOffset
<<= pipeBits
;
4448 blockOffset
|= pipeXor
;
4449 blockOffset
<<= m_pipeInterleaveLog2
;
4450 blockOffset
|= interleaveOffset
;
4453 ADDR_ASSERT((blockOffset
| mipTailBytesOffset
) == (blockOffset
+ mipTailBytesOffset
));
4454 ADDR_ASSERT((mipTailBytesOffset
== 0u) || (blockOffset
< (1u << log2blkSize
)));
4456 blockOffset
|= mipTailBytesOffset
;
4458 if (IsNonPrtXor(pIn
->swizzleMode
) && (pIn
->numSamples
<= 1))
4460 // Apply slice xor if not MSAA/PRT
4461 blockOffset
^= (ReverseBitVector(pIn
->slice
, pipeBits
) << m_pipeInterleaveLog2
);
4462 blockOffset
^= (ReverseBitVector(pIn
->slice
>> pipeBits
, bankBits
) <<
4463 (m_pipeInterleaveLog2
+ pipeBits
));
4466 returnCode
= ApplyCustomerPipeBankXor(pIn
->swizzleMode
, pIn
->pipeBankXor
,
4467 bankBits
, pipeBits
, &blockOffset
);
4469 blockOffset
%= (1 << log2blkSize
);
4471 UINT_32 pitchInMacroBlock
= localOut
.mipChainPitch
/ localOut
.blockWidth
;
4472 UINT_32 paddedHeightInMacroBlock
= localOut
.mipChainHeight
/ localOut
.blockHeight
;
4473 UINT_32 sliceSizeInMacroBlock
= pitchInMacroBlock
* paddedHeightInMacroBlock
;
4474 UINT_32 macroBlockIndex
=
4475 (pIn
->slice
+ mipStartPos
.d
) * sliceSizeInMacroBlock
+
4476 ((pIn
->y
/ localOut
.blockHeight
) + mipStartPos
.h
) * pitchInMacroBlock
+
4477 ((pIn
->x
/ localOut
.blockWidth
) + mipStartPos
.w
);
4479 UINT_64 macroBlockOffset
= (static_cast<UINT_64
>(macroBlockIndex
) <<
4480 GetBlockSizeLog2(pIn
->swizzleMode
));
4482 pOut
->addr
= blockOffset
| macroBlockOffset
;
4486 UINT_32 log2blkSize
= GetBlockSizeLog2(pIn
->swizzleMode
);
4488 Dim3d microBlockDim
= Block1K_3d
[log2ElementBytes
];
4490 UINT_32 blockOffset
= MortonGen3d((pIn
->x
/ microBlockDim
.w
),
4491 (pIn
->y
/ microBlockDim
.h
),
4492 (pIn
->slice
/ microBlockDim
.d
),
4496 blockOffset
|= ComputeSurface3DMicroBlockOffset(pIn
);
4498 if (IsXor(pIn
->swizzleMode
))
4500 // Mask off bits above Macro block bits to keep page synonyms working for prt
4501 if (IsPrt(pIn
->swizzleMode
))
4503 blockOffset
&= ((1 << log2blkSize
) - 1);
4506 // Preserve offset inside pipe interleave
4507 interleaveOffset
= blockOffset
& ((1 << m_pipeInterleaveLog2
) - 1);
4508 blockOffset
>>= m_pipeInterleaveLog2
;
4511 pipeBits
= GetPipeXorBits(log2blkSize
);
4513 pipeXor
= FoldXor3d(blockOffset
, pipeBits
);
4514 blockOffset
>>= pipeBits
;
4517 bankBits
= GetBankXorBits(log2blkSize
);
4519 bankXor
= FoldXor3d(blockOffset
, bankBits
);
4520 blockOffset
>>= bankBits
;
4522 // Put all the part back together
4523 blockOffset
<<= bankBits
;
4524 blockOffset
|= bankXor
;
4525 blockOffset
<<= pipeBits
;
4526 blockOffset
|= pipeXor
;
4527 blockOffset
<<= m_pipeInterleaveLog2
;
4528 blockOffset
|= interleaveOffset
;
4531 ADDR_ASSERT((blockOffset
| mipTailBytesOffset
) == (blockOffset
+ mipTailBytesOffset
));
4532 ADDR_ASSERT((mipTailBytesOffset
== 0u) || (blockOffset
< (1u << log2blkSize
)));
4533 blockOffset
|= mipTailBytesOffset
;
4535 returnCode
= ApplyCustomerPipeBankXor(pIn
->swizzleMode
, pIn
->pipeBankXor
,
4536 bankBits
, pipeBits
, &blockOffset
);
4538 blockOffset
%= (1 << log2blkSize
);
4540 UINT_32 xb
= pIn
->x
/ localOut
.blockWidth
+ mipStartPos
.w
;
4541 UINT_32 yb
= pIn
->y
/ localOut
.blockHeight
+ mipStartPos
.h
;
4542 UINT_32 zb
= pIn
->slice
/ localOut
.blockSlices
+ + mipStartPos
.d
;
4544 UINT_32 pitchInBlock
= localOut
.mipChainPitch
/ localOut
.blockWidth
;
4545 UINT_32 sliceSizeInBlock
=
4546 (localOut
.mipChainHeight
/ localOut
.blockHeight
) * pitchInBlock
;
4547 UINT_32 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
4549 pOut
->addr
= blockOffset
| (blockIndex
<< log2blkSize
);
4554 returnCode
= ADDR_INVALIDPARAMS
;