2 * Copyright © 2017 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
34 #include "gfx9addrlib.h"
36 #include "gfx9_gb_reg.h"
38 #include "amdgpu_asic_addr.h"
40 ////////////////////////////////////////////////////////////////////////////////////////////////////
41 ////////////////////////////////////////////////////////////////////////////////////////////////////
47 ************************************************************************************************************************
51 * Creates a Gfx9Lib object.
54 * Returns a Gfx9Lib object pointer.
55 ************************************************************************************************************************
// Factory entry point for the GFX9 address library: forwards pClient to
// V2::Gfx9Lib::CreateObj and returns whatever that call returns.
57 Addr::Lib
* Gfx9HwlInit(const Client
* pClient
)
59 return V2::Gfx9Lib::CreateObj(pClient
);
65 ////////////////////////////////////////////////////////////////////////////////////////////////////
66 // Static Const Member
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
// Static per-swizzle-mode flag table with ADDR_SW_MAX_TYPE rows, one per ADDR_SW_* mode
// (the mode name is given in the trailing comment of each row).
// Column order is the one listed in the header comment on the opening-brace line:
// Linear, 256B, 4KB, 64KB, Var, Z, Std, Disp, Rot, XOR, T, RtOpt.
// As written: the *_T rows set both XOR and T, the *_X rows set XOR only, and RtOpt is 0 in every row.
// The constructor copies this table into m_swizzleModeTable.
69 const SwizzleModeFlags
Gfx9Lib::SwizzleModeTable
[ADDR_SW_MAX_TYPE
] =
70 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt
71 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
72 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
73 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D
74 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R
76 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
77 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
78 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D
79 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R
81 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
82 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
83 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D
84 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_R
86 {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z
87 {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S
88 {0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D
89 {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R
91 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T
92 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T
93 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T
94 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T
96 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_X
97 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_X
98 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_X
99 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_X
101 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X
102 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X
103 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X
104 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X
106 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X
107 {0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X
108 {0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X
109 {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X
110 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
// Static lookup table of 16 offsets, strictly decreasing from 2048 down to 0.
// NOTE(review): by its name this holds mip-tail offsets in units of 256 bytes; the
// consumer is not visible in this part of the file - confirm units against the caller.
113 const UINT_32
Gfx9Lib::MipTailOffset256B
[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
114 8, 6, 5, 4, 3, 2, 1, 0};
// Five Dim3d entries whose element counts (w*h*d) halve from one entry to the next:
// 256, 128, 64, 32, 16. Only the width shrinks; height and depth stay at 4.
// NOTE(review): presumably the 256-byte block dimensions for 3D "S" swizzle modes, indexed by
// log2(bytes per element) - confirm against the code that indexes this table.
116 const Dim3d
Gfx9Lib::Block256_3dS
[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
// Five Dim3d entries whose element counts (w*h*d) halve from one entry to the next:
// 256, 128, 64, 32, 16.
// NOTE(review): presumably the 256-byte block dimensions for 3D "Z" swizzle modes, indexed by
// log2(bytes per element) - confirm against the code that indexes this table.
118 const Dim3d
Gfx9Lib::Block256_3dZ
[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
121 ************************************************************************************************************************
127 ************************************************************************************************************************
// Constructor. The visible body tags the object as AI_ADDRLIB, zero-fills m_settings and
// copies the static SwizzleModeTable into the per-instance m_swizzleModeTable.
// NOTE(review): any member-initialiser list is not visible in this listing.
129 Gfx9Lib::Gfx9Lib(const Client
* pClient
)
134 m_class
= AI_ADDRLIB
;
// All settings bits start cleared; HwlConvertChipFamily sets individual bits later in this file.
135 memset(&m_settings
, 0, sizeof(m_settings
));
// Copy size is taken from the static source table.
136 memcpy(m_swizzleModeTable
, SwizzleModeTable
, sizeof(SwizzleModeTable
));
140 ************************************************************************************************************************
145 ************************************************************************************************************************
152 ************************************************************************************************************************
153 * Gfx9Lib::HwlComputeHtileInfo
156 * Interface function stub of AddrComputeHtileInfo
160 ************************************************************************************************************************
// Computes the HTILE metadata layout for the surface described by pIn and writes pitch, height,
// slice size, meta block dimensions, base alignment and total byte size to pOut.
// NOTE(review): this listing omits some source lines (braces, else keywords, a few declarations
// and the return statement); the comments below describe only the statements that are visible.
162 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeHtileInfo(
163 const ADDR2_COMPUTE_HTILE_INFO_INPUT
* pIn
, ///< [in] input structure
164 ADDR2_COMPUTE_HTILE_INFO_OUTPUT
* pOut
///< [out] output structure
// Pipe count used for meta addressing (remaining call arguments are not visible here).
167 UINT_32 numPipeTotal
= GetPipeNumForMetaAddressing(pIn
->hTileFlags
.pipeAligned
,
// RB count is m_se * m_rbPerSe when the RB-aligned flag is set, otherwise 1.
170 UINT_32 numRbTotal
= pIn
->hTileFlags
.rbAligned
? m_se
* m_rbPerSe
: 1;
172 UINT_32 numCompressBlkPerMetaBlk
, numCompressBlkPerMetaBlkLog2
;
// Single pipe and single RB: log2(compress blocks per meta block) is fixed at 10.
174 if ((numPipeTotal
== 1) && (numRbTotal
== 1))
176 numCompressBlkPerMetaBlkLog2
= 10;
// With applyAliasFix the constant term becomes Max(10, m_pipeInterleaveLog2);
// the other visible assignment uses a plain 10.
180 if (m_settings
.applyAliasFix
)
182 numCompressBlkPerMetaBlkLog2
= m_seLog2
+ m_rbPerSeLog2
+ Max(10u, m_pipeInterleaveLog2
);
186 numCompressBlkPerMetaBlkLog2
= m_seLog2
+ m_rbPerSeLog2
+ 10;
190 numCompressBlkPerMetaBlk
= 1 << numCompressBlkPerMetaBlkLog2
;
// Start from an 8x8x1 block and split the log2 bits between width and height.
// With more than one mip level width gets floor(bits / 2); otherwise it gets RoundHalf(bits).
// Height receives whatever is left.
192 Dim3d metaBlkDim
= {8, 8, 1};
193 UINT_32 totalAmpBits
= numCompressBlkPerMetaBlkLog2
;
194 UINT_32 widthAmp
= (pIn
->numMipLevels
> 1) ? (totalAmpBits
>> 1) : RoundHalf(totalAmpBits
);
195 UINT_32 heightAmp
= totalAmpBits
- widthAmp
;
196 metaBlkDim
.w
<<= widthAmp
;
197 metaBlkDim
.h
<<= heightAmp
;
// Cross-check: rebuild the block one doubling at a time and assert that the result
// matches the closed-form metaBlkDim computed above.
200 Dim3d metaBlkDimDbg
= {8, 8, 1};
201 for (UINT_32 index
= 0; index
< numCompressBlkPerMetaBlkLog2
; index
++)
203 if ((metaBlkDimDbg
.h
< metaBlkDimDbg
.w
) ||
204 ((pIn
->numMipLevels
> 1) && (metaBlkDimDbg
.h
== metaBlkDimDbg
.w
)))
206 metaBlkDimDbg
.h
<<= 1;
210 metaBlkDimDbg
.w
<<= 1;
213 ADDR_ASSERT((metaBlkDimDbg
.w
== metaBlkDim
.w
) && (metaBlkDimDbg
.h
== metaBlkDim
.h
));
// Lay out the mip chain; dataThick is passed as FALSE here.
// The declarations of numMetaBlkX/Y/Z are not visible in this listing.
220 GetMetaMipInfo(pIn
->numMipLevels
, &metaBlkDim
, FALSE
, pOut
->pMipInfo
,
221 pIn
->unalignedWidth
, pIn
->unalignedHeight
, pIn
->numSlices
,
222 &numMetaBlkX
, &numMetaBlkY
, &numMetaBlkZ
);
224 UINT_32 sizeAlign
= numPipeTotal
* numRbTotal
* m_pipeInterleaveBytes
;
// NOTE(review): the statement guarded by htileAlignFix is not visible in this listing.
226 if (m_settings
.htileAlignFix
)
231 pOut
->pitch
= numMetaBlkX
* metaBlkDim
.w
;
232 pOut
->height
= numMetaBlkY
* metaBlkDim
.h
;
// Slice size: meta blocks per slice * compress blocks per meta block * 4.
233 pOut
->sliceSize
= numMetaBlkX
* numMetaBlkY
* numCompressBlkPerMetaBlk
* 4;
235 pOut
->metaBlkWidth
= metaBlkDim
.w
;
236 pOut
->metaBlkHeight
= metaBlkDim
.h
;
237 pOut
->metaBlkNumPerSlice
= numMetaBlkX
* numMetaBlkY
;
// Base alignment is the larger of one meta block's size and sizeAlign.
239 pOut
->baseAlign
= Max(numCompressBlkPerMetaBlk
* 4, sizeAlign
);
// metaBaseAlignFix additionally raises it to at least the swizzle mode's block size.
241 if (m_settings
.metaBaseAlignFix
)
243 pOut
->baseAlign
= Max(pOut
->baseAlign
, GetBlockSize(pIn
->swizzleMode
));
// Non-XOR swizzle modes with more than two pipes: sizeAlign is raised to at least
// numPipeTotal * numCompressBlkPerMetaBlk * 2.
246 if ((IsXor(pIn
->swizzleMode
) == FALSE
) && (numPipeTotal
> 2))
248 UINT_32 additionalAlign
= numPipeTotal
* numCompressBlkPerMetaBlk
* 2;
250 if (additionalAlign
> sizeAlign
)
252 sizeAlign
= additionalAlign
;
// Total size: slice size times Z meta block count, rounded up to sizeAlign.
256 pOut
->htileBytes
= PowTwoAlign(pOut
->sliceSize
* numMetaBlkZ
, sizeAlign
);
262 ************************************************************************************************************************
263 * Gfx9Lib::HwlComputeCmaskInfo
266 * Interface function stub of AddrComputeCmaskInfo
270 ************************************************************************************************************************
// Computes the CMASK metadata layout for the surface described by pIn and writes pitch, height,
// slice size, total byte size, base alignment and meta block dimensions to pOut.
// NOTE(review): this listing omits some source lines (braces, else keywords and the return
// statement); the comments below describe only the statements that are visible.
272 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeCmaskInfo(
273 const ADDR2_COMPUTE_CMASK_INFO_INPUT
* pIn
, ///< [in] input structure
274 ADDR2_COMPUTE_CMASK_INFO_OUTPUT
* pOut
///< [out] output structure
277 // TODO: Clarify with AddrLib team
278 // ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
// Pipe count used for meta addressing (remaining call arguments are not visible here).
280 UINT_32 numPipeTotal
= GetPipeNumForMetaAddressing(pIn
->cMaskFlags
.pipeAligned
,
// RB count is m_se * m_rbPerSe when the RB-aligned flag is set, otherwise 1.
283 UINT_32 numRbTotal
= pIn
->cMaskFlags
.rbAligned
? m_se
* m_rbPerSe
: 1;
285 UINT_32 numCompressBlkPerMetaBlkLog2
, numCompressBlkPerMetaBlk
;
// Single pipe and single RB: log2(compress blocks per meta block) is fixed at 13.
287 if ((numPipeTotal
== 1) && (numRbTotal
== 1))
289 numCompressBlkPerMetaBlkLog2
= 13;
// With applyAliasFix the constant term becomes Max(10, m_pipeInterleaveLog2);
// the other visible assignment uses a plain 10.
293 if (m_settings
.applyAliasFix
)
295 numCompressBlkPerMetaBlkLog2
= m_seLog2
+ m_rbPerSeLog2
+ Max(10u, m_pipeInterleaveLog2
);
299 numCompressBlkPerMetaBlkLog2
= m_seLog2
+ m_rbPerSeLog2
+ 10;
// Never go below 13.
302 numCompressBlkPerMetaBlkLog2
= Max(numCompressBlkPerMetaBlkLog2
, 13u);
305 numCompressBlkPerMetaBlk
= 1 << numCompressBlkPerMetaBlkLog2
;
// Start from an 8x8 block; height gets floor(bits / 2) doublings and width gets the rest,
// so width is the larger dimension whenever the bit count is odd.
307 Dim2d metaBlkDim
= {8, 8};
308 UINT_32 totalAmpBits
= numCompressBlkPerMetaBlkLog2
;
309 UINT_32 heightAmp
= totalAmpBits
>> 1;
310 UINT_32 widthAmp
= totalAmpBits
- heightAmp
;
311 metaBlkDim
.w
<<= widthAmp
;
312 metaBlkDim
.h
<<= heightAmp
;
// Cross-check: rebuild the block one doubling at a time and assert that the result
// matches the closed-form metaBlkDim computed above.
315 Dim2d metaBlkDimDbg
= {8, 8};
316 for (UINT_32 index
= 0; index
< numCompressBlkPerMetaBlkLog2
; index
++)
318 if (metaBlkDimDbg
.h
< metaBlkDimDbg
.w
)
320 metaBlkDimDbg
.h
<<= 1;
324 metaBlkDimDbg
.w
<<= 1;
327 ADDR_ASSERT((metaBlkDimDbg
.w
== metaBlkDim
.w
) && (metaBlkDimDbg
.h
== metaBlkDim
.h
));
// Meta block counts: ceiling division of the unaligned dimensions by the block dimensions;
// the Z count is the slice count, treated as at least 1.
330 UINT_32 numMetaBlkX
= (pIn
->unalignedWidth
+ metaBlkDim
.w
- 1) / metaBlkDim
.w
;
331 UINT_32 numMetaBlkY
= (pIn
->unalignedHeight
+ metaBlkDim
.h
- 1) / metaBlkDim
.h
;
332 UINT_32 numMetaBlkZ
= Max(pIn
->numSlices
, 1u);
334 UINT_32 sizeAlign
= numPipeTotal
* numRbTotal
* m_pipeInterleaveBytes
;
336 pOut
->pitch
= numMetaBlkX
* metaBlkDim
.w
;
337 pOut
->height
= numMetaBlkY
* metaBlkDim
.h
;
// Slice size is half the number of compress blocks in the slice (>> 1).
338 pOut
->sliceSize
= (numMetaBlkX
* numMetaBlkY
* numCompressBlkPerMetaBlk
) >> 1;
// Total size: slice size times slice count, rounded up to sizeAlign.
339 pOut
->cmaskBytes
= PowTwoAlign(pOut
->sliceSize
* numMetaBlkZ
, sizeAlign
);
// Base alignment is the larger of one meta block's size and sizeAlign.
340 pOut
->baseAlign
= Max(numCompressBlkPerMetaBlk
>> 1, sizeAlign
);
// metaBaseAlignFix additionally raises it to at least the swizzle mode's block size.
342 if (m_settings
.metaBaseAlignFix
)
344 pOut
->baseAlign
= Max(pOut
->baseAlign
, GetBlockSize(pIn
->swizzleMode
));
347 pOut
->metaBlkWidth
= metaBlkDim
.w
;
348 pOut
->metaBlkHeight
= metaBlkDim
.h
;
350 pOut
->metaBlkNumPerSlice
= numMetaBlkX
* numMetaBlkY
;
356 ************************************************************************************************************************
357 * Gfx9Lib::GetMetaMipInfo
364 ************************************************************************************************************************
// Lays out a metadata mip chain: fills pInfo[] with the start coordinate and aligned size of each
// mip level and reports the number of meta blocks needed in X, Y and Z for the whole chain.
// NOTE(review): this listing omits some source lines (braces, else keywords, the declarations of
// pMipDim / pOrderDim / orderLimit and parts of several branches); the comments below describe
// only the statements that are visible.
366 VOID
Gfx9Lib::GetMetaMipInfo(
367 UINT_32 numMipLevels
, ///< [in] number of mip levels
368 Dim3d
* pMetaBlkDim
, ///< [in] meta block dimension
369 BOOL_32 dataThick
, ///< [in] data surface is thick
370 ADDR2_META_MIP_INFO
* pInfo
, ///< [out] meta mip info
371 UINT_32 mip0Width
, ///< [in] mip0 width
372 UINT_32 mip0Height
, ///< [in] mip0 height
373 UINT_32 mip0Depth
, ///< [in] mip0 depth
374 UINT_32
* pNumMetaBlkX
, ///< [out] number of metablock X in mipchain
375 UINT_32
* pNumMetaBlkY
, ///< [out] number of metablock Y in mipchain
376 UINT_32
* pNumMetaBlkZ
) ///< [out] number of metablock Z in mipchain
// Meta block counts for mip 0: ceiling division of each dimension by the block dimension.
379 UINT_32 numMetaBlkX
= (mip0Width
+ pMetaBlkDim
->w
- 1) / pMetaBlkDim
->w
;
380 UINT_32 numMetaBlkY
= (mip0Height
+ pMetaBlkDim
->h
- 1) / pMetaBlkDim
->h
;
381 UINT_32 numMetaBlkZ
= (mip0Depth
+ pMetaBlkDim
->d
- 1) / pMetaBlkDim
->d
;
// Mip-tail threshold: full block width and depth, but only half the block height.
382 UINT_32 tailWidth
= pMetaBlkDim
->w
;
383 UINT_32 tailHeight
= pMetaBlkDim
->h
>> 1;
384 UINT_32 tailDepth
= pMetaBlkDim
->d
;
385 BOOL_32 inTail
= FALSE
;
386 AddrMajorMode major
= ADDR_MAJOR_MAX_TYPE
;
// The major axis is chosen only when there is more than one mip level.
// Z-major requires a thick surface whose Z block count strictly exceeds both X and Y;
// otherwise X-major is chosen when X >= Y; the remaining visible assignment is Y-major.
388 if (numMipLevels
> 1)
390 if (dataThick
&& (numMetaBlkZ
> numMetaBlkX
) && (numMetaBlkZ
> numMetaBlkY
))
393 major
= ADDR_MAJOR_Z
;
395 else if (numMetaBlkX
>= numMetaBlkY
)
398 major
= ADDR_MAJOR_X
;
403 major
= ADDR_MAJOR_Y
;
// Mip 0 is in the tail when it fits the tail threshold; depth is only tested for thick surfaces.
406 inTail
= ((mip0Width
<= tailWidth
) &&
407 (mip0Height
<= tailHeight
) &&
408 ((dataThick
== FALSE
) || (mip0Depth
<= tailDepth
)));
// Pick which block count may be enlarged (pMipDim) and which one is compared with
// orderLimit (pOrderDim), according to the major axis.
416 if (major
== ADDR_MAJOR_Z
)
419 pMipDim
= &numMetaBlkY
;
420 pOrderDim
= &numMetaBlkZ
;
423 else if (major
== ADDR_MAJOR_X
)
426 pMipDim
= &numMetaBlkY
;
427 pOrderDim
= &numMetaBlkX
;
433 pMipDim
= &numMetaBlkX
;
434 pOrderDim
= &numMetaBlkY
;
// NOTE(review): the statement guarded by this condition is not visible in this listing, so it
// cannot be determined here which branch the "+=" below belongs to.
438 if ((*pMipDim
< 3) && (*pOrderDim
> orderLimit
) && (numMipLevels
> 3))
// Enlarges *pMipDim by half of its current value, rounded up.
444 *pMipDim
+= ((*pMipDim
/ 2) + (*pMipDim
& 1));
// Walk the mip chain starting from the mip 0 dimensions at coordinate (0, 0, 0).
451 UINT_32 mipWidth
= mip0Width
;
452 UINT_32 mipHeight
= mip0Height
;
453 UINT_32 mipDepth
= mip0Depth
;
454 Dim3d mipCoord
= {0};
456 for (UINT_32 mip
= 0; mip
< numMipLevels
; mip
++)
// Mip-tail handling for the remaining levels (remaining call arguments are not visible here).
460 GetMetaMiptailInfo(&pInfo
[mip
], mipCoord
, numMipLevels
- mip
,
// Round the current mip's dimensions up to whole meta blocks.
466 mipWidth
= PowTwoAlign(mipWidth
, pMetaBlkDim
->w
);
467 mipHeight
= PowTwoAlign(mipHeight
, pMetaBlkDim
->h
);
468 mipDepth
= PowTwoAlign(mipDepth
, pMetaBlkDim
->d
);
// Record this level's placement; depth is reported as 1 for non-thick surfaces.
470 pInfo
[mip
].inMiptail
= FALSE
;
471 pInfo
[mip
].startX
= mipCoord
.w
;
472 pInfo
[mip
].startY
= mipCoord
.h
;
473 pInfo
[mip
].startZ
= mipCoord
.d
;
474 pInfo
[mip
].width
= mipWidth
;
475 pInfo
[mip
].height
= mipHeight
;
476 pInfo
[mip
].depth
= dataThick
? mipDepth
: 1;
// Advance the start coordinate for the next level. The test below separates levels with
// index >= 3 or an odd index from the rest.
// NOTE(review): the conditions selecting between the individual increments are not visible here.
478 if ((mip
>= 3) || (mip
& 1))
483 mipCoord
.w
+= mipWidth
;
486 mipCoord
.h
+= mipHeight
;
489 mipCoord
.d
+= mipDepth
;
500 mipCoord
.h
+= mipHeight
;
503 mipCoord
.w
+= mipWidth
;
506 mipCoord
.h
+= mipHeight
;
// The next level has half the size in each dimension, never smaller than 1.
513 mipWidth
= Max(mipWidth
>> 1, 1u);
514 mipHeight
= Max(mipHeight
>> 1, 1u);
515 mipDepth
= Max(mipDepth
>> 1, 1u);
// Re-evaluate the tail test for the next level.
517 inTail
= ((mipWidth
<= tailWidth
) &&
518 (mipHeight
<= tailHeight
) &&
519 ((dataThick
== FALSE
) || (mipDepth
<= tailDepth
)));
// Report the (possibly enlarged) meta block counts to the caller.
524 *pNumMetaBlkX
= numMetaBlkX
;
525 *pNumMetaBlkY
= numMetaBlkY
;
526 *pNumMetaBlkZ
= numMetaBlkZ
;
530 ************************************************************************************************************************
531 * Gfx9Lib::HwlComputeDccInfo
534 * Interface function to compute DCC key info
538 ************************************************************************************************************************
// Computes the DCC key layout for the surface described by pIn and writes RAM size and base
// alignment, pitch/height/depth, compress block and meta block dimensions and the
// per-slice fast-clear size to pOut.
// NOTE(review): this listing omits some source lines (braces, the first branch before
// "else if", else keywords, the loop body's assignments, a few declarations and the return
// statement); the comments below describe only the statements that are visible.
540 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeDccInfo(
541 const ADDR2_COMPUTE_DCCINFO_INPUT
* pIn
, ///< [in] input structure
542 ADDR2_COMPUTE_DCCINFO_OUTPUT
* pOut
///< [out] output structure
545 BOOL_32 dataLinear
= IsLinear(pIn
->swizzleMode
);
546 BOOL_32 metaLinear
= pIn
->dccKeyFlags
.linear
;
547 BOOL_32 pipeAligned
= pIn
->dccKeyFlags
.pipeAligned
;
// Linear-metadata path.
553 else if (metaLinear
== TRUE
)
558 UINT_32 numPipeTotal
= GetPipeNumForMetaAddressing(pipeAligned
, pIn
->swizzleMode
);
562 // Linear metadata support was removed for GFX9; no one can use this feature on GFX9.
563 ADDR_ASSERT_ALWAYS();
// Outputs are still filled in: the alignment is pipes * interleave bytes and the size is
// dataSurfaceSize / 256 rounded up to that alignment.
565 pOut
->dccRamBaseAlign
= numPipeTotal
* m_pipeInterleaveBytes
;
566 pOut
->dccRamSize
= PowTwoAlign((pIn
->dataSurfaceSize
/ 256), pOut
->dccRamBaseAlign
);
// Tiled-metadata path.
570 BOOL_32 dataThick
= IsThick(pIn
->resourceType
, pIn
->swizzleMode
);
// Minimum meta block size depends on thickness and is divided by the fragment count.
572 UINT_32 minMetaBlkSize
= dataThick
? 65536 : 4096;
// Fragment and slice counts are treated as at least 1.
574 UINT_32 numFrags
= Max(pIn
->numFrags
, 1u);
575 UINT_32 numSlices
= Max(pIn
->numSlices
, 1u);
577 minMetaBlkSize
/= numFrags
;
579 UINT_32 numCompressBlkPerMetaBlk
= minMetaBlkSize
;
// RB count is m_se * m_rbPerSe when the RB-aligned flag is set, otherwise 1.
581 UINT_32 numRbTotal
= pIn
->dccKeyFlags
.rbAligned
? m_se
* m_rbPerSe
: 1;
// With more than one pipe or RB the block count is raised to at least
// m_se * m_rbPerSe * (262144 for thick data, thinBlkSize otherwise).
583 if ((numPipeTotal
> 1) || (numRbTotal
> 1))
// thinBlkSize is 1 << 10, or 1 << Max(10, m_pipeInterleaveLog2) when applyAliasFix is set.
585 const UINT_32 thinBlkSize
= 1 << (m_settings
.applyAliasFix
? Max(10u, m_pipeInterleaveLog2
) : 10);
587 numCompressBlkPerMetaBlk
=
588 Max(numCompressBlkPerMetaBlk
, m_se
* m_rbPerSe
* (dataThick
? 262144 : thinBlkSize
));
// Upper bound: 65536 * bpp.
590 if (numCompressBlkPerMetaBlk
> 65536 * pIn
->bpp
)
592 numCompressBlkPerMetaBlk
= 65536 * pIn
->bpp
;
// The meta block starts as one compress block and is grown inside the loop below.
596 Dim3d compressBlkDim
= GetDccCompressBlk(pIn
->resourceType
, pIn
->swizzleMode
, pIn
->bpp
);
597 Dim3d metaBlkDim
= compressBlkDim
;
// index doubles every iteration (1, 2, 4, ...) until it reaches numCompressBlkPerMetaBlk.
// NOTE(review): the statements that grow metaBlkDim inside the branches are not visible here.
599 for (UINT_32 index
= 1; index
< numCompressBlkPerMetaBlk
; index
<<= 1)
601 if ((metaBlkDim
.h
< metaBlkDim
.w
) ||
602 ((pIn
->numMipLevels
> 1) && (metaBlkDim
.h
== metaBlkDim
.w
)))
604 if ((dataThick
== FALSE
) || (metaBlkDim
.h
<= metaBlkDim
.d
))
615 if ((dataThick
== FALSE
) || (metaBlkDim
.w
<= metaBlkDim
.d
))
// Lay out the mip chain with the final meta block dimensions.
// The declarations of numMetaBlkX/Y/Z are not visible in this listing.
630 GetMetaMipInfo(pIn
->numMipLevels
, &metaBlkDim
, dataThick
, pOut
->pMipInfo
,
631 pIn
->unalignedWidth
, pIn
->unalignedHeight
, numSlices
,
632 &numMetaBlkX
, &numMetaBlkY
, &numMetaBlkZ
);
634 UINT_32 sizeAlign
= numPipeTotal
* numRbTotal
* m_pipeInterleaveBytes
;
// More fragments than m_maxCompFrag: scale sizeAlign by the integer ratio.
636 if (numFrags
> m_maxCompFrag
)
638 sizeAlign
*= (numFrags
/ m_maxCompFrag
);
// RAM size: total meta blocks * compress blocks per meta block * fragments, rounded up to sizeAlign.
641 pOut
->dccRamSize
= numMetaBlkX
* numMetaBlkY
* numMetaBlkZ
*
642 numCompressBlkPerMetaBlk
* numFrags
;
643 pOut
->dccRamSize
= PowTwoAlign(pOut
->dccRamSize
, sizeAlign
);
644 pOut
->dccRamBaseAlign
= Max(numCompressBlkPerMetaBlk
, sizeAlign
);
// metaBaseAlignFix additionally raises the base alignment to at least the swizzle mode's block size.
646 if (m_settings
.metaBaseAlignFix
)
648 pOut
->dccRamBaseAlign
= Max(pOut
->dccRamBaseAlign
, GetBlockSize(pIn
->swizzleMode
));
651 pOut
->pitch
= numMetaBlkX
* metaBlkDim
.w
;
652 pOut
->height
= numMetaBlkY
* metaBlkDim
.h
;
653 pOut
->depth
= numMetaBlkZ
* metaBlkDim
.d
;
655 pOut
->compressBlkWidth
= compressBlkDim
.w
;
656 pOut
->compressBlkHeight
= compressBlkDim
.h
;
657 pOut
->compressBlkDepth
= compressBlkDim
.d
;
659 pOut
->metaBlkWidth
= metaBlkDim
.w
;
660 pOut
->metaBlkHeight
= metaBlkDim
.h
;
661 pOut
->metaBlkDepth
= metaBlkDim
.d
;
663 pOut
->metaBlkNumPerSlice
= numMetaBlkX
* numMetaBlkY
;
// Fast-clear size per slice counts at most m_maxCompFrag fragments.
664 pOut
->fastClearSizePerSlice
=
665 pOut
->metaBlkNumPerSlice
* numCompressBlkPerMetaBlk
* Min(numFrags
, m_maxCompFrag
);
672 ************************************************************************************************************************
673 * Gfx9Lib::HwlGetMaxAlignments
676 * Gets maximum alignments
679 ************************************************************************************************************************
// Reports the maximum base alignment: pOut->baseAlign is set to the surface base alignment
// that HwlComputeSurfaceBaseAlign returns for the ADDR_SW_64KB swizzle mode.
// NOTE(review): the return statement is not visible in this listing.
681 ADDR_E_RETURNCODE
Gfx9Lib::HwlGetMaxAlignments(
682 ADDR_GET_MAX_ALIGNMENTS_OUTPUT
* pOut
///< [out] output structure
685 pOut
->baseAlign
= HwlComputeSurfaceBaseAlign(ADDR_SW_64KB
);
691 ************************************************************************************************************************
692 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
695 * Interface function stub of AddrComputeCmaskAddrFromCoord
699 ************************************************************************************************************************
// Computes the CMASK byte address and bit position for the pixel coordinate (x, y, slice) in pIn.
// It first derives the CMASK layout via ComputeCmaskInfo, then evaluates the meta equation and
// applies the pipe XOR.
// NOTE(review): this listing omits some source lines (braces, trailing call arguments and the
// return statement); the comments below describe only the statements that are visible.
701 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeCmaskAddrFromCoord(
702 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
703 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT
* pOut
) ///< [out] output structure
// Build the layout query from pIn; width, height and slice count are treated as at least 1.
705 ADDR2_COMPUTE_CMASK_INFO_INPUT input
= {0};
706 input
.size
= sizeof(input
);
707 input
.cMaskFlags
= pIn
->cMaskFlags
;
708 input
.colorFlags
= pIn
->colorFlags
;
709 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
710 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
711 input
.numSlices
= Max(pIn
->numSlices
, 1u);
712 input
.swizzleMode
= pIn
->swizzleMode
;
713 input
.resourceType
= pIn
->resourceType
;
715 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output
= {0};
716 output
.size
= sizeof(output
);
718 ADDR_E_RETURNCODE returnCode
= ComputeCmaskInfo(&input
, &output
);
// The address is only computed when the layout query succeeded.
720 if (returnCode
== ADDR_OK
)
// Element size comes from the FMASK bits per pixel (bpp >> 3 = bytes), as a log2.
722 UINT_32 fmaskBpp
= GetFmaskBpp(pIn
->numSamples
, pIn
->numFrags
);
723 UINT_32 fmaskElementBytesLog2
= Log2(fmaskBpp
>> 3);
724 UINT_32 metaBlkWidthLog2
= Log2(output
.metaBlkWidth
);
725 UINT_32 metaBlkHeightLog2
= Log2(output
.metaBlkHeight
);
// Meta equation lookup for Gfx9DataFmask. The literals 0, 3, 3, 0 fill the last four
// initialiser slots, which HwlComputeDccAddrFromCoord fills with metaBlkDepthLog2 and the three
// compBlk*Log2 values - i.e. no meta block depth and, presumably, an 8x8x1 compress block.
727 const CoordEq
* pMetaEq
= GetMetaEquation({0, fmaskElementBytesLog2
, 0, pIn
->cMaskFlags
,
728 Gfx9DataFmask
, pIn
->swizzleMode
, pIn
->resourceType
,
729 metaBlkWidthLog2
, metaBlkHeightLog2
, 0, 3, 3, 0});
// Meta block coordinates of the pixel and the linear index of that block
// (slice-major, then row, then column).
731 UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
732 UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
733 UINT_32 zb
= pIn
->slice
;
735 UINT_32 pitchInBlock
= output
.pitch
/ output
.metaBlkWidth
;
736 UINT_32 sliceSizeInBlock
= (output
.height
/ output
.metaBlkHeight
) * pitchInBlock
;
737 UINT_32 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
739 UINT_64 address
= pMetaEq
->solve(pIn
->x
, pIn
->y
, pIn
->slice
, 0, blockIndex
);
// The solved value is halved to get the output address; its low bit selects a bit position
// of 0 or 4 within that address.
741 pOut
->addr
= address
>> 1;
742 pOut
->bitPosition
= static_cast<UINT_32
>((address
& 1) << 2);
// Pipe XOR: keep only the low numPipeBits of pIn->pipeXor and XOR them into the address
// starting at bit m_pipeInterleaveLog2.
745 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->cMaskFlags
.pipeAligned
,
748 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
750 pOut
->addr
^= (pipeXor
<< m_pipeInterleaveLog2
);
757 ************************************************************************************************************************
758 * Gfx9Lib::HwlComputeHtileAddrFromCoord
761 * Interface function stub of AddrComputeHtileAddrFromCoord
765 ************************************************************************************************************************
// Computes the HTILE byte address for the pixel coordinate (x, y, slice) in pIn.
// It first derives the HTILE layout via ComputeHtileInfo, then evaluates the meta equation and
// applies the pipe XOR.
// NOTE(review): this listing omits some source lines (braces, else keywords, trailing call
// arguments and the return statement); the comments below describe only the visible statements.
767 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeHtileAddrFromCoord(
768 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
769 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT
* pOut
) ///< [out] output structure
771 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
// Surfaces with more than one mip level are reported as not implemented.
773 if (pIn
->numMipLevels
> 1)
775 returnCode
= ADDR_NOTIMPLEMENTED
;
// Build the layout query from pIn; dimensions, slice count and mip count are treated as at least 1.
779 ADDR2_COMPUTE_HTILE_INFO_INPUT input
= {0};
780 input
.size
= sizeof(input
);
781 input
.hTileFlags
= pIn
->hTileFlags
;
782 input
.depthFlags
= pIn
->depthflags
;
783 input
.swizzleMode
= pIn
->swizzleMode
;
784 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
785 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
786 input
.numSlices
= Max(pIn
->numSlices
, 1u);
787 input
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
789 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output
= {0};
790 output
.size
= sizeof(output
);
792 returnCode
= ComputeHtileInfo(&input
, &output
);
// The address is only computed when the layout query succeeded.
794 if (returnCode
== ADDR_OK
)
// bpp >> 3 converts bits per pixel to bytes before taking the log2.
796 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
797 UINT_32 metaBlkWidthLog2
= Log2(output
.metaBlkWidth
);
798 UINT_32 metaBlkHeightLog2
= Log2(output
.metaBlkHeight
);
799 UINT_32 numSamplesLog2
= Log2(pIn
->numSamples
);
// Meta equation lookup for Gfx9DataDepthStencil; the resource type is fixed to ADDR_RSRC_TEX_2D.
801 const CoordEq
* pMetaEq
= GetMetaEquation({0, elementBytesLog2
, numSamplesLog2
, pIn
->hTileFlags
,
802 Gfx9DataDepthStencil
, pIn
->swizzleMode
, ADDR_RSRC_TEX_2D
,
803 metaBlkWidthLog2
, metaBlkHeightLog2
, 0, 3, 3, 0});
// Meta block coordinates of the pixel and the linear index of that block
// (slice-major, then row, then column).
805 UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
806 UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
807 UINT_32 zb
= pIn
->slice
;
809 UINT_32 pitchInBlock
= output
.pitch
/ output
.metaBlkWidth
;
810 UINT_32 sliceSizeInBlock
= (output
.height
/ output
.metaBlkHeight
) * pitchInBlock
;
811 UINT_32 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
813 UINT_64 address
= pMetaEq
->solve(pIn
->x
, pIn
->y
, pIn
->slice
, 0, blockIndex
);
// The solved value is halved to get the output address.
815 pOut
->addr
= address
>> 1;
// Pipe XOR: keep only the low numPipeBits of pIn->pipeXor and XOR them into the address
// starting at bit m_pipeInterleaveLog2.
817 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->hTileFlags
.pipeAligned
,
820 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
822 pOut
->addr
^= (pipeXor
<< m_pipeInterleaveLog2
);
830 ************************************************************************************************************************
831 * Gfx9Lib::HwlComputeHtileCoordFromAddr
834 * Interface function stub of AddrComputeHtileCoordFromAddr
838 ************************************************************************************************************************
// Inverse of HwlComputeHtileAddrFromCoord: recovers the (x, y, slice) coordinate for the HTILE
// address in pIn. It derives the HTILE layout via ComputeHtileInfo, removes the pipe XOR and
// solves the meta equation for the coordinate.
// NOTE(review): this listing omits some source lines (braces, else keywords, trailing call
// arguments and the return statement); the comments below describe only the visible statements.
840 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeHtileCoordFromAddr(
841 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT
* pIn
, ///< [in] input structure
842 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT
* pOut
) ///< [out] output structure
844 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
// Surfaces with more than one mip level are reported as not implemented.
846 if (pIn
->numMipLevels
> 1)
848 returnCode
= ADDR_NOTIMPLEMENTED
;
// Build the layout query from pIn; dimensions, slice count and mip count are treated as at least 1.
// Unlike HwlComputeHtileAddrFromCoord, depthFlags is left zero-initialised here.
852 ADDR2_COMPUTE_HTILE_INFO_INPUT input
= {0};
853 input
.size
= sizeof(input
);
854 input
.hTileFlags
= pIn
->hTileFlags
;
855 input
.swizzleMode
= pIn
->swizzleMode
;
856 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
857 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
858 input
.numSlices
= Max(pIn
->numSlices
, 1u);
859 input
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
861 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output
= {0};
862 output
.size
= sizeof(output
);
864 returnCode
= ComputeHtileInfo(&input
, &output
);
// The coordinate is only computed when the layout query succeeded.
866 if (returnCode
== ADDR_OK
)
// bpp >> 3 converts bits per pixel to bytes before taking the log2.
868 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
869 UINT_32 metaBlkWidthLog2
= Log2(output
.metaBlkWidth
);
870 UINT_32 metaBlkHeightLog2
= Log2(output
.metaBlkHeight
);
871 UINT_32 numSamplesLog2
= Log2(pIn
->numSamples
);
// Same meta equation parameters as the forward (address-from-coordinate) function.
873 const CoordEq
* pMetaEq
= GetMetaEquation({0, elementBytesLog2
, numSamplesLog2
, pIn
->hTileFlags
,
874 Gfx9DataDepthStencil
, pIn
->swizzleMode
, ADDR_RSRC_TEX_2D
,
875 metaBlkWidthLog2
, metaBlkHeightLog2
, 0, 3, 3, 0});
// Undo the pipe XOR (low numPipeBits of pIn->pipeXor at bit m_pipeInterleaveLog2), then shift
// left by one to get the value passed to solveAddr - the reverse of the ">> 1" in the forward path.
877 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->hTileFlags
.pipeAligned
,
880 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
882 UINT_64 nibbleAddress
= (pIn
->addr
^ (pipeXor
<< m_pipeInterleaveLog2
)) << 1;
884 UINT_32 pitchInBlock
= output
.pitch
/ output
.metaBlkWidth
;
885 UINT_32 sliceSizeInBlock
= (output
.height
/ output
.metaBlkHeight
) * pitchInBlock
;
// solveAddr fills x, y, z, s and m; only x, y and m are used below.
887 UINT_32 x
, y
, z
, s
, m
;
888 pMetaEq
->solveAddr(nibbleAddress
, sliceSizeInBlock
, x
, y
, z
, s
, m
);
// m is treated as a linear meta block index: split it into slice, block row and block column,
// then add the in-block offsets x and y.
890 pOut
->slice
= m
/ sliceSizeInBlock
;
891 pOut
->y
= ((m
% sliceSizeInBlock
) / pitchInBlock
) * output
.metaBlkHeight
+ y
;
892 pOut
->x
= (m
% pitchInBlock
) * output
.metaBlkWidth
+ x
;
900 ************************************************************************************************************************
901 * Gfx9Lib::HwlComputeDccAddrFromCoord
904 * Interface function stub of AddrComputeDccAddrFromCoord
908 ************************************************************************************************************************
// Computes the DCC key byte address for the coordinate (x, y, slice, sample) in pIn.
// It first derives the DCC layout via ComputeDccInfo, then evaluates the meta equation and
// applies the pipe XOR.
// NOTE(review): this listing omits some source lines (braces, else keywords, trailing call
// arguments and the return statement); the comments below describe only the visible statements.
910 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeDccAddrFromCoord(
911 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT
* pIn
,
912 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT
* pOut
)
914 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
// Mip-mapped surfaces, mip ids above 1 and linear DCC keys are reported as not implemented.
// NOTE(review): mipId == 1 passes this guard even though numMipLevels <= 1 is required -
// confirm whether "> 0" was intended.
916 if ((pIn
->numMipLevels
> 1) || (pIn
->mipId
> 1) || pIn
->dccKeyFlags
.linear
)
918 returnCode
= ADDR_NOTIMPLEMENTED
;
// Build the layout query from pIn; dimensions, slice, fragment and mip counts are treated
// as at least 1.
922 ADDR2_COMPUTE_DCCINFO_INPUT input
= {0};
923 input
.size
= sizeof(input
);
924 input
.dccKeyFlags
= pIn
->dccKeyFlags
;
925 input
.colorFlags
= pIn
->colorFlags
;
926 input
.swizzleMode
= pIn
->swizzleMode
;
927 input
.resourceType
= pIn
->resourceType
;
928 input
.bpp
= pIn
->bpp
;
929 input
.unalignedWidth
= Max(pIn
->unalignedWidth
, 1u);
930 input
.unalignedHeight
= Max(pIn
->unalignedHeight
, 1u);
931 input
.numSlices
= Max(pIn
->numSlices
, 1u);
932 input
.numFrags
= Max(pIn
->numFrags
, 1u);
933 input
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
935 ADDR2_COMPUTE_DCCINFO_OUTPUT output
= {0};
936 output
.size
= sizeof(output
);
938 returnCode
= ComputeDccInfo(&input
, &output
);
// The address is only computed when the layout query succeeded.
940 if (returnCode
== ADDR_OK
)
// Log2 of element bytes, fragment count and the meta / compress block dimensions reported
// by ComputeDccInfo. Note the sample log2 is taken from numFrags.
942 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
943 UINT_32 numSamplesLog2
= Log2(pIn
->numFrags
);
944 UINT_32 metaBlkWidthLog2
= Log2(output
.metaBlkWidth
);
945 UINT_32 metaBlkHeightLog2
= Log2(output
.metaBlkHeight
);
946 UINT_32 metaBlkDepthLog2
= Log2(output
.metaBlkDepth
);
947 UINT_32 compBlkWidthLog2
= Log2(output
.compressBlkWidth
);
948 UINT_32 compBlkHeightLog2
= Log2(output
.compressBlkHeight
);
949 UINT_32 compBlkDepthLog2
= Log2(output
.compressBlkDepth
);
// Meta equation lookup for Gfx9DataColor, keyed by mip id, element size, sample count,
// flags, swizzle mode, resource type and the block dimensions above.
951 const CoordEq
* pMetaEq
= GetMetaEquation({pIn
->mipId
, elementBytesLog2
, numSamplesLog2
, pIn
->dccKeyFlags
,
952 Gfx9DataColor
, pIn
->swizzleMode
, pIn
->resourceType
,
953 metaBlkWidthLog2
, metaBlkHeightLog2
, metaBlkDepthLog2
,
954 compBlkWidthLog2
, compBlkHeightLog2
, compBlkDepthLog2
});
// Meta block coordinates of the pixel (the slice is divided by the meta block depth here)
// and the linear index of that block.
956 UINT_32 xb
= pIn
->x
/ output
.metaBlkWidth
;
957 UINT_32 yb
= pIn
->y
/ output
.metaBlkHeight
;
958 UINT_32 zb
= pIn
->slice
/ output
.metaBlkDepth
;
960 UINT_32 pitchInBlock
= output
.pitch
/ output
.metaBlkWidth
;
961 UINT_32 sliceSizeInBlock
= (output
.height
/ output
.metaBlkHeight
) * pitchInBlock
;
962 UINT_32 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
964 UINT_64 address
= pMetaEq
->solve(pIn
->x
, pIn
->y
, pIn
->slice
, pIn
->sample
, blockIndex
);
// The solved value is halved to get the output address.
966 pOut
->addr
= address
>> 1;
// Pipe XOR: keep only the low numPipeBits of pIn->pipeXor and XOR them into the address
// starting at bit m_pipeInterleaveLog2.
968 UINT_32 numPipeBits
= GetPipeLog2ForMetaAddressing(pIn
->dccKeyFlags
.pipeAligned
,
971 UINT_64 pipeXor
= static_cast<UINT_64
>(pIn
->pipeXor
& ((1 << numPipeBits
) - 1));
973 pOut
->addr
^= (pipeXor
<< m_pipeInterleaveLog2
);
981 ************************************************************************************************************************
982 * Gfx9Lib::HwlInitGlobalParams
985 * Initializes global parameters
988 * TRUE if all settings are valid
990 ************************************************************************************************************************
// Initialises library-wide parameters from the GB_ADDR_CONFIG register value supplied in
// pCreateIn: pipe interleave size, max compressed fragments (log2) and the variable block size
// are visible below; the remaining switch statements decode pipes, banks, shader engines and
// RBs per shader engine.
// NOTE(review): this listing omits most case bodies, the break statements, braces, the else
// keyword before ADDR_NOT_IMPLEMENTED and the return statement; the comments below describe
// only the statements that are visible.
992 BOOL_32
Gfx9Lib::HwlInitGlobalParams(
993 const ADDR_CREATE_INPUT
* pCreateIn
) ///< [in] create input
995 BOOL_32 valid
= TRUE
;
// Register decoding is done only when HwlConvertChipFamily flagged an Arctic Islands part.
997 if (m_settings
.isArcticIsland
)
999 GB_ADDR_CONFIG gbAddrConfig
;
1001 gbAddrConfig
.u32All
= pCreateIn
->regValue
.gbAddrConfig
;
1003 // These values are copied from CModel code
// NUM_PIPES field: 1 to 32 pipes are recognised; an ADDR_ASSERT_ALWAYS follows the listed cases.
1004 switch (gbAddrConfig
.bits
.NUM_PIPES
)
1006 case ADDR_CONFIG_1_PIPE
:
1010 case ADDR_CONFIG_2_PIPE
:
1014 case ADDR_CONFIG_4_PIPE
:
1018 case ADDR_CONFIG_8_PIPE
:
1022 case ADDR_CONFIG_16_PIPE
:
1026 case ADDR_CONFIG_32_PIPE
:
1031 ADDR_ASSERT_ALWAYS();
// PIPE_INTERLEAVE_SIZE field: 256B, 512B, 1KB or 2KB, stored both as a byte constant and as log2
// (8 to 11).
1035 switch (gbAddrConfig
.bits
.PIPE_INTERLEAVE_SIZE
)
1037 case ADDR_CONFIG_PIPE_INTERLEAVE_256B
:
1038 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_256B
;
1039 m_pipeInterleaveLog2
= 8;
1041 case ADDR_CONFIG_PIPE_INTERLEAVE_512B
:
1042 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_512B
;
1043 m_pipeInterleaveLog2
= 9;
1045 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB
:
1046 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_1KB
;
1047 m_pipeInterleaveLog2
= 10;
1049 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB
:
1050 m_pipeInterleaveBytes
= ADDR_PIPEINTERLEAVE_2KB
;
1051 m_pipeInterleaveLog2
= 11;
1054 ADDR_ASSERT_ALWAYS();
// NUM_BANKS field: 1 to 16 banks are recognised.
1058 switch (gbAddrConfig
.bits
.NUM_BANKS
)
1060 case ADDR_CONFIG_1_BANK
:
1064 case ADDR_CONFIG_2_BANK
:
1068 case ADDR_CONFIG_4_BANK
:
1072 case ADDR_CONFIG_8_BANK
:
1076 case ADDR_CONFIG_16_BANK
:
1081 ADDR_ASSERT_ALWAYS();
// NUM_SHADER_ENGINES field: 1 to 8 shader engines are recognised.
1085 switch (gbAddrConfig
.bits
.NUM_SHADER_ENGINES
)
1087 case ADDR_CONFIG_1_SHADER_ENGINE
:
1091 case ADDR_CONFIG_2_SHADER_ENGINE
:
1095 case ADDR_CONFIG_4_SHADER_ENGINE
:
1099 case ADDR_CONFIG_8_SHADER_ENGINE
:
1104 ADDR_ASSERT_ALWAYS();
// NUM_RB_PER_SE field: 1, 2 or 4 RBs per shader engine are recognised.
1108 switch (gbAddrConfig
.bits
.NUM_RB_PER_SE
)
1110 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE
:
1114 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE
:
1118 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE
:
1123 ADDR_ASSERT_ALWAYS();
// MAX_COMPRESSED_FRAGS field: 1, 2, 4 or 8 fragments, stored as log2 (0 to 3).
// NOTE(review): the matching m_maxCompFrag assignments are not visible in this listing.
1127 switch (gbAddrConfig
.bits
.MAX_COMPRESSED_FRAGS
)
1129 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS
:
1131 m_maxCompFragLog2
= 0;
1133 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS
:
1135 m_maxCompFragLog2
= 1;
1137 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS
:
1139 m_maxCompFragLog2
= 2;
1141 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS
:
1143 m_maxCompFragLog2
= 3;
1146 ADDR_ASSERT_ALWAYS();
// Variable block size (log2) comes from the create input. It is asserted to be 0 or within
// [17, 20] and is then clamped into [17, 20], so 0 becomes 17.
1150 m_blockVarSizeLog2
= pCreateIn
->regValue
.blockVarSizeLog2
;
1151 ADDR_ASSERT((m_blockVarSizeLog2
== 0) ||
1152 ((m_blockVarSizeLog2
>= 17u) && (m_blockVarSizeLog2
<= 20u)));
1153 m_blockVarSizeLog2
= Min(Max(17u, m_blockVarSizeLog2
), 20u);
// NOTE(review): presumably the non-Arctic-Islands branch; the enclosing else is not visible.
1158 ADDR_NOT_IMPLEMENTED();
// NOTE(review): any condition guarding this call is not visible in this listing.
1163 InitEquationTable();
1170 ************************************************************************************************************************
1171 * Gfx9Lib::HwlConvertChipFamily
1174 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1177 ************************************************************************************************************************
// Converts a chip family / revision pair into a ChipFamily value and records the per-ASIC
// feature flags in m_settings as a side effect.
//
// `family` is initialised to ADDR_CHIP_FAMILY_AI; the switch on uChipFamily then sets the
// m_settings bits shown below.
// NOTE(review): the case labels, break statements and the final return are not visible in this
// listing, so which group of flags belongs to which family id must be confirmed against the
// complete file.
1179 ChipFamily
Gfx9Lib::HwlConvertChipFamily(
1180 UINT_32 uChipFamily
, ///< [in] chip family defined in atiid.h
1181 UINT_32 uChipRevision
) ///< [in] chip revision defined in "asic_family"_id.h
1183 ChipFamily family
= ADDR_CHIP_FAMILY_AI
;
1185 switch (uChipFamily
)
// First flag group: isVega10 is derived from the revision id.
1188 m_settings
.isArcticIsland
= 1;
1189 m_settings
.isVega10
= ASICREV_IS_VEGA10_P(uChipRevision
);
1191 m_settings
.isDce12
= 1;
// The htile-align and alias fixes are enabled only when the revision is not Vega10.
1193 if (m_settings
.isVega10
== 0)
1195 m_settings
.htileAlignFix
= 1;
1196 m_settings
.applyAliasFix
= 1;
1199 m_settings
.metaBaseAlignFix
= 1;
1201 m_settings
.depthPipeXorDisable
= 1;
// Second flag group: isRaven is derived from the revision id.
1204 m_settings
.isArcticIsland
= 1;
1205 m_settings
.isRaven
= ASICREV_IS_RAVEN(uChipRevision
);
1207 if (m_settings
.isRaven
)
1209 m_settings
.isDcn1
= 1;
1212 m_settings
.metaBaseAlignFix
= 1;
// Re-evaluates the revision macro instead of reusing m_settings.isRaven set above.
1214 if (ASICREV_IS_RAVEN(uChipRevision
))
1216 m_settings
.depthPipeXorDisable
= 1;
// Fires for a family id that none of the handled cases matched (presumably the default case).
1221 ADDR_ASSERT(!"This should be a Fusion");
1229 ************************************************************************************************************************
1230 * Gfx9Lib::GetRbEquation
1236 ************************************************************************************************************************
// Builds the render-backend (RB) id equation into *pRbEq.
//
// The equation gets numRbPerSeLog2 + numSeLog2 bits, each filled with x and/or y coordinate
// terms that start at bit `rbRegion` of the respective axis.
// NOTE(review): the declarations/updates of `start` and `idx`, and the increments of cx/cy, are
// not visible in this listing; comments below describe only the statements that are shown.
1238 VOID
Gfx9Lib::GetRbEquation(
1239 CoordEq
* pRbEq
, ///< [out] rb equation
1240 UINT_32 numRbPerSeLog2
, ///< [in] number of rb per shader engine
1241 UINT_32 numSeLog2
) ///< [in] number of shader engine
1244 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case it's 32x32
// rbRegion is the log2 of that region edge: 5 (32) for one RB per SE, otherwise 4 (16).
1245 UINT_32 rbRegion
= (numRbPerSeLog2
== 0) ? 5 : 4;
1246 Coordinate
cx('x', rbRegion
);
1247 Coordinate
cy('y', rbRegion
);
1250 UINT_32 numRbTotalLog2
= numRbPerSeLog2
+ numSeLog2
;
1252 // Clear the rb equation
1254 pRbEq
->resize(numRbTotalLog2
);
1256 if ((numSeLog2
> 0) && (numRbPerSeLog2
== 1))
1258 // Special case when more than 1 SE, and 2 RB per SE
1259 (*pRbEq
)[0].add(cx
);
1260 (*pRbEq
)[0].add(cy
);
// Without the alias fix an extra y term is added to bit 0 before the unconditional one below.
1264 if (m_settings
.applyAliasFix
== false)
1266 (*pRbEq
)[0].add(cy
);
1269 (*pRbEq
)[0].add(cy
);
// Two coordinate terms are distributed for every rb bit at or above `start`.
1273 UINT_32 numBits
= 2 * (numRbTotalLog2
- start
);
1275 for (UINT_32 i
= 0; i
< numBits
; i
++)
// Target bit: start + i while that stays below numRbTotalLog2; past that point the index
// counts back down (start + 2 * (numRbTotalLog2 - start) - i - 1).
1278 start
+ (((start
+ i
) >= numRbTotalLog2
) ? (2 * (numRbTotalLog2
- start
) - i
- 1) : i
);
// NOTE(review): the condition that selects between the x and the y term is not visible here.
1282 (*pRbEq
)[idx
].add(cx
);
1287 (*pRbEq
)[idx
].add(cy
);
1294 ************************************************************************************************************************
1295 * Gfx9Lib::GetDataEquation
1298 * Get data equation for fmask and Z
1301 ************************************************************************************************************************
// Builds the address equation of the data surface into *pDataEq.
//
// For Gfx9DataColor surfaces the layout depends on the swizzle class:
//   - linear: 49 bits, every bit carrying an 'm' coordinate term;
//   - thick (3D): standard swizzle or the remaining thick layout, then a 3D morton fill from
//     bit 10 upwards;
//   - thin (2D): micro-tile x/y bits, morton x/y bits up to the sample split, the sample bits,
//     then morton x/y bits from blockSizeLog2 upwards.
// The statements after the color branch place the sample bits first, followed by x-major and
// then y-major pixel bits.
// NOTE(review): the declaration of `i`, several `else` lines and the coordinate increments are
// not visible in this listing.
1303 VOID
Gfx9Lib::GetDataEquation(
1304 CoordEq
* pDataEq
, ///< [out] data surface equation
1305 Gfx9DataType dataSurfaceType
, ///< [in] data surface type
1306 AddrSwizzleMode swizzleMode
, ///< [in] data surface swizzle mode
1307 AddrResourceType resourceType
, ///< [in] data surface resource type
1308 UINT_32 elementBytesLog2
, ///< [in] data surface element bytes
1309 UINT_32 numSamplesLog2
) ///< [in] data surface sample count
1312 Coordinate
cx('x', 0);
1313 Coordinate
cy('y', 0);
1314 Coordinate
cz('z', 0);
1315 Coordinate
cs('s', 0);
1317 // Clear the equation
1319 pDataEq
->resize(27);
1321 if (dataSurfaceType
== Gfx9DataColor
)
1323 if (IsLinear(swizzleMode
))
// Linear color: the equation is widened to 49 bits and each bit gets an 'm' term.
1325 Coordinate
cm('m', 0);
1327 pDataEq
->resize(49);
1329 for (UINT_32 i
= 0; i
< 49; i
++)
1331 (*pDataEq
)[i
].add(cm
);
1335 else if (IsThick(resourceType
, swizzleMode
))
1337 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1339 if (IsStandardSwizzle(resourceType
, swizzleMode
))
1341 // Standard 3d swizzle
1342 // Fill in bottom x bits
1343 for (i
= elementBytesLog2
; i
< 4; i
++)
1345 (*pDataEq
)[i
].add(cx
);
1348 // Fill in 2 bits of y and then z
1349 for (i
= 4; i
< 6; i
++)
1351 (*pDataEq
)[i
].add(cy
);
1354 for (i
= 6; i
< 8; i
++)
1356 (*pDataEq
)[i
].add(cz
);
// Bits 8 and 9 depend on the element size: <4 bytes, exactly 4 bytes, or larger.
1359 if (elementBytesLog2
< 2)
1361 // fill in z & y bit
1362 (*pDataEq
)[8].add(cz
);
1363 (*pDataEq
)[9].add(cy
);
1367 else if (elementBytesLog2
== 2)
1369 // fill in y and x bit
1370 (*pDataEq
)[8].add(cy
);
1371 (*pDataEq
)[9].add(cx
);
1378 (*pDataEq
)[8].add(cx
);
1380 (*pDataEq
)[9].add(cx
);
// Remaining thick layout: a 2D morton run from elementBytesLog2 to m2dEnd, then numZs z bits.
1387 UINT_32 m2dEnd
= (elementBytesLog2
==0) ? 3 : ((elementBytesLog2
< 4) ? 4 : 5);
1388 UINT_32 numZs
= (elementBytesLog2
== 0 || elementBytesLog2
== 4) ?
1389 2 : ((elementBytesLog2
== 1) ? 3 : 1);
1390 pDataEq
->mort2d(cx
, cy
, elementBytesLog2
, m2dEnd
);
1391 for (i
= m2dEnd
+ 1; i
<= m2dEnd
+ numZs
; i
++)
1393 (*pDataEq
)[i
].add(cz
);
1396 if ((elementBytesLog2
== 0) || (elementBytesLog2
== 3))
1399 (*pDataEq
)[6].add(cx
);
1400 (*pDataEq
)[7].add(cz
);
1404 else if (elementBytesLog2
== 2)
1407 (*pDataEq
)[6].add(cy
);
1408 (*pDataEq
)[7].add(cz
);
1413 (*pDataEq
)[8].add(cy
);
1414 (*pDataEq
)[9].add(cx
);
1418 // Fill in bit 10 and up
1419 pDataEq
->mort3d( cz
, cy
, cx
, 10 );
1421 else if (IsThin(resourceType
, swizzleMode
))
1423 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swizzleMode
);
// microYBits: number of y bits inside the 256-byte micro tile, (8 - elementBytesLog2) / 2.
1425 UINT_32 microYBits
= (8 - elementBytesLog2
) / 2;
// Sample bits occupy the top numSamplesLog2 bits of the block.
1426 UINT_32 tileSplitStart
= blockSizeLog2
- numSamplesLog2
;
1428 // Fill in bottom x bits
1429 for (i
= elementBytesLog2
; i
< 4; i
++)
1431 (*pDataEq
)[i
].add(cx
);
1434 // Fill in bottom y bits
1435 for (i
= 4; i
< 4 + microYBits
; i
++)
1437 (*pDataEq
)[i
].add(cy
);
1440 // Fill in last of the micro_x bits
1441 for (i
= 4 + microYBits
; i
< 8; i
++)
1443 (*pDataEq
)[i
].add(cx
);
1446 // Fill in x/y bits below sample split
1447 pDataEq
->mort2d(cy
, cx
, 8, tileSplitStart
- 1);
1448 // Fill in sample bits
1449 for (i
= 0; i
< numSamplesLog2
; i
++)
1452 (*pDataEq
)[tileSplitStart
+ i
].add(cs
);
1454 // Fill in x/y bits above sample split
// The parity of numSamplesLog2 versus blockSizeLog2 decides whether x or y comes first.
1455 if ((numSamplesLog2
& 1) ^ (blockSizeLog2
& 1))
1457 pDataEq
->mort2d(cx
, cy
, blockSizeLog2
);
1461 pDataEq
->mort2d(cy
, cx
, blockSizeLog2
);
1466 ADDR_ASSERT_ALWAYS();
// Layout used after the color branch (presumably the non-color `else` path — confirm):
// sample bits directly above the element bytes, pixel bits above the samples.
1472 UINT_32 sampleStart
= elementBytesLog2
;
1473 UINT_32 pixelStart
= elementBytesLog2
+ numSamplesLog2
;
1474 UINT_32 ymajStart
= 6 + numSamplesLog2
;
1476 for (UINT_32 s
= 0; s
< numSamplesLog2
; s
++)
1479 (*pDataEq
)[sampleStart
+ s
].add(cs
);
1482 // Put in the x-major order pixel bits
1483 pDataEq
->mort2d(cx
, cy
, pixelStart
, ymajStart
- 1);
1484 // Put in the y-major order pixel bits
1485 pDataEq
->mort2d(cy
, cx
, ymajStart
);
1490 ************************************************************************************************************************
1491 * Gfx9Lib::GetPipeEquation
1497 ************************************************************************************************************************
// Derives the pipe equation *pPipeEq from the data equation *pDataEq.
//
// Visible steps: take a working copy of the data equation, shift out the sample bits for color
// surfaces, copy numPipeLog2 bits starting at pipeInterleaveLog2 into the pipe equation, skip
// low bits for non-color surfaces, and for xor swizzle modes XOR in higher address bits (plus z
// bits for thick, single-sample, non-PRT surfaces).
// NOTE(review): the declarations of `dataEq`, `xorMask`, `xorMask2` and `co`, the body of the
// while loop and several else/brace lines are not visible in this listing.
1499 VOID
Gfx9Lib::GetPipeEquation(
1500 CoordEq
* pPipeEq
, ///< [out] pipe equation
1501 CoordEq
* pDataEq
, ///< [in] data equation
1502 UINT_32 pipeInterleaveLog2
, ///< [in] pipe interleave
1503 UINT_32 numPipeLog2
, ///< [in] number of pipes
1504 UINT_32 numSamplesLog2
, ///< [in] data surface sample count
1505 Gfx9DataType dataSurfaceType
, ///< [in] data surface type
1506 AddrSwizzleMode swizzleMode
, ///< [in] data surface swizzle mode
1507 AddrResourceType resourceType
///< [in] data surface resource type
1510 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swizzleMode
);
// Work on a local copy so the caller's data equation is left untouched (presumably `copy`
// writes into its argument — confirm against CoordEq).
1513 pDataEq
->copy(dataEq
);
1515 if (dataSurfaceType
== Gfx9DataColor
)
// For color, shift the equation down by the sample-bit count, starting at the sample split.
1517 INT_32 shift
= static_cast<INT_32
>(numSamplesLog2
);
1518 dataEq
.shift(-shift
, blockSizeLog2
- numSamplesLog2
);
1521 dataEq
.copy(*pPipeEq
, pipeInterleaveLog2
, numPipeLog2
);
1523 // This section should only apply to z/stencil, maybe fmask
1524 // If the pipe bit is below the comp block size,
1525 // then keep moving up the address until we find a bit that is above
1526 UINT_32 pipeStart
= 0;
1528 if (dataSurfaceType
!= Gfx9DataColor
)
1530 Coordinate
tileMin('x', 3);
1532 while (dataEq
[pipeInterleaveLog2
+ pipeStart
][0] < tileMin
)
1537 // if pipe is 0, then the first pipe bit is above the comp block size,
1538 // so we don't need to do anything
1539 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1540 // we will get the same pipe equation
1543 for (UINT_32 i
= 0; i
< numPipeLog2
; i
++)
1545 // Copy the i-th bit above pipe interleave to the current pipe equation bit
1546 dataEq
[pipeInterleaveLog2
+ pipeStart
+ i
].copyto((*pPipeEq
)[i
]);
1551 if (IsPrt(swizzleMode
))
1553 // Clear out bits above the block size if prt's are enabled
1554 dataEq
.resize(blockSizeLog2
);
1558 if (IsXor(swizzleMode
))
1562 if (IsThick(resourceType
, swizzleMode
))
// Thick: gather 2 * numPipeLog2 bits above the pipe bits and fold each adjacent pair into one
// xorMask bit.
1566 dataEq
.copy(xorMask2
, pipeInterleaveLog2
+ numPipeLog2
, 2 * numPipeLog2
);
1568 xorMask
.resize(numPipeLog2
);
1570 for (UINT_32 pipeIdx
= 0; pipeIdx
< numPipeLog2
; pipeIdx
++)
1572 xorMask
[pipeIdx
].add(xorMask2
[2 * pipeIdx
]);
1573 xorMask
[pipeIdx
].add(xorMask2
[2 * pipeIdx
+ 1]);
1578 // Xor in the bits above the pipe+gpu bits
1579 dataEq
.copy(xorMask
, pipeInterleaveLog2
+ pipeStart
+ numPipeLog2
, numPipeLog2
);
1581 if ((numSamplesLog2
== 0) && (IsPrt(swizzleMode
) == FALSE
))
1585 // if 1xaa and not prt, then xor in the z bits
1587 xorMask2
.resize(numPipeLog2
);
// z bits are applied in reverse order: pipe bit pipeIdx gets z[numPipeLog2 - 1 - pipeIdx].
1588 for (UINT_32 pipeIdx
= 0; pipeIdx
< numPipeLog2
; pipeIdx
++)
1590 co
.set('z', numPipeLog2
- 1 - pipeIdx
);
1591 xorMask2
[pipeIdx
].add(co
);
1594 pPipeEq
->xorin(xorMask2
);
1599 pPipeEq
->xorin(xorMask
);
1603 ************************************************************************************************************************
1604 * Gfx9Lib::GetMetaEquation
1607 * Get meta equation for cmask/htile/DCC
1609 * Pointer to a calculated meta equation
1610 ************************************************************************************************************************
1612 const CoordEq
* Gfx9Lib::GetMetaEquation(
1613 const MetaEqParams
& metaEqParams
)
1615 UINT_32 cachedMetaEqIndex
;
1617 for (cachedMetaEqIndex
= 0; cachedMetaEqIndex
< MaxCachedMetaEq
; cachedMetaEqIndex
++)
1619 if (memcmp(&metaEqParams
,
1620 &m_cachedMetaEqKey
[cachedMetaEqIndex
],
1621 static_cast<UINT_32
>(sizeof(metaEqParams
))) == 0)
1627 CoordEq
* pMetaEq
= NULL
;
1629 if (cachedMetaEqIndex
< MaxCachedMetaEq
)
1631 pMetaEq
= &m_cachedMetaEq
[cachedMetaEqIndex
];
1635 m_cachedMetaEqKey
[m_metaEqOverrideIndex
] = metaEqParams
;
1637 pMetaEq
= &m_cachedMetaEq
[m_metaEqOverrideIndex
++];
1639 m_metaEqOverrideIndex
%= MaxCachedMetaEq
;
1641 GenMetaEquation(pMetaEq
,
1642 metaEqParams
.maxMip
,
1643 metaEqParams
.elementBytesLog2
,
1644 metaEqParams
.numSamplesLog2
,
1645 metaEqParams
.metaFlag
,
1646 metaEqParams
.dataSurfaceType
,
1647 metaEqParams
.swizzleMode
,
1648 metaEqParams
.resourceType
,
1649 metaEqParams
.metaBlkWidthLog2
,
1650 metaEqParams
.metaBlkHeightLog2
,
1651 metaEqParams
.metaBlkDepthLog2
,
1652 metaEqParams
.compBlkWidthLog2
,
1653 metaEqParams
.compBlkHeightLog2
,
1654 metaEqParams
.compBlkDepthLog2
);
1661 ************************************************************************************************************************
1662 * Gfx9Lib::GenMetaEquation
1665 * Get meta equation for cmask/htile/DCC
1668 ************************************************************************************************************************
// Generates the meta (cmask/htile/DCC) address equation for one parameter set into *pMetaEq.
//
// Visible outline:
//   1. Build the data equation and, from it, the pipe equation.
//   2. metaFlag.linear asserts (unsupported on GFX9); a linear swizzle mode reuses the data
//      equation with the pipe bits removed and re-inserted.
//   3. Otherwise build a morton-ordered equation, drop bits below the compressed block and above
//      the meta block, remove the bits already covered by pipe and rb equations, widen to a
//      49-bit nibble address, and re-insert channel bits, remaining rb bits and the uncompressed
//      fragment bits above the pipe interleave.
// NOTE(review): the declarations of `dataEq`, `co` and `cs`, several branch conditions, else
// lines and counter updates are not visible in this listing; the comments below describe only
// the statements that are shown.
1670 VOID
Gfx9Lib::GenMetaEquation(
1671 CoordEq
* pMetaEq
, ///< [out] meta equation
1672 UINT_32 maxMip
, ///< [in] max mip Id
1673 UINT_32 elementBytesLog2
, ///< [in] data surface element bytes
1674 UINT_32 numSamplesLog2
, ///< [in] data surface sample count
1675 ADDR2_META_FLAGS metaFlag
, ///< [in] meta flag
1676 Gfx9DataType dataSurfaceType
, ///< [in] data surface type
1677 AddrSwizzleMode swizzleMode
, ///< [in] data surface swizzle mode
1678 AddrResourceType resourceType
, ///< [in] data surface resource type
1679 UINT_32 metaBlkWidthLog2
, ///< [in] meta block width
1680 UINT_32 metaBlkHeightLog2
, ///< [in] meta block height
1681 UINT_32 metaBlkDepthLog2
, ///< [in] meta block depth
1682 UINT_32 compBlkWidthLog2
, ///< [in] compress block width
1683 UINT_32 compBlkHeightLog2
, ///< [in] compress block height
1684 UINT_32 compBlkDepthLog2
) ///< [in] compress block depth
1687 UINT_32 numPipeTotalLog2
= GetPipeLog2ForMetaAddressing(metaFlag
.pipeAligned
, swizzleMode
);
1688 UINT_32 pipeInterleaveLog2
= m_pipeInterleaveLog2
;
1690 // Get the correct data address and rb equation
1692 GetDataEquation(&dataEq
, dataSurfaceType
, swizzleMode
, resourceType
,
1693 elementBytesLog2
, numSamplesLog2
);
1695 // Get pipe and rb equations
1696 CoordEq pipeEquation
;
1697 GetPipeEquation(&pipeEquation
, &dataEq
, pipeInterleaveLog2
, numPipeTotalLog2
,
1698 numSamplesLog2
, dataSurfaceType
, swizzleMode
, resourceType
);
// From here on the pipe-bit count is whatever the generated pipe equation actually holds.
1699 numPipeTotalLog2
= pipeEquation
.getsize();
1701 if (metaFlag
.linear
)
1703 // Linear metadata support was removed for GFX9! No one can use this feature.
1704 ADDR_ASSERT_ALWAYS();
1706 ADDR_ASSERT(dataSurfaceType
== Gfx9DataColor
);
1708 dataEq
.copy(*pMetaEq
);
1710 if (IsLinear(swizzleMode
))
1712 if (metaFlag
.pipeAligned
)
1714 // Remove the pipe bits
1715 INT_32 shift
= static_cast<INT_32
>(numPipeTotalLog2
);
1716 pMetaEq
->shift(-shift
, pipeInterleaveLog2
);
1718 // Divide by comp block size, which for linear (which is always color) is 256 B
1721 if (metaFlag
.pipeAligned
)
1723 // Put pipe bits back in
1724 pMetaEq
->shift(numPipeTotalLog2
, pipeInterleaveLog2
);
1726 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
1728 pipeEquation
[i
].copyto((*pMetaEq
)[pipeInterleaveLog2
+ i
]);
// NOTE(review): m_maxCompFragLog2 is cast to INT_32 but stored in an unsigned variable.
1737 UINT_32 maxCompFragLog2
= static_cast<INT_32
>(m_maxCompFragLog2
);
// For color surfaces the compressed fragment count is capped at maxCompFragLog2.
1738 UINT_32 compFragLog2
=
1739 ((dataSurfaceType
== Gfx9DataColor
) && (numSamplesLog2
> maxCompFragLog2
)) ?
1740 maxCompFragLog2
: numSamplesLog2
;
1742 UINT_32 uncompFragLog2
= numSamplesLog2
- compFragLog2
;
1744 // Make sure the metaaddr is cleared
1746 pMetaEq
->resize(27);
1748 if (IsThick(resourceType
, swizzleMode
))
1750 Coordinate
cx('x', 0);
1751 Coordinate
cy('y', 0);
1752 Coordinate
cz('z', 0);
// NOTE(review): the condition choosing between the y-first and x-first order is not visible.
1756 pMetaEq
->mort3d(cy
, cx
, cz
);
1760 pMetaEq
->mort3d(cx
, cy
, cz
);
1765 Coordinate
cx('x', 0);
1766 Coordinate
cy('y', 0);
1771 pMetaEq
->mort2d(cy
, cx
, compFragLog2
);
1775 pMetaEq
->mort2d(cx
, cy
, compFragLog2
);
1778 //------------------------------------------------------------------------------------------------------------------------
1779 // Put the compressible fragments at the lsb
1780 // the uncompressible frags will be at the msb of the micro address
1781 //------------------------------------------------------------------------------------------------------------------------
1782 for (UINT_32 s
= 0; s
< compFragLog2
; s
++)
1785 (*pMetaEq
)[s
].add(cs
);
1789 // Keep a copy of the pipe equations
1790 CoordEq origPipeEquation
;
1791 pipeEquation
.copy(origPipeEquation
);
1794 // filter out everything under the compressed block size
1795 co
.set('x', compBlkWidthLog2
);
1796 pMetaEq
->Filter('<', co
, 0, 'x');
1797 co
.set('y', compBlkHeightLog2
);
1798 pMetaEq
->Filter('<', co
, 0, 'y');
1799 co
.set('z', compBlkDepthLog2
);
1800 pMetaEq
->Filter('<', co
, 0, 'z');
1802 // For non-color, filter out sample bits
1803 if (dataSurfaceType
!= Gfx9DataColor
)
1806 pMetaEq
->Filter('<', co
, 0, 's');
1809 // filter out everything above the metablock size
1810 co
.set('x', metaBlkWidthLog2
- 1);
1811 pMetaEq
->Filter('>', co
, 0, 'x');
1812 co
.set('y', metaBlkHeightLog2
- 1);
1813 pMetaEq
->Filter('>', co
, 0, 'y');
1814 co
.set('z', metaBlkDepthLog2
- 1);
1815 pMetaEq
->Filter('>', co
, 0, 'z');
1817 // filter out everything above the metablock size for the channel bits
1818 co
.set('x', metaBlkWidthLog2
- 1);
1819 pipeEquation
.Filter('>', co
, 0, 'x');
1820 co
.set('y', metaBlkHeightLog2
- 1);
1821 pipeEquation
.Filter('>', co
, 0, 'y');
1822 co
.set('z', metaBlkDepthLog2
- 1);
1823 pipeEquation
.Filter('>', co
, 0, 'z');
1825 // Make sure we still have the same number of channel bits
1826 if (pipeEquation
.getsize() != numPipeTotalLog2
)
1828 ADDR_ASSERT_ALWAYS();
1831 // Loop through all channel and rb bits,
1832 // and make sure these components exist in the metadata address
1833 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
// j counts down from the term count so that index j - 1 never underflows.
1835 for (UINT_32 j
= pipeEquation
[i
].getsize(); j
> 0; j
--)
1837 if (pMetaEq
->Exists(pipeEquation
[i
][j
- 1]) == FALSE
)
1839 ADDR_ASSERT_ALWAYS();
// Without rbAligned the rb equation is empty (both log2 counts are 0).
1844 const UINT_32 numSeLog2
= metaFlag
.rbAligned
? m_seLog2
: 0;
1845 const UINT_32 numRbPeSeLog2
= metaFlag
.rbAligned
? m_rbPerSeLog2
: 0;
1846 const UINT_32 numRbTotalLog2
= numRbPeSeLog2
+ numSeLog2
;
1847 CoordEq origRbEquation
;
1849 GetRbEquation(&origRbEquation
, numRbPeSeLog2
, numSeLog2
);
// rbEquation is the working copy that gets reduced below; origRbEquation keeps the full terms
// for re-insertion at the end.
1851 CoordEq rbEquation
= origRbEquation
;
1853 for (UINT_32 i
= 0; i
< numRbTotalLog2
; i
++)
1855 for (UINT_32 j
= rbEquation
[i
].getsize(); j
> 0; j
--)
1857 if (pMetaEq
->Exists(rbEquation
[i
][j
- 1]) == FALSE
)
1859 ADDR_ASSERT_ALWAYS();
1864 if (m_settings
.applyAliasFix
)
1869 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1870 for (UINT_32 i
= 0; i
< numRbTotalLog2
; i
++)
1872 for (UINT_32 j
= 0; j
< numPipeTotalLog2
; j
++)
1874 BOOL_32 isRbEquationInPipeEquation
= FALSE
;
1876 if (m_settings
.applyAliasFix
)
// With the alias fix the pipe bit is compared after filtering its z terms against `co`.
1878 CoordTerm filteredPipeEq
;
1879 filteredPipeEq
= pipeEquation
[j
];
1881 filteredPipeEq
.Filter('>', co
, 0, 'z');
1883 isRbEquationInPipeEquation
= (rbEquation
[i
] == filteredPipeEq
);
1887 isRbEquationInPipeEquation
= (rbEquation
[i
] == pipeEquation
[j
]);
1890 if (isRbEquationInPipeEquation
)
1892 rbEquation
[i
].Clear();
// One flag per possible rb bit; set below when pipe terms get appended to that rb bit.
1897 bool rbAppendedWithPipeBits
[1 << (MaxSeLog2
+ MaxRbPerSeLog2
)] = {};
1899 // Loop through each bit of the channel, get the smallest coordinate,
1900 // and remove it from the metaaddr, and rb_equation
1901 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
1903 pipeEquation
[i
].getsmallest(co
);
// Removing `co` must shrink the meta equation by exactly one bit; anything else asserts.
1905 UINT_32 old_size
= pMetaEq
->getsize();
1906 pMetaEq
->Filter('=', co
);
1907 UINT_32 new_size
= pMetaEq
->getsize();
1908 if (new_size
!= old_size
-1)
1910 ADDR_ASSERT_ALWAYS();
1912 pipeEquation
.remove(co
);
1913 for (UINT_32 j
= 0; j
< numRbTotalLog2
; j
++)
1915 if (rbEquation
[j
].remove(co
))
1917 // if we actually removed something from this bit, then add the remaining
1918 // channel bits, as these can be removed for this bit
1919 for (UINT_32 k
= 0; k
< pipeEquation
[i
].getsize(); k
++)
1921 if (pipeEquation
[i
][k
] != co
)
1923 rbEquation
[j
].add(pipeEquation
[i
][k
]);
1924 rbAppendedWithPipeBits
[j
] = true;
1931 // Loop through the rb bits and see what remain;
1932 // filter out the smallest coordinate if it remains
1933 UINT_32 rbBitsLeft
= 0;
1934 for (UINT_32 i
= 0; i
< numRbTotalLog2
; i
++)
1936 BOOL_32 isRbEqAppended
= FALSE
;
1938 if (m_settings
.applyAliasFix
)
// With the alias fix, an rb bit that had pipe terms appended needs more than one term to count.
1940 isRbEqAppended
= (rbEquation
[i
].getsize() > (rbAppendedWithPipeBits
[i
] ? 1 : 0));
1944 isRbEqAppended
= (rbEquation
[i
].getsize() > 0);
1950 rbEquation
[i
].getsmallest(co
);
1951 UINT_32 old_size
= pMetaEq
->getsize();
1952 pMetaEq
->Filter('=', co
);
1953 UINT_32 new_size
= pMetaEq
->getsize();
1954 if (new_size
!= old_size
- 1)
1958 for (UINT_32 j
= i
+ 1; j
< numRbTotalLog2
; j
++)
1960 if (rbEquation
[j
].remove(co
))
1962 // if we actually removed something from this bit, then add the remaining
1963 // rb bits, as these can be removed for this bit
1964 for (UINT_32 k
= 0; k
< rbEquation
[i
].getsize(); k
++)
1966 if (rbEquation
[i
][k
] != co
)
1968 rbEquation
[j
].add(rbEquation
[i
][k
]);
1969 rbAppendedWithPipeBits
[j
] |= rbAppendedWithPipeBits
[i
];
1977 // capture the size of the metaaddr
1978 UINT_32 metaSize
= pMetaEq
->getsize();
1979 // resize to 49 bits...make this a nibble address
1980 pMetaEq
->resize(49);
1981 // Concatenate the macro address above the current address
1982 for (UINT_32 i
= metaSize
, j
= 0; i
< 49; i
++, j
++)
1985 (*pMetaEq
)[i
].add(co
);
1988 // Multiply by meta element size (in nibbles)
1989 if (dataSurfaceType
== Gfx9DataColor
)
1993 else if (dataSurfaceType
== Gfx9DataDepthStencil
)
1998 //------------------------------------------------------------------------------------------
1999 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2000 // Shift up from pipe interleave number of channel
2001 // and rb bits left, and uncompressed fragments
2002 //------------------------------------------------------------------------------------------
2004 pMetaEq
->shift(numPipeTotalLog2
+ rbBitsLeft
+ uncompFragLog2
, pipeInterleaveLog2
+ 1);
2006 // Put in the channel bits
2007 for (UINT_32 i
= 0; i
< numPipeTotalLog2
; i
++)
2009 origPipeEquation
[i
].copyto((*pMetaEq
)[pipeInterleaveLog2
+1 + i
]);
2012 // Put in remaining rb bits
// i cycles through the rb bits modulo numRbTotalLog2; the loop ends once j reaches rbBitsLeft
// (NOTE(review): the increment of j is not visible in this listing).
2013 for (UINT_32 i
= 0, j
= 0; j
< rbBitsLeft
; i
= (i
+ 1) % numRbTotalLog2
)
2015 BOOL_32 isRbEqAppended
= FALSE
;
2017 if (m_settings
.applyAliasFix
)
2019 isRbEqAppended
= (rbEquation
[i
].getsize() > (rbAppendedWithPipeBits
[i
] ? 1 : 0));
2023 isRbEqAppended
= (rbEquation
[i
].getsize() > 0);
2028 origRbEquation
[i
].copyto((*pMetaEq
)[pipeInterleaveLog2
+ 1 + numPipeTotalLog2
+ j
]);
2029 // Mark any rb bit we add in to the rb mask
2034 //------------------------------------------------------------------------------------------
2035 // Put in the uncompressed fragment bits
2036 //------------------------------------------------------------------------------------------
2037 for (UINT_32 i
= 0; i
< uncompFragLog2
; i
++)
2039 co
.set('s', compFragLog2
+ i
);
2040 (*pMetaEq
)[pipeInterleaveLog2
+ 1 + numPipeTotalLog2
+ rbBitsLeft
+ i
].add(co
);
2046 ************************************************************************************************************************
2047 * Gfx9Lib::IsEquationSupported
2050 * Check if equation is supported for given swizzle mode and resource type.
2054 ************************************************************************************************************************
2056 BOOL_32
Gfx9Lib::IsEquationSupported(
2057 AddrResourceType rsrcType
,
2058 AddrSwizzleMode swMode
,
2059 UINT_32 elementBytesLog2
) const
2061 BOOL_32 supported
= (elementBytesLog2
< MaxElementBytesLog2
) &&
2062 (IsLinear(swMode
) == FALSE
) &&
2063 (((IsTex2d(rsrcType
) == TRUE
) &&
2064 ((elementBytesLog2
< 4) ||
2065 ((IsRotateSwizzle(swMode
) == FALSE
) &&
2066 (IsZOrderSwizzle(swMode
) == FALSE
)))) ||
2067 ((IsTex3d(rsrcType
) == TRUE
) &&
2068 (IsRotateSwizzle(swMode
) == FALSE
) &&
2069 (IsBlock256b(swMode
) == FALSE
)));
2075 ************************************************************************************************************************
2076 * Gfx9Lib::InitEquationTable
2079 * Initialize Equation table.
2083 ************************************************************************************************************************
// Fills the equation tables for every (resource type, swizzle mode, element size) combination.
//
// m_equationTable is zeroed first. For each combination accepted by IsEquationSupported the
// matching Compute*Equation helper is called; on ADDR_OK the equation is stored at index
// m_numEquations. m_equationLookupTable receives that index, or ADDR_INVALID_EQUATION_INDEX
// when the combination is unsupported or generation failed.
// NOTE(review): the statement that advances m_numEquations and the else/brace lines are not
// visible in this listing.
2085 VOID
Gfx9Lib::InitEquationTable()
2087 memset(m_equationTable
, 0, sizeof(m_equationTable
));
2089 // Loop all possible resource type (2D/3D)
2090 for (UINT_32 rsrcTypeIdx
= 0; rsrcTypeIdx
< MaxRsrcType
; rsrcTypeIdx
++)
// Table index 0 corresponds to ADDR_RSRC_TEX_2D.
2092 AddrResourceType rsrcType
= static_cast<AddrResourceType
>(rsrcTypeIdx
+ ADDR_RSRC_TEX_2D
);
2094 // Loop all possible swizzle mode
2095 for (UINT_32 swModeIdx
= 0; swModeIdx
< MaxSwMode
; swModeIdx
++)
2097 AddrSwizzleMode swMode
= static_cast<AddrSwizzleMode
>(swModeIdx
);
2099 // Loop all possible bpp
2100 for (UINT_32 bppIdx
= 0; bppIdx
< MaxElementBytesLog2
; bppIdx
++)
2102 UINT_32 equationIndex
= ADDR_INVALID_EQUATION_INDEX
;
2104 // Check if the input is supported
2105 if (IsEquationSupported(rsrcType
, swMode
, bppIdx
))
2107 ADDR_EQUATION equation
;
2108 ADDR_E_RETURNCODE retCode
;
2110 memset(&equation
, 0, sizeof(ADDR_EQUATION
));
2112 // Generate the equation
// Dispatch: 256-byte 2D blocks, then thin modes, then (remaining case) thick modes.
2113 if (IsBlock256b(swMode
) && IsTex2d(rsrcType
))
2115 retCode
= ComputeBlock256Equation(rsrcType
, swMode
, bppIdx
, &equation
);
2117 else if (IsThin(rsrcType
, swMode
))
2119 retCode
= ComputeThinEquation(rsrcType
, swMode
, bppIdx
, &equation
);
2123 retCode
= ComputeThickEquation(rsrcType
, swMode
, bppIdx
, &equation
);
2126 // Only fill the equation into the table if the return code is ADDR_OK,
2127 // otherwise if the return code is not ADDR_OK, it indicates this is not
2128 // a valid input, we do nothing but just fill invalid equation index
2129 // into the lookup table.
2130 if (retCode
== ADDR_OK
)
2132 equationIndex
= m_numEquations
;
2133 ADDR_ASSERT(equationIndex
< EquationTableSize
);
2135 m_equationTable
[equationIndex
] = equation
;
2141 ADDR_ASSERT_ALWAYS();
2145 // Fill the index into the lookup table, if the combination is not supported
2146 // fill the invalid equation index
2147 m_equationLookupTable
[rsrcTypeIdx
][swModeIdx
][bppIdx
] = equationIndex
;
2154 ************************************************************************************************************************
2155 * Gfx9Lib::HwlGetEquationIndex
2158 * Interface function stub of GetEquationIndex
2162 ************************************************************************************************************************
// Looks up the precomputed equation index for the surface described by pIn and writes it to
// every mip level entry of pOut->pMipInfo (when that array is provided).
//
// The index stays ADDR_INVALID_EQUATION_INDEX when IsEquationSupported rejects the combination.
// NOTE(review): the line closing the parameter list, the braces and the return statement are
// not visible in this listing.
2164 UINT_32
Gfx9Lib::HwlGetEquationIndex(
2165 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
,
2166 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
2169 AddrResourceType rsrcType
= pIn
->resourceType
;
2170 AddrSwizzleMode swMode
= pIn
->swizzleMode
;
// bpp is in bits; >> 3 converts it to bytes before taking the log2.
2171 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
2172 UINT_32 index
= ADDR_INVALID_EQUATION_INDEX
;
2174 if (IsEquationSupported(rsrcType
, swMode
, elementBytesLog2
))
// Resource type minus one is the first lookup-table index (assumes ADDR_RSRC_TEX_2D == 1, the
// offset InitEquationTable adds when building the table — confirm).
2176 UINT_32 rsrcTypeIdx
= static_cast<UINT_32
>(rsrcType
) - 1;
2177 UINT_32 swModeIdx
= static_cast<UINT_32
>(swMode
);
2179 index
= m_equationLookupTable
[rsrcTypeIdx
][swModeIdx
][elementBytesLog2
];
// pMipInfo is optional; when present every mip level gets the same equation index.
2182 if (pOut
->pMipInfo
!= NULL
)
2184 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
2186 pOut
->pMipInfo
[i
].equationIndex
= index
;
2194 ************************************************************************************************************************
2195 * Gfx9Lib::HwlComputeBlock256Equation
2198 * Interface function stub of ComputeBlock256Equation
2202 ************************************************************************************************************************
// Fills *pEquation with the 8-bit address equation of a 256-byte block.
//
// The low elementBytesLog2 address bits are initialised through InitChannel(1, 0, i, ...); the
// remaining bits (starting at pixelBit) are chosen per swizzle class (standard, display,
// rotate) and element size from the x[] / y[] channel tables. Unsupported element sizes or
// swizzle classes assert and return ADDR_INVALIDPARAMS.
// NOTE(review): the declaration of `i`, every switch case body and the return statement are
// not visible in this listing.
2204 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeBlock256Equation(
2205 AddrResourceType rsrcType
,
2206 AddrSwizzleMode swMode
,
2207 UINT_32 elementBytesLog2
,
2208 ADDR_EQUATION
* pEquation
) const
2210 ADDR_E_RETURNCODE ret
= ADDR_OK
;
// A 256-byte block is addressed with 8 bits.
2212 pEquation
->numBits
= 8;
2215 for (; i
< elementBytesLog2
; i
++)
2217 InitChannel(1, 0 , i
, &pEquation
->addr
[i
]);
// pixelBit points at the first address bit above the byte-within-element bits.
2220 ADDR_CHANNEL_SETTING
* pixelBit
= &pEquation
->addr
[elementBytesLog2
];
2222 const UINT_32 maxBitsUsed
= 4;
2223 ADDR_CHANNEL_SETTING x
[maxBitsUsed
] = {};
2224 ADDR_CHANNEL_SETTING y
[maxBitsUsed
] = {};
// x[i] is initialised with index elementBytesLog2 + i, y[i] with index i.
2226 for (i
= 0; i
< maxBitsUsed
; i
++)
2228 InitChannel(1, 0, elementBytesLog2
+ i
, &x
[i
]);
2229 InitChannel(1, 1, i
, &y
[i
]);
2232 if (IsStandardSwizzle(rsrcType
, swMode
))
2234 switch (elementBytesLog2
)
2277 ADDR_ASSERT_ALWAYS();
2278 ret
= ADDR_INVALIDPARAMS
;
2282 else if (IsDisplaySwizzle(rsrcType
, swMode
))
2284 switch (elementBytesLog2
)
2327 ADDR_ASSERT_ALWAYS();
2328 ret
= ADDR_INVALIDPARAMS
;
2332 else if (IsRotateSwizzle(swMode
))
2334 switch (elementBytesLog2
)
2371 ADDR_ASSERT_ALWAYS();
2373 ret
= ADDR_INVALIDPARAMS
;
2379 ADDR_ASSERT_ALWAYS();
2380 ret
= ADDR_INVALIDPARAMS
;
// Sanity check: the highest x / y channel index used must match the 256-byte micro block
// dimensions for this element size.
2386 Dim2d microBlockDim
= Block256_2d
[elementBytesLog2
];
2387 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation
->addr
, 8, 0)) ==
2388 (microBlockDim
.w
* (1 << elementBytesLog2
)));
2389 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation
->addr
, 8, 1)) == microBlockDim
.h
);
2396 ************************************************************************************************************************
2397 * Gfx9Lib::HwlComputeThinEquation
2400 * Interface function stub of ComputeThinEquation
2404 ************************************************************************************************************************
// Fills *pEquation with the address equation of a thin (2D) swizzle mode.
//
// Visible outline: compute how many address bits the xor terms may reach (maxXorBits), build
// the x[] / y[] channel tables, fill the low bits (Z-order interleave or the 256-byte block
// equation), interleave y/x for the remaining bits up to blockSizeLog2, prepare extra bits above
// the block size for xor sources, then set the xor1 (and, for non-PRT modes, xor2) channels for
// the pipe and bank bit ranges. numBits is set to blockSizeLog2.
// NOTE(review): the declarations of `xIdx` / `yIdx`, updates of `lowBits`, several branch
// conditions and the return statement are not visible in this listing.
2406 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeThinEquation(
2407 AddrResourceType rsrcType
,
2408 AddrSwizzleMode swMode
,
2409 UINT_32 elementBytesLog2
,
2410 ADDR_EQUATION
* pEquation
) const
2412 ADDR_E_RETURNCODE ret
= ADDR_OK
;
2414 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swMode
);
2416 UINT_32 maxXorBits
= blockSizeLog2
;
2417 if (IsNonPrtXor(swMode
))
2419 // For non-prt-xor, maybe need to initialize some more bits for xor
2420 // The highest xor bit used in equation will be max the following 3 items:
2421 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2422 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2425 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+ 2 * GetPipeXorBits(blockSizeLog2
));
2426 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+
2427 GetPipeXorBits(blockSizeLog2
) +
2428 2 * GetBankXorBits(blockSizeLog2
));
// x and y each contribute at most maxBitsUsed bits, so together they must cover maxXorBits.
2431 const UINT_32 maxBitsUsed
= 14;
2432 ADDR_ASSERT((2 * maxBitsUsed
) >= maxXorBits
);
2433 ADDR_CHANNEL_SETTING x
[maxBitsUsed
] = {};
2434 ADDR_CHANNEL_SETTING y
[maxBitsUsed
] = {};
// xorExtra holds the interleaved bits that lie above the block size and only feed xor terms.
2436 const UINT_32 extraXorBits
= 16;
2437 ADDR_ASSERT(extraXorBits
>= maxXorBits
- blockSizeLog2
);
2438 ADDR_CHANNEL_SETTING xorExtra
[extraXorBits
] = {};
2440 for (UINT_32 i
= 0; i
< maxBitsUsed
; i
++)
2442 InitChannel(1, 0, elementBytesLog2
+ i
, &x
[i
]);
2443 InitChannel(1, 1, i
, &y
[i
]);
2446 ADDR_CHANNEL_SETTING
* pixelBit
= pEquation
->addr
;
2448 for (UINT_32 i
= 0; i
< elementBytesLog2
; i
++)
2450 InitChannel(1, 0 , i
, &pixelBit
[i
]);
2455 UINT_32 lowBits
= 0;
2457 if (IsZOrderSwizzle(swMode
))
2459 if (elementBytesLog2
<= 3)
// Z-order: bits up to 6 alternate x, y starting with x at the first bit above the element bytes.
2461 for (UINT_32 i
= elementBytesLog2
; i
< 6; i
++)
2463 pixelBit
[i
] = (((i
- elementBytesLog2
) & 1) == 0) ? x
[xIdx
++] : y
[yIdx
++];
2470 ret
= ADDR_INVALIDPARAMS
;
// Other swizzles reuse the 256-byte block equation for the low bits and continue the x/y
// indices from the micro block dimensions.
2475 ret
= HwlComputeBlock256Equation(rsrcType
, swMode
, elementBytesLog2
, pEquation
);
2479 Dim2d microBlockDim
= Block256_2d
[elementBytesLog2
];
2480 xIdx
= Log2(microBlockDim
.w
);
2481 yIdx
= Log2(microBlockDim
.h
);
// Remaining in-block bits: even address bits take the next y bit, odd ones the next x bit.
2488 for (UINT_32 i
= lowBits
; i
< blockSizeLog2
; i
++)
2490 pixelBit
[i
] = ((i
& 1) == 0) ? y
[yIdx
++] : x
[xIdx
++];
// Same interleave continued above the block size, stored separately in xorExtra.
2493 for (UINT_32 i
= blockSizeLog2
; i
< maxXorBits
; i
++)
2495 xorExtra
[i
- blockSizeLog2
] = ((i
& 1) == 0) ? y
[yIdx
++] : x
[xIdx
++];
2501 UINT_32 pipeStart
= m_pipeInterleaveLog2
;
2502 UINT_32 pipeXorBits
= GetPipeXorBits(blockSizeLog2
);
2504 UINT_32 bankStart
= pipeStart
+ pipeXorBits
;
2505 UINT_32 bankXorBits
= GetBankXorBits(blockSizeLog2
);
// Pipe bit i is xor'ed with the bit mirrored from the top of the 2 * pipeXorBits range; the
// source comes from addr[] when it lies inside the block, otherwise from xorExtra[].
2507 for (UINT_32 i
= 0; i
< pipeXorBits
; i
++)
2509 UINT_32 xor1BitPos
= pipeStart
+ 2 * pipeXorBits
- 1 - i
;
2510 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2511 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2513 InitChannel(&pEquation
->xor1
[pipeStart
+ i
], pXor1Src
);
// Same mirroring for the bank bits, relative to bankStart.
2516 for (UINT_32 i
= 0; i
< bankXorBits
; i
++)
2518 UINT_32 xor1BitPos
= bankStart
+ 2 * bankXorBits
- 1 - i
;
2519 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2520 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2522 InitChannel(&pEquation
->xor1
[bankStart
+ i
], pXor1Src
);
// Non-PRT modes additionally get xor2 terms on channel 2, in reversed bit order.
2525 if (IsPrt(swMode
) == FALSE
)
2527 for (UINT_32 i
= 0; i
< pipeXorBits
; i
++)
2529 InitChannel(1, 2, pipeXorBits
- i
- 1, &pEquation
->xor2
[pipeStart
+ i
]);
2532 for (UINT_32 i
= 0; i
< bankXorBits
; i
++)
2534 InitChannel(1, 2, bankXorBits
- i
- 1 + pipeXorBits
, &pEquation
->xor2
[bankStart
+ i
]);
2539 pEquation
->numBits
= blockSizeLog2
;
2546 ************************************************************************************************************************
2547 * Gfx9Lib::HwlComputeThickEquation
2550 * Interface function stub of ComputeThickEquation
2554 ************************************************************************************************************************
// Builds the address equation for a thick swizzle mode and stores it in *pEquation.
//   rsrcType          resource type; asserted to be a 3D texture
//   swMode            swizzle mode the equation is generated for
//   elementBytesLog2  log2 of the element size in bytes; also indexes Block1K_3d
//   pEquation         [out] receives addr[], xor1[], xor2[] and numBits
// ret starts as ADDR_OK and is set to ADDR_INVALIDPARAMS on the unsupported paths below.
2556 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeThickEquation(
2557 AddrResourceType rsrcType
,
2558 AddrSwizzleMode swMode
,
2559 UINT_32 elementBytesLog2
,
2560 ADDR_EQUATION
* pEquation
) const
2562 ADDR_E_RETURNCODE ret
= ADDR_OK
;
2564 ADDR_ASSERT(IsTex3d(rsrcType
));
// Block size (log2) of the swizzle mode; this is also what numBits is set to at the end.
2566 UINT_32 blockSizeLog2
= GetBlockSizeLog2(swMode
);
// maxXorBits is the highest equation bit that has to be generated. It only grows beyond
// blockSizeLog2 for non-PRT XOR modes, where XOR sources may lie above the block size.
2568 UINT_32 maxXorBits
= blockSizeLog2
;
2569 if (IsNonPrtXor(swMode
))
2571 // For non-prt-xor, maybe need to initialize some more bits for xor
2572 // The highest xor bit used in equation will be max the following 3:
2573 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2574 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2577 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+ 3 * GetPipeXorBits(blockSizeLog2
));
2578 maxXorBits
= Max(maxXorBits
, m_pipeInterleaveLog2
+
2579 GetPipeXorBits(blockSizeLog2
) +
2580 3 * GetBankXorBits(blockSizeLog2
));
// The lowest elementBytesLog2 address bits are initialised one by one with channel id 0.
2583 for (UINT_32 i
= 0; i
< elementBytesLog2
; i
++)
2585 InitChannel(1, 0 , i
, &pEquation
->addr
[i
]);
// pixelBit initially points just past the element-byte bits of addr[].
2588 ADDR_CHANNEL_SETTING
* pixelBit
= &pEquation
->addr
[elementBytesLog2
];
// Pools of per-channel bit settings (x, y, z), filled by the InitChannel loop below.
2590 const UINT_32 maxBitsUsed
= 12;
2591 ADDR_ASSERT((3 * maxBitsUsed
) >= maxXorBits
);
2592 ADDR_CHANNEL_SETTING x
[maxBitsUsed
] = {};
2593 ADDR_CHANNEL_SETTING y
[maxBitsUsed
] = {};
2594 ADDR_CHANNEL_SETTING z
[maxBitsUsed
] = {};
// Scratch storage for equation bits at or above blockSizeLog2; they are only used as
// XOR sources further down and are not part of addr[].
2596 const UINT_32 extraXorBits
= 24;
2597 ADDR_ASSERT(extraXorBits
>= maxXorBits
- blockSizeLog2
);
2598 ADDR_CHANNEL_SETTING xorExtra
[extraXorBits
] = {};
// x[] entries start at index elementBytesLog2, y[] and z[] entries start at index 0.
2600 for (UINT_32 i
= 0; i
< maxBitsUsed
; i
++)
2602 InitChannel(1, 0, elementBytesLog2
+ i
, &x
[i
]);
2603 InitChannel(1, 1, i
, &y
[i
]);
2604 InitChannel(1, 2, i
, &z
[i
]);
// The low address bits depend on the swizzle family (Z-order or standard) and on the
// element size; anything else is rejected with ADDR_INVALIDPARAMS.
2607 if (IsZOrderSwizzle(swMode
))
2609 switch (elementBytesLog2
)
2662 ADDR_ASSERT_ALWAYS();
2663 ret
= ADDR_INVALIDPARAMS
;
2667 else if (IsStandardSwizzle(rsrcType
, swMode
))
2669 switch (elementBytesLog2
)
2722 ADDR_ASSERT_ALWAYS();
2723 ret
= ADDR_INVALIDPARAMS
;
2729 ADDR_ASSERT_ALWAYS();
2730 ret
= ADDR_INVALIDPARAMS
;
// Continue each channel at the first bit index not covered by the 1KB micro block.
2735 Dim3d microBlockDim
= Block1K_3d
[elementBytesLog2
];
2736 UINT_32 xIdx
= Log2(microBlockDim
.w
);
2737 UINT_32 yIdx
= Log2(microBlockDim
.h
);
2738 UINT_32 zIdx
= Log2(microBlockDim
.d
);
// From here on pixelBit aliases the whole addr[] array and is indexed by absolute bit position.
2740 pixelBit
= pEquation
->addr
;
// The asserts check that exactly the first lowBits address bits have been filled in so far.
2742 const UINT_32 lowBits
= 10;
2743 ADDR_ASSERT(pEquation
->addr
[lowBits
- 1].valid
== 1);
2744 ADDR_ASSERT(pEquation
->addr
[lowBits
].valid
== 0);
// Bits [lowBits, blockSizeLog2) take the next unused x, z or y bit, chosen by i % 3.
2746 for (UINT_32 i
= lowBits
; i
< blockSizeLog2
; i
++)
2750 pixelBit
[i
] = x
[xIdx
++];
2752 else if ((i
% 3) == 1)
2754 pixelBit
[i
] = z
[zIdx
++];
2758 pixelBit
[i
] = y
[yIdx
++];
// Bits [blockSizeLog2, maxXorBits) follow the same i % 3 pattern but land in xorExtra[].
2762 for (UINT_32 i
= blockSizeLog2
; i
< maxXorBits
; i
++)
2766 xorExtra
[i
- blockSizeLog2
] = x
[xIdx
++];
2768 else if ((i
% 3) == 1)
2770 xorExtra
[i
- blockSizeLog2
] = z
[zIdx
++];
2774 xorExtra
[i
- blockSizeLog2
] = y
[yIdx
++];
// Pipe XOR: for pipe bit i, xor1 and xor2 are copied from two adjacent equation bits,
// walking downwards from pipeStart + 3 * pipeXorBits - 1 in steps of two. Sources at or
// above blockSizeLog2 are taken from xorExtra[] instead of addr[].
2781 UINT_32 pipeStart
= m_pipeInterleaveLog2
;
2782 UINT_32 pipeXorBits
= GetPipeXorBits(blockSizeLog2
);
2783 for (UINT_32 i
= 0; i
< pipeXorBits
; i
++)
2785 UINT_32 xor1BitPos
= pipeStart
+ (3 * pipeXorBits
) - 1 - (2 * i
);
2786 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2787 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2789 InitChannel(&pEquation
->xor1
[pipeStart
+ i
], pXor1Src
);
2791 UINT_32 xor2BitPos
= pipeStart
+ (3 * pipeXorBits
) - 2 - (2 * i
);
2792 ADDR_CHANNEL_SETTING
* pXor2Src
= (xor2BitPos
< blockSizeLog2
) ?
2793 &pEquation
->addr
[xor2BitPos
] : &xorExtra
[xor2BitPos
- blockSizeLog2
];
2795 InitChannel(&pEquation
->xor2
[pipeStart
+ i
], pXor2Src
);
// Bank XOR: same scheme as the pipe bits, starting right above the pipe bits.
2798 UINT_32 bankStart
= pipeStart
+ pipeXorBits
;
2799 UINT_32 bankXorBits
= GetBankXorBits(blockSizeLog2
);
2800 for (UINT_32 i
= 0; i
< bankXorBits
; i
++)
2802 UINT_32 xor1BitPos
= bankStart
+ (3 * bankXorBits
) - 1 - (2 * i
);
2803 ADDR_CHANNEL_SETTING
* pXor1Src
= (xor1BitPos
< blockSizeLog2
) ?
2804 &pEquation
->addr
[xor1BitPos
] : &xorExtra
[xor1BitPos
- blockSizeLog2
];
2806 InitChannel(&pEquation
->xor1
[bankStart
+ i
], pXor1Src
);
2808 UINT_32 xor2BitPos
= bankStart
+ (3 * bankXorBits
) - 2 - (2 * i
);
2809 ADDR_CHANNEL_SETTING
* pXor2Src
= (xor2BitPos
< blockSizeLog2
) ?
2810 &pEquation
->addr
[xor2BitPos
] : &xorExtra
[xor2BitPos
- blockSizeLog2
];
2812 InitChannel(&pEquation
->xor2
[bankStart
+ i
], pXor2Src
);
// The equation describes blockSizeLog2 address bits.
2816 pEquation
->numBits
= blockSizeLog2
;
2823 ************************************************************************************************************************
2824 * Gfx9Lib::IsValidDisplaySwizzleMode
2827 * Check if a swizzle mode is supported by display engine
2830 * TRUE if the swizzle mode is supported by the display engine
2831 ************************************************************************************************************************
// Decides whether the swizzle mode in pIn is usable for a displayable surface.
// Reads pIn->resourceType, pIn->swizzleMode and pIn->bpp. The accepted modes and bpp limits
// differ between the two display generations selected by m_settings.isDce12 and
// m_settings.isDcn1. "support" starts as FALSE and is only raised by a matching case.
2833 BOOL_32
Gfx9Lib::IsValidDisplaySwizzleMode(
2834 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
) const
2836 BOOL_32 support
= FALSE
;
2838 const AddrResourceType resourceType
= pIn
->resourceType
;
2840 const AddrSwizzleMode swizzleMode
= pIn
->swizzleMode
;
// DCE12 display engine.
2842 if (m_settings
.isDce12
)
2844 switch (swizzleMode
)
// 256B display/rotated modes are tied to exactly 32 bpp.
2846 case ADDR_SW_256B_D
:
2847 case ADDR_SW_256B_R
:
2848 support
= (pIn
->bpp
== 32);
2851 case ADDR_SW_LINEAR
:
2854 case ADDR_SW_64KB_D
:
2855 case ADDR_SW_64KB_R
:
2858 case ADDR_SW_4KB_D_X
:
2859 case ADDR_SW_4KB_R_X
:
2860 case ADDR_SW_64KB_D_X
:
2861 case ADDR_SW_64KB_R_X
:
2862 case ADDR_SW_VAR_D_X
:
2863 case ADDR_SW_VAR_R_X
:
2864 support
= (pIn
->bpp
<= 64);
// DCN1 display engine: the _D group requires exactly 64 bpp, the linear/_S group at most 64 bpp.
2871 else if (m_settings
.isDcn1
)
2873 switch (swizzleMode
)
2876 case ADDR_SW_64KB_D
:
2878 case ADDR_SW_64KB_D_T
:
2879 case ADDR_SW_4KB_D_X
:
2880 case ADDR_SW_64KB_D_X
:
2881 case ADDR_SW_VAR_D_X
:
2882 support
= (pIn
->bpp
== 64);
2885 case ADDR_SW_LINEAR
:
2887 case ADDR_SW_64KB_S
:
2889 case ADDR_SW_64KB_S_T
:
2890 case ADDR_SW_4KB_S_X
:
2891 case ADDR_SW_64KB_S_X
:
2892 case ADDR_SW_VAR_S_X
:
2893 support
= (pIn
->bpp
<= 64);
// NOTE(review): presumably the fallback for display engines other than DCE12/DCN1 - confirm.
2902 ADDR_NOT_IMPLEMENTED();
2909 ************************************************************************************************************************
2910 * Gfx9Lib::HwlComputePipeBankXor
2913 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2917 ************************************************************************************************************************
// Derives a pipe/bank XOR value for a surface from pIn->surfIndex and writes it to
// pOut->pipeBankXor. Only the bank part is varied here; pipeXor stays 0.
2919 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputePipeBankXor(
2920 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT
* pIn
,
2921 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT
* pOut
) const
// Number of pipe and bank XOR bits available for the block size of this swizzle mode.
2923 UINT_32 macroBlockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
2924 UINT_32 pipeBits
= GetPipeXorBits(macroBlockBits
);
2925 UINT_32 bankBits
= GetBankXorBits(macroBlockBits
);
2927 UINT_32 pipeXor
= 0;
2928 UINT_32 bankXor
= 0;
// The surface index is reduced to the low bankBits bits.
2930 const UINT_32 bankMask
= (1 << bankBits
) - 1;
2931 const UINT_32 index
= pIn
->surfIndex
& bankMask
;
// Fmask surfaces derive their bpp from the sample/fragment counts, everything else from the format.
2933 const UINT_32 bpp
= pIn
->flags
.fmask
?
2934 GetFmaskBpp(pIn
->numSamples
, pIn
->numFrags
) : GetElemLib()->GetBitsPerPixel(pIn
->format
);
// Fixed 16-entry bank XOR sequences; the table is chosen by bpp <= 32.
// NOTE(review): index must stay below 16 here, so this path presumably requires bankBits == 4 - confirm.
2937 static const UINT_32 BankXorSmallBpp
[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
2938 static const UINT_32 BankXorLargeBpp
[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
2940 bankXor
= (bpp
<= 32) ? BankXorSmallBpp
[index
] : BankXorLargeBpp
[index
];
// Otherwise the index is multiplied by (2^(bankBits-1) - 1), or by 1 when that would be 0,
// and masked back to bankBits bits.
2942 else if (bankBits
> 0)
2944 UINT_32 bankIncrease
= (1 << (bankBits
- 1)) - 1;
2945 bankIncrease
= (bankIncrease
== 0) ? 1 : bankIncrease
;
2946 bankXor
= (index
* bankIncrease
) & bankMask
;
// Bank bits sit directly above the pipe bits in the combined value.
2949 pOut
->pipeBankXor
= (bankXor
<< pipeBits
) | pipeXor
;
2955 ************************************************************************************************************************
2956 * Gfx9Lib::HwlComputeSlicePipeBankXor
2959 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2963 ************************************************************************************************************************
// Combines a base pipe/bank XOR value with a per-slice term and writes the result to
// pOut->pipeBankXor. The per-slice term is built from the bit-reversed slice index.
2965 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSlicePipeBankXor(
2966 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT
* pIn
,
2967 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT
* pOut
) const
2969 UINT_32 macroBlockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
2970 UINT_32 pipeBits
= GetPipeXorBits(macroBlockBits
);
2971 UINT_32 bankBits
= GetBankXorBits(macroBlockBits
);
// The low pipeBits bits of the slice index feed the pipe XOR, the next bankBits bits feed
// the bank XOR; each group is passed through ReverseBitVector.
2973 UINT_32 pipeXor
= ReverseBitVector(pIn
->slice
, pipeBits
);
2974 UINT_32 bankXor
= ReverseBitVector(pIn
->slice
>> pipeBits
, bankBits
);
// XOR the per-slice value (bank bits above pipe bits) onto the base value.
2976 pOut
->pipeBankXor
= pIn
->basePipeBankXor
^ (pipeXor
| (bankXor
<< pipeBits
));
2982 ************************************************************************************************************************
2983 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2986 * Compute sub resource offset to support swizzle pattern
2990 ************************************************************************************************************************
// Computes the byte offset of a sub resource for use with a swizzle pattern and writes it
// to pOut->offset. Asserts that the resource type / swizzle mode combination is thin.
2992 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2993 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT
* pIn
,
2994 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT
* pOut
) const
2996 ADDR_ASSERT(IsThin(pIn
->resourceType
, pIn
->swizzleMode
));
2998 UINT_32 macroBlockBits
= GetBlockSizeLog2(pIn
->swizzleMode
);
2999 UINT_32 pipeBits
= GetPipeXorBits(macroBlockBits
);
3000 UINT_32 bankBits
= GetBankXorBits(macroBlockBits
);
// Per-slice pipe/bank XOR built from the bit-reversed slice index.
3001 UINT_32 pipeXor
= ReverseBitVector(pIn
->slice
, pipeBits
);
3002 UINT_32 bankXor
= ReverseBitVector(pIn
->slice
>> pipeBits
, bankBits
);
// Combine it with the surface's pipeBankXor and shift it up to the pipe interleave position.
3003 UINT_32 pipeBankXor
= ((pipeXor
| (bankXor
<< pipeBits
)) ^ (pIn
->pipeBankXor
)) << m_pipeInterleaveLog2
;
// offset = slice * sliceSize + macroBlockOffset + (mipTailOffset ^ pipeBankXor) - pipeBankXor.
// The XOR is applied to the mip tail offset and its plain value is then subtracted again.
3005 pOut
->offset
= pIn
->slice
* pIn
->sliceSize
+
3006 pIn
->macroBlockOffset
+
3007 (pIn
->mipTailOffset
^ pipeBankXor
) -
3008 static_cast<UINT_64
>(pipeBankXor
);
3013 ************************************************************************************************************************
3014 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3017 * Compute surface info sanity check
3021 ************************************************************************************************************************
// Validates the combination of parameters in pIn for surface info computation.
// Returns ADDR_INVALIDPARAMS when "invalid" ends up set, ADDR_OK otherwise. The checks are
// staged: each later stage only runs while invalid is still FALSE.
3023 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3024 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
) const
3026 BOOL_32 invalid
= FALSE
;
// Basic range checks on bpp, width, fragment/sample counts and the enum values.
3028 if ((pIn
->bpp
> 128) || (pIn
->width
== 0) || (pIn
->numFrags
> 8) || (pIn
->numSamples
> 16))
3032 else if ((pIn
->swizzleMode
>= ADDR_SW_MAX_TYPE
) ||
3033 (pIn
->resourceType
>= ADDR_RSRC_MAX_TYPE
))
// Derived properties used by the rule checks below. msaa is defined by numFrags > 1;
// display also covers rotated surfaces.
3038 BOOL_32 mipmap
= (pIn
->numMipLevels
> 1);
3039 BOOL_32 msaa
= (pIn
->numFrags
> 1);
3041 ADDR2_SURFACE_FLAGS flags
= pIn
->flags
;
3042 BOOL_32 zbuffer
= (flags
.depth
|| flags
.stencil
);
3043 BOOL_32 color
= flags
.color
;
3044 BOOL_32 display
= flags
.display
|| flags
.rotated
;
3046 AddrResourceType rsrcType
= pIn
->resourceType
;
3047 BOOL_32 tex3d
= IsTex3d(rsrcType
);
3048 AddrSwizzleMode swizzle
= pIn
->swizzleMode
;
3049 BOOL_32 linear
= IsLinear(swizzle
);
3050 BOOL_32 blk256B
= IsBlock256b(swizzle
);
3051 BOOL_32 blkVar
= IsBlockVariable(swizzle
);
3052 BOOL_32 isNonPrtXor
= IsNonPrtXor(swizzle
);
3053 BOOL_32 prt
= flags
.prt
;
3054 BOOL_32 stereo
= flags
.qbStereo
;
// Block size check for multi-fragment surfaces; note that the comparison uses numFrags.
3056 if (invalid
== FALSE
)
3058 if ((pIn
->numFrags
> 1) &&
3059 (GetBlockSize(swizzle
) < (m_pipeInterleaveBytes
* pIn
->numFrags
)))
3061 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
// Per-resource-type rules: 1D must be linear and plain; 2D forbids combining any two of
// MSAA, mipmaps and stereo; 3D forbids MSAA, depth/stencil, display and stereo.
3066 if (invalid
== FALSE
)
3070 case ADDR_RSRC_TEX_1D
:
3071 invalid
= msaa
|| zbuffer
|| display
|| (linear
== FALSE
) || stereo
;
3073 case ADDR_RSRC_TEX_2D
:
3074 invalid
= (msaa
&& mipmap
) || (stereo
&& msaa
) || (stereo
&& mipmap
);
3076 case ADDR_RSRC_TEX_3D
:
3077 invalid
= msaa
|| zbuffer
|| display
|| stereo
;
// Display-engine compatibility of the swizzle mode.
3085 if (invalid
== FALSE
)
3089 invalid
= (IsValidDisplaySwizzleMode(pIn
) == FALSE
);
// Swizzle-mode specific rules.
3093 if (invalid
== FALSE
)
3097 invalid
= ((ADDR_RSRC_TEX_1D
!= rsrcType
) && prt
) ||
3098 zbuffer
|| msaa
|| (pIn
->bpp
== 0) || ((pIn
->bpp
% 8) != 0);
3102 if (blk256B
|| blkVar
|| isNonPrtXor
)
3107 invalid
= invalid
|| zbuffer
|| tex3d
|| mipmap
|| msaa
;
// Swizzle-type specific rules; a swizzle mode that matches none of the four types trips the assert.
3111 if (invalid
== FALSE
)
3113 if (IsZOrderSwizzle(swizzle
))
3115 invalid
= color
&& msaa
;
3117 else if (IsStandardSwizzle(rsrcType
, swizzle
))
3121 else if (IsDisplaySwizzle(rsrcType
, swizzle
))
3125 else if (IsRotateSwizzle(swizzle
))
3127 invalid
= zbuffer
|| (pIn
->bpp
> 64) || tex3d
;
3131 ADDR_ASSERT(!"invalid swizzle mode");
// Assert on any rejected input before translating the flag into a return code.
3138 ADDR_ASSERT(invalid
== FALSE
);
3140 return invalid
? ADDR_INVALIDPARAMS
: ADDR_OK
;
3144 ************************************************************************************************************************
3145 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3148 * Internal function to get suggested surface information for client to use
3152 ************************************************************************************************************************
// Suggests a swizzle mode for the surface described by pIn and reports which block sizes
// and swizzle types are valid for it. Results are written to pOut (resourceType,
// swizzleMode, validBlockSet, validSwTypeSet, clientPreferredSwSet, canXor).
// Broad flow: normalise bpp and dimensions, restrict swizzle types and block sizes by
// resource kind and display hardware, reconcile with the client's preferred set, compare
// padded sizes per block type, pick pOut->swizzleMode, then run the sanity check on it.
3154 ADDR_E_RETURNCODE
Gfx9Lib::HwlGetPreferredSurfaceSetting(
3155 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT
* pIn
,
3156 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT
* pOut
) const
3158 // Macro define resource block type
3161 AddrBlockMicro
= 0, // Resource uses 256B block
3162 AddrBlock4KB
= 1, // Resource uses 4KB block
3163 AddrBlock64KB
= 2, // Resource uses 64KB block
3164 AddrBlockVar
= 3, // Resource uses var block
3165 AddrBlockLinear
= 4, // Resource uses linear swizzle mode
3167 AddrBlockMaxTiledType
= AddrBlock64KB
+ 1,
// Bit masks corresponding to the block types above.
3172 AddrBlockSetMicro
= 1 << AddrBlockMicro
,
3173 AddrBlockSetMacro4KB
= 1 << AddrBlock4KB
,
3174 AddrBlockSetMacro64KB
= 1 << AddrBlock64KB
,
3175 AddrBlockSetVar
= 1 << AddrBlockVar
,
3176 AddrBlockSetLinear
= 1 << AddrBlockLinear
,
3178 AddrBlockSetMacro
= AddrBlockSetMacro4KB
| AddrBlockSetMacro64KB
,
// Bit masks for the four swizzle types (Z, S, D, R).
3183 AddrSwSetZ
= 1 << ADDR_SW_Z
,
3184 AddrSwSetS
= 1 << ADDR_SW_S
,
3185 AddrSwSetD
= 1 << ADDR_SW_D
,
3186 AddrSwSetR
= 1 << ADDR_SW_R
,
3188 AddrSwSetAll
= AddrSwSetZ
| AddrSwSetS
| AddrSwSetD
| AddrSwSetR
,
3191 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
3192 ElemLib
* pElemLib
= GetElemLib();
3194 // Set format to INVALID will skip this conversion
3195 UINT_32 expandX
= 1;
3196 UINT_32 expandY
= 1;
3197 UINT_32 bpp
= pIn
->bpp
;
3198 UINT_32 width
= pIn
->width
;
3199 UINT_32 height
= pIn
->height
;
3201 if (pIn
->format
!= ADDR_FMT_INVALID
)
3203 // Don't care for this case
3204 ElemMode elemMode
= ADDR_UNCOMPRESSED
;
3206 // Get compression/expansion factors and element mode which indicates compression/expansion
3207 bpp
= pElemLib
->GetBitsPerPixel(pIn
->format
,
3212 UINT_32 basePitch
= 0;
3213 GetElemLib()->AdjustSurfaceInfo(elemMode
,
// Zero counts are treated as 1; a zero numFrags falls back to numSamples.
3222 UINT_32 numSamples
= Max(pIn
->numSamples
, 1u);
3223 UINT_32 numFrags
= (pIn
->numFrags
== 0) ? numSamples
: pIn
->numFrags
;
3224 UINT_32 slice
= Max(pIn
->numSlices
, 1u);
3225 UINT_32 numMipLevels
= Max(pIn
->numMipLevels
, 1u);
3226 UINT_32 minSizeAlign
= NextPow2(pIn
->minSizeAlign
);
// Fmask surfaces take their bpp from the sample/fragment counts and are reported as 2D.
3228 if (pIn
->flags
.fmask
)
3230 bpp
= GetFmaskBpp(numSamples
, numFrags
);
3233 pOut
->resourceType
= ADDR_RSRC_TEX_2D
;
3237 // The output may get changed for volume(3D) texture resource in future
3238 pOut
->resourceType
= pIn
->resourceType
;
3243 ADDR_ASSERT_ALWAYS();
3245 returnCode
= ADDR_INVALIDPARAMS
;
// 1D resources are always linear and never use XOR.
3247 else if (IsTex1d(pOut
->resourceType
))
3249 pOut
->swizzleMode
= ADDR_SW_LINEAR
;
3250 pOut
->validBlockSet
.value
= AddrBlockSetLinear
;
3251 pOut
->canXor
= FALSE
;
3255 ADDR2_BLOCK_SET blockSet
;
// addrPreferredSwSet: the single type addrlib would pick; addrValidSwSet: every type the
// hardware rules allow; clientPreferredSwSet: what the caller asked for.
3258 ADDR2_SWTYPE_SET addrPreferredSwSet
, addrValidSwSet
, clientPreferredSwSet
;
3259 addrPreferredSwSet
.value
= AddrSwSetS
;
3260 addrValidSwSet
= addrPreferredSwSet
;
3261 clientPreferredSwSet
= pIn
->preferredSwSet
;
// A client that expresses no preference is treated as accepting every swizzle type.
3263 if (clientPreferredSwSet
.value
== 0)
3265 clientPreferredSwSet
.value
= AddrSwSetAll
;
3268 // prt Xor and non-xor will have less height align requirement for stereo surface
3269 BOOL_32 prtXor
= (pIn
->flags
.prt
|| pIn
->flags
.qbStereo
) && (pIn
->noXor
== FALSE
);
3270 BOOL_32 displayResource
= FALSE
;
3272 pOut
->canXor
= (pIn
->flags
.prt
== FALSE
) && (pIn
->noXor
== FALSE
);
3274 // Filter out improper swType and blockSet by HW restriction
3275 if (pIn
->flags
.fmask
|| pIn
->flags
.depth
|| pIn
->flags
.stencil
)
3277 ADDR_ASSERT(IsTex2d(pOut
->resourceType
));
3278 blockSet
.value
= AddrBlockSetMacro
;
3279 addrPreferredSwSet
.value
= AddrSwSetZ
;
3280 addrValidSwSet
.value
= AddrSwSetZ
;
3282 if (pIn
->flags
.depth
&& pIn
->flags
.texture
)
3284 if (((bpp
== 16) && (numFrags
>= 4)) ||
3285 ((bpp
== 32) && (numFrags
>= 2)))
3287 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3288 // equation from wrong address within memory range a tile covered and use the
3289 // garbage data for compressed Z reading which finally leads to corruption.
3290 pOut
->canXor
= FALSE
;
3295 else if (ElemLib::IsBlockCompressed(pIn
->format
))
3297 // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes.
3298 // Not sure under what circumstances "_D" would be appropriate as these formats
3299 // are not displayable.
3300 blockSet
.value
= AddrBlockSetMacro
;
3302 // This isn't to be used as texture and caller doesn't allow macro tiled.
3303 if ((pIn
->flags
.texture
== FALSE
) &&
3304 (pIn
->forbiddenBlock
.macro4KB
&& pIn
->forbiddenBlock
.macro64KB
))
3306 blockSet
.value
|= AddrBlockSetLinear
;
3309 addrPreferredSwSet
.value
= AddrSwSetD
;
3310 addrValidSwSet
.value
= AddrSwSetS
| AddrSwSetD
;
3312 else if (ElemLib::IsMacroPixelPacked(pIn
->format
))
3314 // macro pixel packed formats (BG_RG, GB_GR) do not support the Z modes.
3315 // It's not clear under what circumstances the D or R modes would be appropriate
3316 // since these formats are not displayable.
3317 blockSet
.value
= AddrBlockSetLinear
| AddrBlockSetMacro
;
3319 addrPreferredSwSet
.value
= AddrSwSetS
;
3320 addrValidSwSet
.value
= AddrSwSetS
| AddrSwSetD
| AddrSwSetR
;
3322 else if (IsTex3d(pOut
->resourceType
))
3324 blockSet
.value
= AddrBlockSetLinear
| AddrBlockSetMacro
;
3328 // PRT cannot use SW_D which gives an unexpected block dimension
3329 addrPreferredSwSet
.value
= AddrSwSetZ
;
3330 addrValidSwSet
.value
= AddrSwSetZ
| AddrSwSetS
;
3332 else if ((numMipLevels
> 1) && (slice
>= width
) && (slice
>= height
))
3334 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3335 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3336 addrPreferredSwSet
.value
= AddrSwSetZ
;
3337 addrValidSwSet
.value
= AddrSwSetZ
| AddrSwSetS
;
3339 else if (pIn
->flags
.color
)
3341 addrPreferredSwSet
.value
= AddrSwSetD
;
3342 addrValidSwSet
.value
= AddrSwSetZ
| AddrSwSetS
| AddrSwSetD
;
3346 addrPreferredSwSet
.value
= AddrSwSetZ
;
3347 addrValidSwSet
.value
= AddrSwSetZ
| AddrSwSetD
;
3350 addrValidSwSet
.value
|= AddrSwSetS
;
// Remaining resources: D is preferred for display, overlay or 128 bpp surfaces, S otherwise.
3356 addrPreferredSwSet
.value
= ((pIn
->flags
.display
== TRUE
) ||
3357 (pIn
->flags
.overlay
== TRUE
) ||
3358 (pIn
->bpp
== 128)) ? AddrSwSetD
: AddrSwSetS
;
3360 addrValidSwSet
.value
= AddrSwSetS
| AddrSwSetD
| AddrSwSetR
;
3362 if (numMipLevels
> 1)
3364 ADDR_ASSERT(numFrags
== 1);
3365 blockSet
.value
= AddrBlockSetLinear
| AddrBlockSetMacro
;
3367 else if ((numFrags
> 1) || (numSamples
> 1))
3369 ADDR_ASSERT(IsTex2d(pOut
->resourceType
));
3370 blockSet
.value
= AddrBlockSetMacro
;
3374 ADDR_ASSERT(IsTex2d(pOut
->resourceType
));
3375 blockSet
.value
= AddrBlockSetLinear
| AddrBlockSetMicro
| AddrBlockSetMacro
;
3377 displayResource
= pIn
->flags
.rotated
|| pIn
->flags
.display
;
// Displayable surfaces: rotated ones prefer R, others D; the valid set is then narrowed
// per display engine generation.
3379 if (displayResource
)
3381 addrPreferredSwSet
.value
= pIn
->flags
.rotated
? AddrSwSetR
: AddrSwSetD
;
3387 else if (m_settings
.isDce12
)
3391 blockSet
.micro
= FALSE
;
3394 // DCE12 does not support display surface to be _T swizzle mode
3397 addrValidSwSet
.value
= AddrSwSetD
| AddrSwSetR
;
3399 else if (m_settings
.isDcn1
)
3401 // _R is not supported by Dcn1
3404 addrPreferredSwSet
.value
= AddrSwSetD
;
3405 addrValidSwSet
.value
= AddrSwSetD
;
3409 addrPreferredSwSet
.value
= AddrSwSetS
;
3410 addrValidSwSet
.value
= AddrSwSetS
| AddrSwSetD
;
3413 blockSet
.micro
= FALSE
;
3417 ADDR_NOT_IMPLEMENTED();
3418 returnCode
= ADDR_NOTSUPPORTED
;
// The addrlib-preferred type must itself be one of the valid types.
3424 ADDR_ASSERT((addrValidSwSet
.value
& addrPreferredSwSet
.value
) == addrPreferredSwSet
.value
);
// The unclamped client preference is reported back before it is narrowed below.
3426 pOut
->clientPreferredSwSet
= clientPreferredSwSet
;
3428 // Clamp client preferred set to valid set
3429 clientPreferredSwSet
.value
&= addrValidSwSet
.value
;
3431 pOut
->validSwTypeSet
= addrValidSwSet
;
3433 if (clientPreferredSwSet
.value
== 0)
3435 // Client asks for an invalid swizzle type...
3436 ADDR_ASSERT_ALWAYS();
3437 returnCode
= ADDR_INVALIDPARAMS
;
3441 if (IsPow2(clientPreferredSwSet
.value
))
3443 // Only one swizzle type left, use it directly
3444 addrPreferredSwSet
.value
= clientPreferredSwSet
.value
;
3446 else if ((clientPreferredSwSet
.value
& addrPreferredSwSet
.value
) == 0)
3448 // Client wants 2 or more valid swizzle types but none of them is addrlib preferred
// Fallback priority among the client's choices: D, then Z, then R, then S.
3449 if (clientPreferredSwSet
.sw_D
)
3451 addrPreferredSwSet
.value
= AddrSwSetD
;
3453 else if (clientPreferredSwSet
.sw_Z
)
3455 addrPreferredSwSet
.value
= AddrSwSetZ
;
3457 else if (clientPreferredSwSet
.sw_R
)
3459 addrPreferredSwSet
.value
= AddrSwSetR
;
3463 ADDR_ASSERT(clientPreferredSwSet
.sw_S
);
3464 addrPreferredSwSet
.value
= AddrSwSetS
;
3468 if ((numFrags
> 1) &&
3469 (GetBlockSize(ADDR_SW_4KB
) < (m_pipeInterleaveBytes
* numFrags
)))
3471 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3472 blockSet
.macro4KB
= FALSE
;
3477 blockSet
.value
&= AddrBlockSetMacro64KB
;
3480 // Apply customized forbidden setting
3481 blockSet
.value
&= ~pIn
->forbiddenBlock
.value
;
// A non-zero maxAlign removes every block type whose block size exceeds it.
3483 if (pIn
->maxAlign
> 0)
3485 if (pIn
->maxAlign
< GetBlockSize(ADDR_SW_64KB
))
3487 blockSet
.macro64KB
= FALSE
;
3490 if (pIn
->maxAlign
< GetBlockSize(ADDR_SW_4KB
))
3492 blockSet
.macro4KB
= FALSE
;
3495 if (pIn
->maxAlign
< GetBlockSize(ADDR_SW_256B
))
3497 blockSet
.micro
= FALSE
;
// Per block type (256B, 4KB, 64KB): block dimensions, padded surface dimensions and padded size.
3501 Dim3d blkAlign
[AddrBlockMaxTiledType
] = {{0}, {0}, {0}};
3502 Dim3d paddedDim
[AddrBlockMaxTiledType
] = {{0}, {0}, {0}};
3503 UINT_64 padSize
[AddrBlockMaxTiledType
] = {0};
3507 returnCode
= ComputeBlockDimensionForSurf(&blkAlign
[AddrBlockMicro
].w
,
3508 &blkAlign
[AddrBlockMicro
].h
,
3509 &blkAlign
[AddrBlockMicro
].d
,
3515 if (returnCode
== ADDR_OK
)
// Display resources get their block width aligned up to 32.
3517 if (displayResource
)
3519 blkAlign
[AddrBlockMicro
].w
= PowTwoAlign(blkAlign
[AddrBlockMicro
].w
, 32);
3521 else if ((blkAlign
[AddrBlockMicro
].w
>= width
) && (blkAlign
[AddrBlockMicro
].h
>= height
) &&
3522 (minSizeAlign
<= GetBlockSize(ADDR_SW_256B
)))
3524 // If one 256B block can contain the surface, don't bother bigger block type
3525 blockSet
.macro4KB
= FALSE
;
3526 blockSet
.macro64KB
= FALSE
;
3527 blockSet
.var
= FALSE
;
3530 padSize
[AddrBlockMicro
] = ComputePadSize(&blkAlign
[AddrBlockMicro
], width
, height
,
3531 slice
, &paddedDim
[AddrBlockMicro
]);
3535 if ((returnCode
== ADDR_OK
) && blockSet
.macro4KB
)
3537 returnCode
= ComputeBlockDimensionForSurf(&blkAlign
[AddrBlock4KB
].w
,
3538 &blkAlign
[AddrBlock4KB
].h
,
3539 &blkAlign
[AddrBlock4KB
].d
,
3545 if (returnCode
== ADDR_OK
)
3547 if (displayResource
)
3549 blkAlign
[AddrBlock4KB
].w
= PowTwoAlign(blkAlign
[AddrBlock4KB
].w
, 32);
3552 padSize
[AddrBlock4KB
] = ComputePadSize(&blkAlign
[AddrBlock4KB
], width
, height
,
3553 slice
, &paddedDim
[AddrBlock4KB
]);
3555 ADDR_ASSERT(padSize
[AddrBlock4KB
] >= padSize
[AddrBlockMicro
]);
3559 if ((returnCode
== ADDR_OK
) && blockSet
.macro64KB
)
3561 returnCode
= ComputeBlockDimensionForSurf(&blkAlign
[AddrBlock64KB
].w
,
3562 &blkAlign
[AddrBlock64KB
].h
,
3563 &blkAlign
[AddrBlock64KB
].d
,
3569 if (returnCode
== ADDR_OK
)
3571 if (displayResource
)
3573 blkAlign
[AddrBlock64KB
].w
= PowTwoAlign(blkAlign
[AddrBlock64KB
].w
, 32);
3576 padSize
[AddrBlock64KB
] = ComputePadSize(&blkAlign
[AddrBlock64KB
], width
, height
,
3577 slice
, &paddedDim
[AddrBlock64KB
]);
3579 ADDR_ASSERT(padSize
[AddrBlock64KB
] >= padSize
[AddrBlock4KB
]);
3580 ADDR_ASSERT(padSize
[AddrBlock64KB
] >= padSize
[AddrBlockMicro
]);
3584 if (returnCode
== ADDR_OK
)
// NOTE(review): (bpp >> 3) is 0 when bpp < 8, which would make this a division by zero -
// confirm that sub-byte bpp can never reach this point.
3586 UINT_64 minSizeAlignInElement
= Max(minSizeAlign
/ (bpp
>> 3), 1u);
3588 for (UINT_32 i
= AddrBlockMicro
; i
< AddrBlockMaxTiledType
; i
++)
3590 padSize
[i
] = PowTwoAlign(padSize
[i
], minSizeAlignInElement
);
3593 // Use minimum block type which meets all conditions above if flag minimizeAlign was set
3594 if (pIn
->flags
.minimizeAlign
)
3596 // If padded size of 64KB block is larger than padded size of 256B block or 4KB
3597 // block, filter out 64KB block from candidate list
3598 if (blockSet
.macro64KB
&&
3599 ((blockSet
.micro
&& (padSize
[AddrBlockMicro
] < padSize
[AddrBlock64KB
])) ||
3600 (blockSet
.macro4KB
&& (padSize
[AddrBlock4KB
] < padSize
[AddrBlock64KB
]))))
3602 blockSet
.macro64KB
= FALSE
;
3605 // If padded size of 4KB block is larger than padded size of 256B block,
3606 // filter out 4KB block from candidate list
3607 if (blockSet
.macro4KB
&&
3609 (padSize
[AddrBlockMicro
] < padSize
[AddrBlock4KB
]))
3611 blockSet
.macro4KB
= FALSE
;
3614 // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint
3615 else if (pIn
->flags
.opt4space
)
// threshold = 1.5x the padded size of the smallest block type that is still enabled.
3617 UINT_64 threshold
= blockSet
.micro
? padSize
[AddrBlockMicro
] :
3618 (blockSet
.macro4KB
? padSize
[AddrBlock4KB
] : padSize
[AddrBlock64KB
]);
3620 threshold
+= threshold
>> 1;
3622 if (blockSet
.macro64KB
&& (padSize
[AddrBlock64KB
] > threshold
))
3624 blockSet
.macro64KB
= FALSE
;
3627 if (blockSet
.macro4KB
&& (padSize
[AddrBlock4KB
] > threshold
))
3629 blockSet
.macro4KB
= FALSE
;
3634 if (blockSet
.macro64KB
&&
3635 (padSize
[AddrBlock64KB
] >= static_cast<UINT_64
>(width
) * height
* slice
* 2) &&
3636 ((blockSet
.value
& ~AddrBlockSetMacro64KB
) != 0))
3638 // If 64KB block waste more than half memory on padding, filter it out from
3639 // candidate list when it is not the only choice left
3640 blockSet
.macro64KB
= FALSE
;
3644 if (blockSet
.value
== 0)
3646 // Bad things happen, client will not get any useful information from AddrLib.
3647 // Maybe we should fill in some output earlier instead of outputting nothing?
3648 ADDR_ASSERT_ALWAYS();
3649 returnCode
= ADDR_INVALIDPARAMS
;
// Publish the surviving block set; XOR is only possible with a macro or var block.
3653 pOut
->validBlockSet
= blockSet
;
3654 pOut
->canXor
= pOut
->canXor
&&
3655 (blockSet
.macro4KB
|| blockSet
.macro64KB
|| blockSet
.var
);
// Pick the concrete swizzle mode: 64KB is taken over 4KB when both are still enabled.
3657 if (blockSet
.macro64KB
|| blockSet
.macro4KB
)
3659 if (addrPreferredSwSet
.value
== AddrSwSetZ
)
3661 pOut
->swizzleMode
= blockSet
.macro64KB
? ADDR_SW_64KB_Z
: ADDR_SW_4KB_Z
;
3663 else if (addrPreferredSwSet
.value
== AddrSwSetS
)
3665 pOut
->swizzleMode
= blockSet
.macro64KB
? ADDR_SW_64KB_S
: ADDR_SW_4KB_S
;
3667 else if (addrPreferredSwSet
.value
== AddrSwSetD
)
3669 pOut
->swizzleMode
= blockSet
.macro64KB
? ADDR_SW_64KB_D
: ADDR_SW_4KB_D
;
3673 ADDR_ASSERT(addrPreferredSwSet
.value
== AddrSwSetR
);
3674 pOut
->swizzleMode
= blockSet
.macro64KB
? ADDR_SW_64KB_R
: ADDR_SW_4KB_R
;
// The _T / _X variant is reached by adding a constant enum offset measured on the Z modes;
// this assumes every swizzle type has its variants at the same offset from its base mode.
3677 if (prtXor
&& blockSet
.macro64KB
)
3679 // Client wants PRTXOR, give back _T swizzle mode if 64KB is available
3680 const UINT_32 prtGap
= ADDR_SW_64KB_Z_T
- ADDR_SW_64KB_Z
;
3681 pOut
->swizzleMode
= static_cast<AddrSwizzleMode
>(pOut
->swizzleMode
+ prtGap
);
3683 else if (pOut
->canXor
)
3685 // Client wants XOR and this is allowed, return XOR version swizzle mode
3686 const UINT_32 xorGap
= ADDR_SW_4KB_Z_X
- ADDR_SW_4KB_Z
;
3687 pOut
->swizzleMode
= static_cast<AddrSwizzleMode
>(pOut
->swizzleMode
+ xorGap
);
3690 else if (blockSet
.micro
)
3692 if (addrPreferredSwSet
.value
== AddrSwSetS
)
3694 pOut
->swizzleMode
= ADDR_SW_256B_S
;
3696 else if (addrPreferredSwSet
.value
== AddrSwSetD
)
3698 pOut
->swizzleMode
= ADDR_SW_256B_D
;
3702 ADDR_ASSERT(addrPreferredSwSet
.value
== AddrSwSetR
);
3703 pOut
->swizzleMode
= ADDR_SW_256B_R
;
3706 else if (blockSet
.linear
)
3708 // Falling into this branch doesn't mean linear is suitable, only that there is no other choice!
3709 pOut
->swizzleMode
= ADDR_SW_LINEAR
;
3713 ADDR_ASSERT(blockSet
.var
);
3715 // Designers consider the VAR swizzle mode useless for most cases
3716 ADDR_UNHANDLED_CASE();
3718 returnCode
= ADDR_NOTSUPPORTED
;
3722 // Post sanity check, at least AddrLib should accept the output generated by its own
3723 if (pOut
->swizzleMode
!= ADDR_SW_LINEAR
)
3725 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn
= {0};
3726 localIn
.flags
= pIn
->flags
;
3727 localIn
.swizzleMode
= pOut
->swizzleMode
;
3728 localIn
.resourceType
= pOut
->resourceType
;
3729 localIn
.format
= pIn
->format
;
3731 localIn
.width
= width
;
3732 localIn
.height
= height
;
3733 localIn
.numSlices
= slice
;
3734 localIn
.numMipLevels
= numMipLevels
;
3735 localIn
.numSamples
= numSamples
;
3736 localIn
.numFrags
= numFrags
;
// The result of the check is not stored; a failure only surfaces through the assert inside it.
3738 HwlComputeSurfaceInfoSanityCheck(&localIn
);
3751 ************************************************************************************************************************
3752 * Gfx9Lib::ComputeStereoInfo
3755 * Compute height alignment and right eye pipeBankXor for stereo surface
3760 ************************************************************************************************************************
// Computes the height alignment required for a stereo surface and, when
// pOut->pStereoInfo is provided, the rightSwizzle value.
//   pIn           surface description (swizzleMode, bpp, height are read here)
//   pOut          pStereoInfo->rightSwizzle is written when pStereoInfo is not NULL
//   pHeightAlign  [out] written only when the XOR terms need a higher y bit than the base equation
// The ADDR_ASSERTs cross-check each closed-form value against the equation found in
// m_equationTable; ADDR_ERROR is set on the failure path at the end.
3762 ADDR_E_RETURNCODE
Gfx9Lib::ComputeStereoInfo(
3763 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
,
3764 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
,
3765 UINT_32
* pHeightAlign
3768 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
3770 UINT_32 eqIndex
= HwlGetEquationIndex(pIn
, pOut
);
// Only proceed when the index refers to an entry of the equation table.
3772 if (eqIndex
< m_numEquations
)
3774 if (IsXor(pIn
->swizzleMode
))
3776 const UINT_32 blkSizeLog2
= GetBlockSizeLog2(pIn
->swizzleMode
);
3777 const UINT_32 numPipeBits
= GetPipeXorBits(blkSizeLog2
);
3778 const UINT_32 numBankBits
= GetBankXorBits(blkSizeLog2
);
3779 const UINT_32 bppLog2
= Log2(pIn
->bpp
>> 3);
// Highest y bit index used inside a 256B block for this element size.
3780 const UINT_32 maxYCoordBlock256
= Log2(Block256_2d
[bppLog2
].h
) - 1;
3781 const ADDR_EQUATION
*pEqToCheck
= &m_equationTable
[eqIndex
];
3783 ADDR_ASSERT(maxYCoordBlock256
==
3784 GetMaxValidChannelIndex(&pEqToCheck
->addr
[0], GetBlockSizeLog2(ADDR_SW_256B
), 1));
// Highest y bit index used by the address bits of the whole block.
3786 const UINT_32 maxYCoordInBaseEquation
=
3787 (blkSizeLog2
- GetBlockSizeLog2(ADDR_SW_256B
)) / 2 + maxYCoordBlock256
;
3789 ADDR_ASSERT(maxYCoordInBaseEquation
==
3790 GetMaxValidChannelIndex(&pEqToCheck
->addr
[0], blkSizeLog2
, 1));
// Highest y bit index referenced by the pipe XOR term (0 when there are no pipe bits).
3792 const UINT_32 maxYCoordInPipeXor
= (numPipeBits
== 0) ? 0 : maxYCoordBlock256
+ numPipeBits
;
3794 ADDR_ASSERT(maxYCoordInPipeXor
==
3795 GetMaxValidChannelIndex(&pEqToCheck
->xor1
[m_pipeInterleaveLog2
], numPipeBits
, 1));
// Highest y bit index referenced by the bank XOR term (0 when there are no bank bits).
3797 const UINT_32 maxYCoordInBankXor
= (numBankBits
== 0) ?
3798 0 : maxYCoordBlock256
+ (numPipeBits
+ 1) / 2 + numBankBits
;
3800 ADDR_ASSERT(maxYCoordInBankXor
==
3801 GetMaxValidChannelIndex(&pEqToCheck
->xor1
[m_pipeInterleaveLog2
+ numPipeBits
], numBankBits
, 1));
3803 const UINT_32 maxYCoordInPipeBankXor
= Max(maxYCoordInPipeXor
, maxYCoordInBankXor
);
// Extra alignment is only needed when the XOR terms reference a y bit above the base equation's.
3805 if (maxYCoordInPipeBankXor
> maxYCoordInBaseEquation
)
3807 *pHeightAlign
= 1u << maxYCoordInPipeBankXor
;
3809 if (pOut
->pStereoInfo
!= NULL
)
3811 pOut
->pStereoInfo
->rightSwizzle
= 0;
// rightSwizzle bits are only set when the aligned height is an odd multiple of *pHeightAlign.
3813 if ((PowTwoAlign(pIn
->height
, *pHeightAlign
) % (*pHeightAlign
* 2)) != 0)
3815 if (maxYCoordInPipeXor
== maxYCoordInPipeBankXor
)
3817 pOut
->pStereoInfo
->rightSwizzle
|= (1u << 1);
3820 if (maxYCoordInBankXor
== maxYCoordInPipeBankXor
)
3822 pOut
->pStereoInfo
->rightSwizzle
|=
3823 1u << ((numPipeBits
% 2) ? numPipeBits
: numPipeBits
+ 1);
3826 ADDR_ASSERT(pOut
->pStereoInfo
->rightSwizzle
==
3827 GetCoordActiveMask(&pEqToCheck
->xor1
[m_pipeInterleaveLog2
],
3828 numPipeBits
+ numBankBits
, 1, maxYCoordInPipeBankXor
));
// Failure path: assert and report ADDR_ERROR.
3836 ADDR_ASSERT_ALWAYS();
3837 returnCode
= ADDR_ERROR
;
3844 ************************************************************************************************************************
3845 * Gfx9Lib::HwlComputeSurfaceInfoTiled
3848 * Internal function to calculate alignment for tiled surface
3852 ************************************************************************************************************************
// Computes the padded pitch/height/slice count, the mip chain extents and the
// slice/surface size and base alignment of a tiled surface, writing them to pOut.
// The return code starts as the result of ComputeBlockDimensionForSurf; it becomes
// ADDR_INVALIDPARAMS when a caller-supplied pitchInElement is not a multiple of the
// pitch alignment or is smaller than the aligned width, and it takes the result of
// ComputeStereoInfo when flags.qbStereo is set.
3854 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSurfaceInfoTiled(
3855 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
3856 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
3859 ADDR_E_RETURNCODE returnCode
= ComputeBlockDimensionForSurf(&pOut
->blockWidth
,
3867 if (returnCode
== ADDR_OK
)
// Default pitch alignment is one block width.
3869 UINT_32 pitchAlignInElement
= pOut
->blockWidth
;
// Only 2D display/rotated surfaces with a single mip, sample and fragment get the
// stricter alignment applied below.
3871 if ((IsTex2d(pIn
->resourceType
) == TRUE
) &&
3872 (pIn
->flags
.display
|| pIn
->flags
.rotated
) &&
3873 (pIn
->numMipLevels
<= 1) &&
3874 (pIn
->numSamples
<= 1) &&
3875 (pIn
->numFrags
<= 1))
3877 // Display engine needs pitch align to be at least 32 pixels.
3878 pitchAlignInElement
= PowTwoAlign(pitchAlignInElement
, 32);
3881 pOut
->pitch
= PowTwoAlign(pIn
->width
, pitchAlignInElement
);
// A caller-provided pitch is considered only for single-mip surfaces; it has to be
// a multiple of the alignment and at least as large as the computed pitch.
3883 if ((pIn
->numMipLevels
<= 1) && (pIn
->pitchInElement
> 0))
3885 if ((pIn
->pitchInElement
% pitchAlignInElement
) != 0)
3887 returnCode
= ADDR_INVALIDPARAMS
;
3889 else if (pIn
->pitchInElement
< pOut
->pitch
)
3891 returnCode
= ADDR_INVALIDPARAMS
;
3895 pOut
->pitch
= pIn
->pitchInElement
;
// heightAlign is only consumed below when it ends up greater than 1; the only
// visible writer is ComputeStereoInfo.
3899 UINT_32 heightAlign
= 0;
3901 if (pIn
->flags
.qbStereo
)
3903 returnCode
= ComputeStereoInfo(pIn
, pOut
, &heightAlign
);
3906 if (returnCode
== ADDR_OK
)
3908 pOut
->height
= PowTwoAlign(pIn
->height
, pOut
->blockHeight
);
3910 if (heightAlign
> 1)
3912 pOut
->height
= PowTwoAlign(pOut
->height
, heightAlign
);
3915 pOut
->numSlices
= PowTwoAlign(pIn
->numSlices
, pOut
->blockSlices
);
// Initial values: no mip tail, and the mip chain extents equal the padded mip 0
// extents. They are revised below when numMipLevels > 1.
3917 pOut
->epitchIsHeight
= FALSE
;
3918 pOut
->mipChainInTail
= FALSE
;
3919 pOut
->firstMipIdInTail
= pIn
->numMipLevels
;
3921 pOut
->mipChainPitch
= pOut
->pitch
;
3922 pOut
->mipChainHeight
= pOut
->height
;
3923 pOut
->mipChainSlice
= pOut
->numSlices
;
3925 if (pIn
->numMipLevels
> 1)
3927 pOut
->firstMipIdInTail
= GetMipChainInfo(pIn
->resourceType
,
// Clamp to the last existing mip id.
3939 const UINT_32 endingMipId
= Min(pOut
->firstMipIdInTail
, pIn
->numMipLevels
- 1);
// endingMipId == 0: the surface takes its pitch/height (and, for thick modes, its
// slice count) from the mip tail dimensions, and the chain is flagged as in-tail.
3941 if (endingMipId
== 0)
3943 const Dim3d tailMaxDim
= GetMipTailDim(pIn
->resourceType
,
3949 pOut
->epitchIsHeight
= TRUE
;
3950 pOut
->pitch
= tailMaxDim
.w
;
3951 pOut
->height
= tailMaxDim
.h
;
3952 pOut
->numSlices
= IsThick(pIn
->resourceType
, pIn
->swizzleMode
) ?
3953 tailMaxDim
.d
: pIn
->numSlices
;
3954 pOut
->mipChainInTail
= TRUE
;
// Mip 0 extent measured in blocks; the mip 1 extent derived from it is what gets
// added to the chain pitch or chain height below.
3958 UINT_32 mip0WidthInBlk
= pOut
->pitch
/ pOut
->blockWidth
;
3959 UINT_32 mip0HeightInBlk
= pOut
->height
/ pOut
->blockHeight
;
3961 AddrMajorMode majorMode
= GetMajorMode(pIn
->resourceType
,
3965 pOut
->numSlices
/ pOut
->blockSlices
);
// Y-major: the chain grows in pitch by the mip 1 width and epitchIsHeight is FALSE.
3966 if (majorMode
== ADDR_MAJOR_Y
)
3968 UINT_32 mip1WidthInBlk
= RoundHalf(mip0WidthInBlk
);
3970 if ((mip1WidthInBlk
== 1) && (endingMipId
> 2))
3975 pOut
->mipChainPitch
+= (mip1WidthInBlk
* pOut
->blockWidth
);
3977 pOut
->epitchIsHeight
= FALSE
;
// Counterpart of the pitch case above: the chain grows in height by the mip 1
// height and epitchIsHeight is TRUE.
3981 UINT_32 mip1HeightInBlk
= RoundHalf(mip0HeightInBlk
);
3983 if ((mip1HeightInBlk
== 1) && (endingMipId
> 2))
3988 pOut
->mipChainHeight
+= (mip1HeightInBlk
* pOut
->blockHeight
);
3990 pOut
->epitchIsHeight
= TRUE
;
// Per-mip output is optional; it is only written when the caller supplied pMipInfo.
3994 if (pOut
->pMipInfo
!= NULL
)
// log2 of the element size in bytes.
3996 UINT_32 elementBytesLog2
= Log2(pIn
->bpp
>> 3);
3998 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
4000 Dim3d mipStartPos
= {0};
4001 UINT_32 mipTailOffsetInBytes
= 0;
4003 mipStartPos
= GetMipStartPos(pIn
->resourceType
,
4013 &mipTailOffsetInBytes
);
// Linearize the block-unit start position (d, h, w) into a block index inside the
// mip chain, then shift by log2(block size) to turn it into a byte offset.
4015 UINT_32 pitchInBlock
=
4016 pOut
->mipChainPitch
/ pOut
->blockWidth
;
4017 UINT_32 sliceInBlock
=
4018 (pOut
->mipChainHeight
/ pOut
->blockHeight
) * pitchInBlock
;
4019 UINT_64 blockIndex
=
4020 mipStartPos
.d
* sliceInBlock
+ mipStartPos
.h
* pitchInBlock
+ mipStartPos
.w
;
4021 UINT_64 macroBlockOffset
=
4022 blockIndex
<< GetBlockSizeLog2(pIn
->swizzleMode
);
4024 pOut
->pMipInfo
[i
].macroBlockOffset
= macroBlockOffset
;
4025 pOut
->pMipInfo
[i
].mipTailOffset
= mipTailOffsetInBytes
;
// Only entry 0 of pMipInfo is described here: surface pitch/height, offset 0, and
// a depth of numSlices for 3D resources or 1 otherwise.
4029 else if (pOut
->pMipInfo
!= NULL
)
4031 pOut
->pMipInfo
[0].pitch
= pOut
->pitch
;
4032 pOut
->pMipInfo
[0].height
= pOut
->height
;
4033 pOut
->pMipInfo
[0].depth
= IsTex3d(pIn
->resourceType
)? pOut
->numSlices
: 1;
4034 pOut
->pMipInfo
[0].offset
= 0;
// The first factor is cast to UINT_64 so the whole product is evaluated in 64 bits.
4037 pOut
->sliceSize
= static_cast<UINT_64
>(pOut
->mipChainPitch
) * pOut
->mipChainHeight
*
4038 (pIn
->bpp
>> 3) * pIn
->numFrags
;
4039 pOut
->surfSize
= pOut
->sliceSize
* pOut
->mipChainSlice
;
4040 pOut
->baseAlign
= HwlComputeSurfaceBaseAlign(pIn
->swizzleMode
);
// NOTE(review): raises the base alignment to at least PrtAlignment; presumably
// guarded by a PRT check that is not visible here - confirm.
4044 pOut
->baseAlign
= Max(pOut
->baseAlign
, PrtAlignment
);
4053 ************************************************************************************************************************
4054 * Gfx9Lib::HwlComputeSurfaceInfoLinear
4057 * Internal function to calculate alignment for linear surface
4061 ************************************************************************************************************************
// Computes pitch, height, mip chain extents, slice/surface size, base alignment and
// block dimensions of a linear surface, writing them to pOut.
// returnCode starts as ADDR_OK and becomes ADDR_INVALIDPARAMS for a 1D resource with
// height > 1, or when the resulting pitch or height is zero; it may also take the
// result of ApplyCustomizedPitchHeight or ComputeSurfaceLinearPadding.
4063 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSurfaceInfoLinear(
4064 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
4065 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
* pOut
///< [out] output structure
4068 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
4070 UINT_32 actualHeight
= 0;
// Element size in bytes.
4071 UINT_32 elementBytes
= pIn
->bpp
>> 3;
// PRT surfaces use PrtAlignment, everything else 256; the value is divided by
// elementBytes below to obtain an alignment in elements.
4072 const UINT_32 alignment
= pIn
->flags
.prt
? PrtAlignment
: 256;
4074 if (IsTex1d(pIn
->resourceType
))
// A 1D resource may not have more than one row.
4076 if (pIn
->height
> 1)
4078 returnCode
= ADDR_INVALIDPARAMS
;
4082 const UINT_32 pitchAlignInElement
= alignment
/ elementBytes
;
4084 pitch
= PowTwoAlign(pIn
->width
, pitchAlignInElement
);
// 1D: every mip level occupies one row of the chain (see the per-mip offsets
// below), so the chain height is the mip count.
4085 actualHeight
= pIn
->numMipLevels
;
// Customized pitch/height is only applied to non-PRT surfaces.
4087 if (pIn
->flags
.prt
== FALSE
)
4089 returnCode
= ApplyCustomizedPitchHeight(pIn
, elementBytes
, pitchAlignInElement
,
4090 &pitch
, &actualHeight
);
4093 if (returnCode
== ADDR_OK
)
4095 if (pOut
->pMipInfo
!= NULL
)
4097 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
// Mip i starts i rows (of pitch * elementBytes bytes each) into the chain.
4099 pOut
->pMipInfo
[i
].offset
= pitch
* elementBytes
* i
;
4100 pOut
->pMipInfo
[i
].pitch
= pitch
;
4101 pOut
->pMipInfo
[i
].height
= 1;
4102 pOut
->pMipInfo
[i
].depth
= 1;
// NOTE(review): presumably the non-1D path - the enclosing else is not visible
// here; confirm.
4110 returnCode
= ComputeSurfaceLinearPadding(pIn
, &pitch
, &actualHeight
, pOut
->pMipInfo
);
// A zero pitch or height is rejected.
4113 if ((pitch
== 0) || (actualHeight
== 0))
4115 returnCode
= ADDR_INVALIDPARAMS
;
4118 if (returnCode
== ADDR_OK
)
4120 pOut
->pitch
= pitch
;
4121 pOut
->height
= pIn
->height
;
4122 pOut
->numSlices
= pIn
->numSlices
;
4123 pOut
->mipChainPitch
= pitch
;
4124 pOut
->mipChainHeight
= actualHeight
;
4125 pOut
->mipChainSlice
= pOut
->numSlices
;
4126 pOut
->epitchIsHeight
= (pIn
->numMipLevels
> 1) ? TRUE
: FALSE
;
// The pitch is cast to UINT_64 so the product is evaluated in 64 bits.
4127 pOut
->sliceSize
= static_cast<UINT_64
>(pOut
->pitch
) * actualHeight
* elementBytes
;
4128 pOut
->surfSize
= pOut
->sliceSize
* pOut
->numSlices
;
// ADDR_SW_LINEAR_GENERAL only aligns the base to the element size and reports a
// block width of 1; other linear modes use `alignment` and 256 bytes worth of
// elements.
4129 pOut
->baseAlign
= (pIn
->swizzleMode
== ADDR_SW_LINEAR_GENERAL
) ? (pIn
->bpp
/ 8) : alignment
;
4130 pOut
->blockWidth
= (pIn
->swizzleMode
== ADDR_SW_LINEAR_GENERAL
) ? 1 : (256 / elementBytes
);
4131 pOut
->blockHeight
= 1;
4132 pOut
->blockSlices
= 1;
4135 // Post calculation validate
4136 ADDR_ASSERT(pOut
->sliceSize
> 0);
4142 ************************************************************************************************************************
4143 * Gfx9Lib::GetMipChainInfo
4146 * Internal function to get out information about mip chain
4149 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4150 ************************************************************************************************************************
// Walks mip levels 0 .. numMipLevel-1 while tracking each level's pitch, height and
// depth plus a running byte offset, and stores them in pMipInfo[mipId] when pMipInfo
// is not NULL.
// Returns firstMipIdInTail, which starts as numMipLevel and is set to a mip id
// further down in the loop.
4152 UINT_32
Gfx9Lib::GetMipChainInfo(
4153 AddrResourceType resourceType
,
4154 AddrSwizzleMode swizzleMode
,
4160 UINT_32 blockHeight
,
4162 UINT_32 numMipLevel
,
4163 ADDR2_MIP_INFO
* pMipInfo
) const
// Largest dimensions accepted by the IsInMipTail test used below.
4165 const Dim3d tailMaxDim
=
4166 GetMipTailDim(resourceType
, swizzleMode
, blockWidth
, blockHeight
, blockDepth
);
4168 UINT_32 mipPitch
= mip0Width
;
4169 UINT_32 mipHeight
= mip0Height
;
// Depth only takes part for 3D resources; everything else is treated as depth 1.
4170 UINT_32 mipDepth
= IsTex3d(resourceType
) ? mip0Depth
: 1;
4172 UINT_32 firstMipIdInTail
= numMipLevel
;
4173 BOOL_32 inTail
= FALSE
;
4174 BOOL_32 finalDim
= FALSE
;
4175 BOOL_32 is3dThick
= IsThick(resourceType
, swizzleMode
);
4176 BOOL_32 is3dThin
= IsTex3d(resourceType
) && (is3dThick
== FALSE
);
4178 for (UINT_32 mipId
= 0; mipId
< numMipLevel
; mipId
++)
4182 if (finalDim
== FALSE
)
// Level size in bytes, computed with or without the depth factor.
4188 mipSize
= mipPitch
* mipHeight
* mipDepth
* (bpp
>> 3);
4192 mipSize
= mipPitch
* mipHeight
* (bpp
>> 3);
// index = log2(bytes per element); it selects the entry of the Block256_* tables.
4197 UINT_32 index
= Log2(bpp
>> 3);
4201 mipPitch
= Block256_3dZ
[index
].w
;
4202 mipHeight
= Block256_3dZ
[index
].h
;
4203 mipDepth
= Block256_3dZ
[index
].d
;
4207 mipPitch
= Block256_2d
[index
].w
;
4208 mipHeight
= Block256_2d
[index
].h
;
// Check whether the current level's dimensions fit in the mip tail.
4217 inTail
= IsInMipTail(resourceType
, swizzleMode
, tailMaxDim
,
4218 mipPitch
, mipHeight
, mipDepth
);
// Record this level as the first one in the tail and give it the tail dimensions.
4222 firstMipIdInTail
= mipId
;
4223 mipPitch
= tailMaxDim
.w
;
4224 mipHeight
= tailMaxDim
.h
;
4228 mipDepth
= tailMaxDim
.d
;
// Pad the level's dimensions up to whole blocks.
4233 mipPitch
= PowTwoAlign(mipPitch
, blockWidth
);
4234 mipHeight
= PowTwoAlign(mipHeight
, blockHeight
);
4238 mipDepth
= PowTwoAlign(mipDepth
, blockDepth
);
4243 if (pMipInfo
!= NULL
)
4245 pMipInfo
[mipId
].pitch
= mipPitch
;
4246 pMipInfo
[mipId
].height
= mipHeight
;
4247 pMipInfo
[mipId
].depth
= mipDepth
;
4248 pMipInfo
[mipId
].offset
= offset
;
// Advance the running offset by this level's size in bytes.
4251 offset
+= (mipPitch
* mipHeight
* mipDepth
* (bpp
>> 3));
// Halve the dimensions for the next level, never going below 1.
4257 mipDepth
= Max(mipDepth
>> 1, 1u);
4262 mipPitch
= Max(mipPitch
>> 1, 1u);
4263 mipHeight
= Max(mipHeight
>> 1, 1u);
4265 if (is3dThick
|| is3dThin
)
4267 mipDepth
= Max(mipDepth
>> 1, 1u);
4272 return firstMipIdInTail
;
4276 ************************************************************************************************************************
4277 * Gfx9Lib::GetMetaMiptailInfo
4280 * Get mip tail coordinate information.
4284 ************************************************************************************************************************
// Fills pInfo[0 .. numMipInTail-1] with the start coordinate (startX/Y/Z) and the
// dimensions (width/height/depth) of every mip in the meta mip tail, and marks each
// entry as inMiptail. mipCoord and the mip dimensions are advanced after each entry.
4286 VOID
Gfx9Lib::GetMetaMiptailInfo(
4287 ADDR2_META_MIP_INFO
* pInfo
, ///< [out] output structure to store per mip coord
4288 Dim3d mipCoord
, ///< [in] mip tail base coord
4289 UINT_32 numMipInTail
, ///< [in] number of mips in tail
4290 Dim3d
* pMetaBlkDim
///< [in] meta block width/height/depth
// A meta block deeper than 1 is treated as thick.
4293 BOOL_32 isThick
= (pMetaBlkDim
->d
> 1);
// The first tail mip spans the full meta block width and depth but only half of
// its height.
4294 UINT_32 mipWidth
= pMetaBlkDim
->w
;
4295 UINT_32 mipHeight
= pMetaBlkDim
->h
>> 1;
4296 UINT_32 mipDepth
= pMetaBlkDim
->d
;
// minInc is chosen from the meta block height: 128 for >= 512, 64 for 256,
// otherwise 32.
4301 minInc
= (pMetaBlkDim
->h
>= 512) ? 128 : ((pMetaBlkDim
->h
== 256) ? 64 : 32);
4303 else if (pMetaBlkDim
->h
>= 1024)
4307 else if (pMetaBlkDim
->h
== 512)
// 0xFFFFFFFF acts as a "not set yet" sentinel; it is tested inside the loop.
4316 UINT_32 blk32MipId
= 0xFFFFFFFF;
4318 for (UINT_32 mip
= 0; mip
< numMipInTail
; mip
++)
// Record the current coordinate and dimensions before advancing to the next mip.
4320 pInfo
[mip
].inMiptail
= TRUE
;
4321 pInfo
[mip
].startX
= mipCoord
.w
;
4322 pInfo
[mip
].startY
= mipCoord
.h
;
4323 pInfo
[mip
].startZ
= mipCoord
.d
;
4324 pInfo
[mip
].width
= mipWidth
;
4325 pInfo
[mip
].height
= mipHeight
;
4326 pInfo
[mip
].depth
= mipDepth
;
4330 if (blk32MipId
== 0xFFFFFFFF)
// Restart from the start position recorded for mip blk32MipId, then apply an
// offset that depends on how many levels past it the current mip is.
4335 mipCoord
.w
= pInfo
[blk32MipId
].startX
;
4336 mipCoord
.h
= pInfo
[blk32MipId
].startY
;
4337 mipCoord
.d
= pInfo
[blk32MipId
].startZ
;
4339 switch (mip
- blk32MipId
)
4342 mipCoord
.w
+= 32; // 16x16
4345 mipCoord
.h
+= 32; // 8x8
4348 mipCoord
.h
+= 32; // 4x4
4352 mipCoord
.h
+= 32; // 2x2
4356 mipCoord
.h
+= 32; // 1x1
4359 // The following are for BC/ASTC formats
4361 mipCoord
.h
+= 48; // 1/2 x 1/2
4364 mipCoord
.h
+= 48; // 1/4 x 1/4
4368 mipCoord
.h
+= 48; // 1/8 x 1/8
4372 mipCoord
.h
+= 48; // 1/16 x 1/16
4376 ADDR_ASSERT_ALWAYS();
// The level right at blk32MipId is followed by a 16-wide mip, later ones by 8-wide
// mips; height (and depth, further down) follow the width.
4380 mipWidth
= ((mip
- blk32MipId
) == 0) ? 16 : 8;
4381 mipHeight
= mipWidth
;
4385 mipDepth
= mipWidth
;
4390 if (mipWidth
<= minInc
)
4392 // if we're below the minimal increment...
4395 // For 3d, just go in z direction
4396 mipCoord
.d
+= mipDepth
;
4400 // For 2d, first go across, then down
4401 if ((mipWidth
* 2) == minInc
)
4403 // if we're 2 mips below, that's when we go back in x, and down in y
4404 mipCoord
.w
-= minInc
;
4405 mipCoord
.h
+= minInc
;
4409 // otherwise, just go across in x
4410 mipCoord
.w
+= minInc
;
4416 // On even mip, go down, otherwise, go across
4419 mipCoord
.w
+= mipWidth
;
4423 mipCoord
.h
+= mipHeight
;
4426 // Divide the width by 2
4428 // After the first mip in tail, the mip is always a square
4429 mipHeight
= mipWidth
;
4430 // ...or for 3d, a cube
4433 mipDepth
= mipWidth
;
4440 ************************************************************************************************************************
4441 * Gfx9Lib::GetMipStartPos
4444 * Internal function to get out information about mip logical start position
4447 * logical start position in macro block width/height/depth of one mip level within one slice
4448 ************************************************************************************************************************
// Computes the start position of mip level mipId, accumulated in mipStartPos in
// units of blocks, and writes a byte offset taken from MipTailOffset256B through
// pMipTailBytesOffset.
4450 Dim3d
Gfx9Lib::GetMipStartPos(
4451 AddrResourceType resourceType
,
4452 AddrSwizzleMode swizzleMode
,
4457 UINT_32 blockHeight
,
4460 UINT_32 log2ElementBytes
,
4461 UINT_32
* pMipTailBytesOffset
) const
4463 Dim3d mipStartPos
= {0};
4464 const Dim3d tailMaxDim
= GetMipTailDim(resourceType
, swizzleMode
, blockWidth
, blockHeight
, blockDepth
);
4466 // Report mip in tail if Mip0 is already in mip tail
4467 BOOL_32 inMipTail
= IsInMipTail(resourceType
, swizzleMode
, tailMaxDim
, width
, height
, depth
);
4468 UINT_32 log2blkSize
= GetBlockSizeLog2(swizzleMode
);
// Defaults to mipId; replaced by (mipId - endingMip) further down when
// mipId >= endingMip.
4469 UINT_32 mipIndexInTail
= mipId
;
4471 if (inMipTail
== FALSE
)
4473 // Mip 0 dimension, unit in block
4474 UINT_32 mipWidthInBlk
= width
/ blockWidth
;
4475 UINT_32 mipHeightInBlk
= height
/ blockHeight
;
4476 UINT_32 mipDepthInBlk
= depth
/ blockDepth
;
4477 AddrMajorMode majorMode
= GetMajorMode(resourceType
,
// NOTE(review): endingMip starts one past mipId, so the "mipId >= endingMip" test
// below can only pass if endingMip is lowered inside the loop; that assignment is
// not visible here - confirm.
4483 UINT_32 endingMip
= mipId
+ 1;
// Accumulate the start position by stepping over every level before mipId.
4485 for (UINT_32 i
= 1; i
<= mipId
; i
++)
// Levels 1 and 3: with ADDR_MAJOR_Y the position advances in w, otherwise in h.
4487 if ((i
== 1) || (i
== 3))
4489 if (majorMode
== ADDR_MAJOR_Y
)
4491 mipStartPos
.w
+= mipWidthInBlk
;
4495 mipStartPos
.h
+= mipHeightInBlk
;
// Remaining levels: ADDR_MAJOR_X advances w, ADDR_MAJOR_Y advances h, and the
// last alternative advances d.
4500 if (majorMode
== ADDR_MAJOR_X
)
4502 mipStartPos
.w
+= mipWidthInBlk
;
4504 else if (majorMode
== ADDR_MAJOR_Y
)
4506 mipStartPos
.h
+= mipHeightInBlk
;
4510 mipStartPos
.d
+= mipDepthInBlk
;
4514 BOOL_32 inTail
= FALSE
;
// For thick modes the in-tail condition is one of three variants over the per-axis
// block counts; dim = log2blkSize % 3 is computed for that choice.
4516 if (IsThick(resourceType
, swizzleMode
))
4518 UINT_32 dim
= log2blkSize
% 3;
4523 (mipWidthInBlk
<= 2) && (mipHeightInBlk
== 1) && (mipDepthInBlk
<= 2);
4528 (mipWidthInBlk
== 1) && (mipHeightInBlk
<= 2) && (mipDepthInBlk
<= 2);
4533 (mipWidthInBlk
<= 2) && (mipHeightInBlk
<= 2) && (mipDepthInBlk
== 1);
// Otherwise the parity of log2blkSize selects which axis may still span two blocks.
4538 if (log2blkSize
& 1)
4540 inTail
= (mipWidthInBlk
<= 2) && (mipHeightInBlk
== 1);
4544 inTail
= (mipWidthInBlk
== 1) && (mipHeightInBlk
<= 2);
// Shrink the per-axis block counts with RoundHalf for the next level.
4554 mipWidthInBlk
= RoundHalf(mipWidthInBlk
);
4555 mipHeightInBlk
= RoundHalf(mipHeightInBlk
);
4556 mipDepthInBlk
= RoundHalf(mipDepthInBlk
);
4559 if (mipId
>= endingMip
)
4562 mipIndexInTail
= mipId
- endingMip
;
// The table index is biased by (MaxMacroBits - log2blkSize) and bounds-checked by
// the assert; the table entry is scaled by 256 (<< 8) to give the stored offset.
4568 UINT_32 index
= mipIndexInTail
+ MaxMacroBits
- log2blkSize
;
4569 ADDR_ASSERT(index
< sizeof(MipTailOffset256B
) / sizeof(UINT_32
));
4570 *pMipTailBytesOffset
= MipTailOffset256B
[index
] << 8;
4577 ************************************************************************************************************************
4578 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4581 * Internal function to calculate address from coord for tiled swizzle surface
4585 ************************************************************************************************************************
// Computes the address (pOut->addr) of the element at (x, y, slice, sample) in a
// tiled surface. It rebuilds the surface info through ComputeSurfaceInfoTiled,
// computes the offset inside the block (including pipe/bank XOR for XOR swizzle
// modes and the mip tail offset), and ORs it with the byte offset of the block
// inside the mip chain.
// returnCode is taken from ComputeSurfaceInfoTiled and ApplyCustomerPipeBankXor; it
// is also set to ADDR_INVALIDPARAMS near the end (NOTE(review): presumably when
// `valid` is FALSE - the guarding branch is not visible here).
4587 ADDR_E_RETURNCODE
Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4588 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
* pIn
, ///< [in] input structure
4589 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
* pOut
///< [out] output structure
// Build a surface-info query from the address query; dimensions and counts of 0
// are clamped to 1.
4592 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn
= {0};
4593 localIn
.swizzleMode
= pIn
->swizzleMode
;
4594 localIn
.flags
= pIn
->flags
;
4595 localIn
.resourceType
= pIn
->resourceType
;
4596 localIn
.bpp
= pIn
->bpp
;
4597 localIn
.width
= Max(pIn
->unalignedWidth
, 1u);
4598 localIn
.height
= Max(pIn
->unalignedHeight
, 1u);
4599 localIn
.numSlices
= Max(pIn
->numSlices
, 1u);
4600 localIn
.numMipLevels
= Max(pIn
->numMipLevels
, 1u);
4601 localIn
.numSamples
= Max(pIn
->numSamples
, 1u);
4602 localIn
.numFrags
= Max(pIn
->numFrags
, 1u);
// A custom pitch is only forwarded for single-mip surfaces.
4603 if (localIn
.numMipLevels
<= 1)
4605 localIn
.pitchInElement
= pIn
->pitchInElement
;
4608 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut
= {0};
4609 ADDR_E_RETURNCODE returnCode
= ComputeSurfaceInfoTiled(&localIn
, &localOut
);
// Valid when the surface info succeeded, the mode is thin or thick for this
// resource type, and a non-zero pipeBankXor is only used with an XOR swizzle mode.
4611 BOOL_32 valid
= (returnCode
== ADDR_OK
) &&
4612 (IsThin(pIn
->resourceType
, pIn
->swizzleMode
) ||
4613 IsThick(pIn
->resourceType
, pIn
->swizzleMode
)) &&
4614 ((pIn
->pipeBankXor
== 0) || (IsXor(pIn
->swizzleMode
)));
// log2 of the element size in bytes.
4618 UINT_32 log2ElementBytes
= Log2(pIn
->bpp
>> 3);
4619 Dim3d mipStartPos
= {0};
4620 UINT_32 mipTailBytesOffset
= 0;
4622 if (pIn
->numMipLevels
> 1)
4624 // Mip-map chain cannot be MSAA surface
4625 ADDR_ASSERT((pIn
->numSamples
<= 1) && (pIn
->numFrags
<= 1));
4627 mipStartPos
= GetMipStartPos(pIn
->resourceType
,
4632 localOut
.blockWidth
,
4633 localOut
.blockHeight
,
4634 localOut
.blockSlices
,
4637 &mipTailBytesOffset
);
// These stay 0 unless the XOR path below fills them in.
4640 UINT_32 interleaveOffset
= 0;
4641 UINT_32 pipeBits
= 0;
4642 UINT_32 pipeXor
= 0;
4643 UINT_32 bankBits
= 0;
4644 UINT_32 bankXor
= 0;
4646 if (IsThin(pIn
->resourceType
, pIn
->swizzleMode
))
4648 UINT_32 blockOffset
= 0;
4649 UINT_32 log2blkSize
= GetBlockSizeLog2(pIn
->swizzleMode
);
4651 if (IsZOrderSwizzle(pIn
->swizzleMode
))
4653 // Morton generation
// 1-byte and 4-byte elements: the low bits interleave (y, x) and the high bits
// interleave (x, y); the assert checks that the two parts do not overlap.
4654 if ((log2ElementBytes
== 0) || (log2ElementBytes
== 2))
4656 UINT_32 totalLowBits
= 6 - log2ElementBytes
;
4657 UINT_32 mortBits
= totalLowBits
/ 2;
4658 UINT_32 lowBitsValue
= MortonGen2d(pIn
->y
, pIn
->x
, mortBits
);
4659 // Are 9 bits enough?
4660 UINT_32 highBitsValue
=
4661 MortonGen2d(pIn
->x
>> mortBits
, pIn
->y
>> mortBits
, 9) << totalLowBits
;
4662 blockOffset
= lowBitsValue
| highBitsValue
;
4663 ADDR_ASSERT(blockOffset
== lowBitsValue
+ highBitsValue
);
4667 blockOffset
= MortonGen2d(pIn
->y
, pIn
->x
, 13);
4670 // Fill LSBs with sample bits
4671 if (pIn
->numSamples
> 1)
4673 blockOffset
*= pIn
->numSamples
;
4674 blockOffset
|= pIn
->sample
;
4677 // Shift according to BytesPP
4678 blockOffset
<<= log2ElementBytes
;
4682 // Micro block offset
4683 UINT_32 microBlockOffset
= ComputeSurface2DMicroBlockOffset(pIn
);
4684 blockOffset
= microBlockOffset
;
4686 // Micro block dimension
4687 ADDR_ASSERT(log2ElementBytes
< MaxNumOfBpp
);
4688 Dim2d microBlockDim
= Block256_2d
[log2ElementBytes
];
4689 // Morton generation; are 12 bits enough?
4691 MortonGen2d((pIn
->x
/ microBlockDim
.w
), (pIn
->y
/ microBlockDim
.h
), 12) << 8;
4693 // Sample bits start location
4694 UINT_32 sampleStart
= log2blkSize
- Log2(pIn
->numSamples
);
4695 // Join sample bits information to the highest Macro block bits
4696 if (IsNonPrtXor(pIn
->swizzleMode
))
4698 // Non-prt-Xor : xor highest Macro block bits with sample bits
4699 blockOffset
= blockOffset
^ (pIn
->sample
<< sampleStart
);
4703 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4704 // after this op, the blockOffset only contains log2 Macro block size bits
4705 blockOffset
%= (1 << sampleStart
);
4706 blockOffset
|= (pIn
->sample
<< sampleStart
);
4707 ADDR_ASSERT((blockOffset
>> log2blkSize
) == 0);
4711 if (IsXor(pIn
->swizzleMode
))
4713 // Mask off bits above Macro block bits to keep page synonyms working for prt
4714 if (IsPrt(pIn
->swizzleMode
))
4716 blockOffset
&= ((1 << log2blkSize
) - 1);
4719 // Preserve offset inside pipe interleave
4720 interleaveOffset
= blockOffset
& ((1 << m_pipeInterleaveLog2
) - 1);
4721 blockOffset
>>= m_pipeInterleaveLog2
;
// Peel the pipe and then the bank field off the bottom of blockOffset, replacing
// each with its FoldXor2d result when the parts are reassembled below.
4724 pipeBits
= GetPipeXorBits(log2blkSize
);
4726 pipeXor
= FoldXor2d(blockOffset
, pipeBits
);
4727 blockOffset
>>= pipeBits
;
4730 bankBits
= GetBankXorBits(log2blkSize
);
4732 bankXor
= FoldXor2d(blockOffset
, bankBits
);
4733 blockOffset
>>= bankBits
;
4735 // Put all the part back together
4736 blockOffset
<<= bankBits
;
4737 blockOffset
|= bankXor
;
4738 blockOffset
<<= pipeBits
;
4739 blockOffset
|= pipeXor
;
4740 blockOffset
<<= m_pipeInterleaveLog2
;
4741 blockOffset
|= interleaveOffset
;
// The mip tail offset must not share bits with the block offset (so OR equals
// ADD), and a non-zero tail offset requires the block offset to fit in the block.
4744 ADDR_ASSERT((blockOffset
| mipTailBytesOffset
) == (blockOffset
+ mipTailBytesOffset
));
4745 ADDR_ASSERT((mipTailBytesOffset
== 0u) || (blockOffset
< (1u << log2blkSize
)));
4747 blockOffset
|= mipTailBytesOffset
;
4749 if (IsNonPrtXor(pIn
->swizzleMode
) && (pIn
->numSamples
<= 1))
4751 // Apply slice xor if not MSAA/PRT
4752 blockOffset
^= (ReverseBitVector(pIn
->slice
, pipeBits
) << m_pipeInterleaveLog2
);
4753 blockOffset
^= (ReverseBitVector(pIn
->slice
>> pipeBits
, bankBits
) <<
4754 (m_pipeInterleaveLog2
+ pipeBits
));
4757 returnCode
= ApplyCustomerPipeBankXor(pIn
->swizzleMode
, pIn
->pipeBankXor
,
4758 bankBits
, pipeBits
, &blockOffset
);
// Keep only the bits inside one block.
4760 blockOffset
%= (1 << log2blkSize
);
// Locate the block inside the mip chain (slice, row, column in units of blocks,
// offset by the mip start position) and convert its index to a byte offset.
4762 UINT_32 pitchInMacroBlock
= localOut
.mipChainPitch
/ localOut
.blockWidth
;
4763 UINT_32 paddedHeightInMacroBlock
= localOut
.mipChainHeight
/ localOut
.blockHeight
;
4764 UINT_32 sliceSizeInMacroBlock
= pitchInMacroBlock
* paddedHeightInMacroBlock
;
4765 UINT_32 macroBlockIndex
=
4766 (pIn
->slice
+ mipStartPos
.d
) * sliceSizeInMacroBlock
+
4767 ((pIn
->y
/ localOut
.blockHeight
) + mipStartPos
.h
) * pitchInMacroBlock
+
4768 ((pIn
->x
/ localOut
.blockWidth
) + mipStartPos
.w
);
// The index is widened to UINT_64 before the shift.
4770 UINT_64 macroBlockOffset
= (static_cast<UINT_64
>(macroBlockIndex
) <<
4771 GetBlockSizeLog2(pIn
->swizzleMode
));
4773 pOut
->addr
= blockOffset
| macroBlockOffset
;
// NOTE(review): presumably the thick-mode path (else of the IsThin test above);
// the branch keyword is not visible here - confirm.
4777 UINT_32 log2blkSize
= GetBlockSizeLog2(pIn
->swizzleMode
);
4779 Dim3d microBlockDim
= Block1K_3d
[log2ElementBytes
];
// 3D Morton code of the micro block coordinate, ORed with the offset inside the
// micro block.
4781 UINT_32 blockOffset
= MortonGen3d((pIn
->x
/ microBlockDim
.w
),
4782 (pIn
->y
/ microBlockDim
.h
),
4783 (pIn
->slice
/ microBlockDim
.d
),
4787 blockOffset
|= ComputeSurface3DMicroBlockOffset(pIn
);
4789 if (IsXor(pIn
->swizzleMode
))
4791 // Mask off bits above Macro block bits to keep page synonyms working for prt
4792 if (IsPrt(pIn
->swizzleMode
))
4794 blockOffset
&= ((1 << log2blkSize
) - 1);
4797 // Preserve offset inside pipe interleave
4798 interleaveOffset
= blockOffset
& ((1 << m_pipeInterleaveLog2
) - 1);
4799 blockOffset
>>= m_pipeInterleaveLog2
;
// Same pipe/bank peel-and-reassemble sequence as in the thin path, using FoldXor3d.
4802 pipeBits
= GetPipeXorBits(log2blkSize
);
4804 pipeXor
= FoldXor3d(blockOffset
, pipeBits
);
4805 blockOffset
>>= pipeBits
;
4808 bankBits
= GetBankXorBits(log2blkSize
);
4810 bankXor
= FoldXor3d(blockOffset
, bankBits
);
4811 blockOffset
>>= bankBits
;
4813 // Put all the part back together
4814 blockOffset
<<= bankBits
;
4815 blockOffset
|= bankXor
;
4816 blockOffset
<<= pipeBits
;
4817 blockOffset
|= pipeXor
;
4818 blockOffset
<<= m_pipeInterleaveLog2
;
4819 blockOffset
|= interleaveOffset
;
4822 ADDR_ASSERT((blockOffset
| mipTailBytesOffset
) == (blockOffset
+ mipTailBytesOffset
));
4823 ADDR_ASSERT((mipTailBytesOffset
== 0u) || (blockOffset
< (1u << log2blkSize
)));
4824 blockOffset
|= mipTailBytesOffset
;
4826 returnCode
= ApplyCustomerPipeBankXor(pIn
->swizzleMode
, pIn
->pipeBankXor
,
4827 bankBits
, pipeBits
, &blockOffset
);
4829 blockOffset
%= (1 << log2blkSize
);
// Block coordinate of the element inside the mip chain, in units of blocks.
4831 UINT_32 xb
= pIn
->x
/ localOut
.blockWidth
+ mipStartPos
.w
;
4832 UINT_32 yb
= pIn
->y
/ localOut
.blockHeight
+ mipStartPos
.h
;
4833 UINT_32 zb
= pIn
->slice
/ localOut
.blockSlices
+ + mipStartPos
.d
;
4835 UINT_32 pitchInBlock
= localOut
.mipChainPitch
/ localOut
.blockWidth
;
4836 UINT_32 sliceSizeInBlock
=
4837 (localOut
.mipChainHeight
/ localOut
.blockHeight
) * pitchInBlock
;
4838 UINT_32 blockIndex
= zb
* sliceSizeInBlock
+ yb
* pitchInBlock
+ xb
;
// NOTE(review): blockIndex is a UINT_32 and is shifted without the 64-bit cast
// used in the thin path above; check whether large surfaces can overflow here.
4840 pOut
->addr
= blockOffset
| (blockIndex
<< log2blkSize
);
4845 returnCode
= ADDR_INVALIDPARAMS
;
4852 ************************************************************************************************************************
4853 * Gfx9Lib::ComputeSurfaceLinearPadding
4856 * Internal function to calculate padding for linear swizzle 2D/3D surface
4860 ************************************************************************************************************************
// Computes the padded mip 0 width (in elements) and the padded slice height of a
// linear surface. When pMipInfo is not NULL it also fills the offset, pitch, height
// and depth of each mip level.
// returnCode starts as ADDR_OK and takes the result of ApplyCustomizedPitchHeight;
// the outputs are only written when that result is ADDR_OK.
4862 ADDR_E_RETURNCODE
Gfx9Lib::ComputeSurfaceLinearPadding(
4863 const ADDR2_COMPUTE_SURFACE_INFO_INPUT
* pIn
, ///< [in] input structure
4864 UINT_32
* pMipmap0PaddedWidth
, ///< [out] padded width in element
4865 UINT_32
* pSlice0PaddedHeight
, ///< [out] padded height for HW
4866 ADDR2_MIP_INFO
* pMipInfo
///< [out] per mip information
4869 ADDR_E_RETURNCODE returnCode
= ADDR_OK
;
// Element size in bytes.
4871 UINT_32 elementBytes
= pIn
->bpp
>> 3;
4872 UINT_32 pitchAlignInElement
= 0;
// ADDR_SW_LINEAR_GENERAL: the pitch is aligned to a single element, and the asserts
// expect at most one mip level and one slice.
4874 if (pIn
->swizzleMode
== ADDR_SW_LINEAR_GENERAL
)
4876 ADDR_ASSERT(pIn
->numMipLevels
<= 1);
4877 ADDR_ASSERT(pIn
->numSlices
<= 1);
4878 pitchAlignInElement
= 1;
// Otherwise the pitch is aligned to 256 bytes worth of elements.
4882 pitchAlignInElement
= (256 / elementBytes
);
4885 UINT_32 mipChainWidth
= PowTwoAlign(pIn
->width
, pitchAlignInElement
);
4886 UINT_32 slice0PaddedHeight
= pIn
->height
;
4888 returnCode
= ApplyCustomizedPitchHeight(pIn
, elementBytes
, pitchAlignInElement
,
4889 &mipChainWidth
, &slice0PaddedHeight
);
4891 if (returnCode
== ADDR_OK
)
4893 UINT_32 mipChainHeight
= 0;
4894 UINT_32 mipHeight
= pIn
->height
;
// Mips are stacked vertically: each level's byte offset is the pitch times the
// height accumulated so far, times the element size.
4896 for (UINT_32 i
= 0; i
< pIn
->numMipLevels
; i
++)
4898 if (pMipInfo
!= NULL
)
4900 pMipInfo
[i
].offset
= mipChainWidth
* mipChainHeight
* elementBytes
;
4901 pMipInfo
[i
].pitch
= mipChainWidth
;
4902 pMipInfo
[i
].height
= mipHeight
;
4903 pMipInfo
[i
].depth
= 1;
// Add this level to the chain height, then shrink the height for the next level,
// never going below 1.
4906 mipChainHeight
+= mipHeight
;
4907 mipHeight
= RoundHalf(mipHeight
);
4908 mipHeight
= Max(mipHeight
, 1u);
// With more than one mip the whole chain height is reported; otherwise the
// (possibly customized) slice 0 height.
4911 *pMipmap0PaddedWidth
= mipChainWidth
;
4912 *pSlice0PaddedHeight
= (pIn
->numMipLevels
> 1) ? mipChainHeight
: slice0PaddedHeight
;