#include "gfx9addrlib.h"
#include "gfx9_gb_reg.h"
-#include "gfx9_enum.h"
-#if BRAHMA_BUILD
-#include "amdgpu_id.h"
-#else
-#include "ai_id.h"
-#include "rv_id.h"
-#endif
+#include "amdgpu_asic_addr.h"
+
+#include "util/macros.h"
////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
}
else
{
- numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
+ if (m_settings.applyAliasFix)
+ {
+ numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
+ }
+ else
+ {
+ numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
+ }
}
numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
- Dim3d metaBlkDim = {8, 8, 1};
+ Dim3d metaBlkDim = {8, 8, 1};
UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
- UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
- UINT_32 heightAmp = totalAmpBits - widthAmp;
+ UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
+ UINT_32 heightAmp = totalAmpBits - widthAmp;
metaBlkDim.w <<= widthAmp;
metaBlkDim.h <<= heightAmp;
pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
&numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
- UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
-
- pOut->pitch = numMetaBlkX * metaBlkDim.w;
- pOut->height = numMetaBlkY * metaBlkDim.h;
- pOut->sliceSize = numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk * 4;
+ const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
+ UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
- pOut->metaBlkWidth = metaBlkDim.w;
- pOut->metaBlkHeight = metaBlkDim.h;
- pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
+ if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
+ {
+ align *= (numPipeTotal >> 1);
+ }
- pOut->baseAlign = Max(numCompressBlkPerMetaBlk * 4, sizeAlign);
+ align = Max(align, metaBlkSize);
if (m_settings.metaBaseAlignFix)
{
- pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
+ align = Max(align, GetBlockSize(pIn->swizzleMode));
}
- if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
+ if (m_settings.htileAlignFix)
{
- UINT_32 additionalAlign = numPipeTotal * numCompressBlkPerMetaBlk * 2;
+ const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
+ const INT_32 htileCachelineSizeLog2 = 11;
+ const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
- if (additionalAlign > sizeAlign)
- {
- sizeAlign = additionalAlign;
- }
+ INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
+
+ align <<= rbMaskPadding;
}
- pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
+ pOut->pitch = numMetaBlkX * metaBlkDim.w;
+ pOut->height = numMetaBlkY * metaBlkDim.h;
+ pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
+
+ pOut->metaBlkWidth = metaBlkDim.w;
+ pOut->metaBlkHeight = metaBlkDim.h;
+ pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
+
+ pOut->baseAlign = align;
+ pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
return ADDR_OK;
}
ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
) const
{
- ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
+// TODO: Clarify with AddrLib team
+// ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
pIn->swizzleMode);
}
else
{
- numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
+ if (m_settings.applyAliasFix)
+ {
+ numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
+ }
+ else
+ {
+ numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
+ }
numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
}
UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
+ if (m_settings.metaBaseAlignFix)
+ {
+ sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
+ }
+
pOut->pitch = numMetaBlkX * metaBlkDim.w;
pOut->height = numMetaBlkY * metaBlkDim.h;
pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
- if (m_settings.metaBaseAlignFix)
- {
- pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
- }
-
pOut->metaBlkWidth = metaBlkDim.w;
pOut->metaBlkHeight = metaBlkDim.h;
if ((numPipeTotal > 1) || (numRbTotal > 1))
{
+ const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
+
numCompressBlkPerMetaBlk =
- Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : 1024));
+ Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
{
sizeAlign *= (numFrags / m_maxCompFrag);
}
+ if (m_settings.metaBaseAlignFix)
+ {
+ sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
+ }
+
pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
numCompressBlkPerMetaBlk * numFrags;
pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
- if (m_settings.metaBaseAlignFix)
- {
- pOut->dccRamBaseAlign = Max(pOut->dccRamBaseAlign, GetBlockSize(pIn->swizzleMode));
- }
-
pOut->pitch = numMetaBlkX * metaBlkDim.w;
pOut->height = numMetaBlkY * metaBlkDim.h;
pOut->depth = numMetaBlkZ * metaBlkDim.d;
/**
************************************************************************************************************************
-* Gfx9Lib::HwlGetMaxAlignments
+* Gfx9Lib::HwlComputeMaxBaseAlignments
*
* @brief
* Gets maximum alignments
* @return
-* ADDR_E_RETURNCODE
+* maximum alignments
************************************************************************************************************************
*/
-ADDR_E_RETURNCODE Gfx9Lib::HwlGetMaxAlignments(
- ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut ///< [out] output structure
- ) const
+UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
{
- pOut->baseAlign = HwlComputeSurfaceBaseAlign(ADDR_SW_64KB);
+ return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB);
+}
- return ADDR_OK;
+/**
+************************************************************************************************************************
+* Gfx9Lib::HwlComputeMaxMetaBaseAlignments
+*
+* @brief
+* Gets maximum alignments for metadata
+* @return
+* maximum alignments for metadata
+************************************************************************************************************************
+*/
+UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
+{
+ // Max base alignment for Htile
+ const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
+ const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
+
+ // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
+ // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
+ ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
+ const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
+
+ UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
+
+ if (maxNumPipeTotal > 2)
+ {
+ maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
+ }
+
+ maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
+
+ if (m_settings.metaBaseAlignFix)
+ {
+ maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB));
+ }
+
+ if (m_settings.htileAlignFix)
+ {
+ maxBaseAlignHtile *= maxNumPipeTotal;
+ }
+
+ // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
+
+ // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
+ UINT_32 maxBaseAlignDcc3D = 65536;
+
+ if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
+ {
+ maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
+ }
+
+ // Max base alignment for Msaa Dcc
+ UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
+
+ if (m_settings.metaBaseAlignFix)
+ {
+ maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB));
+ }
+
+ return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
}
/**
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
- ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
- ) const
+ ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
{
ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
input.size = sizeof(input);
UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
- CoordEq metaEq;
+ MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
+ Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
+ metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
- GetMetaEquation(&metaEq, 0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
- Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
- metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
+ const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
UINT_32 xb = pIn->x / output.metaBlkWidth;
UINT_32 yb = pIn->y / output.metaBlkHeight;
UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
- UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
+ UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
pOut->addr = address >> 1;
pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
- ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
- ) const
+ ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
{
ADDR_E_RETURNCODE returnCode = ADDR_OK;
UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
- CoordEq metaEq;
+ MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
+ Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
+ metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
- GetMetaEquation(&metaEq, 0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
- Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
- metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
+ const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
UINT_32 xb = pIn->x / output.metaBlkWidth;
UINT_32 yb = pIn->y / output.metaBlkHeight;
UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
- UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
+ UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
pOut->addr = address >> 1;
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
- ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure
- ) const
+ ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
{
ADDR_E_RETURNCODE returnCode = ADDR_OK;
UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
- CoordEq metaEq;
+ MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
+ Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
+ metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
- GetMetaEquation(&metaEq, 0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
- Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
- metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
+ const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
pIn->swizzleMode);
UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
UINT_32 x, y, z, s, m;
- metaEq.solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
+ pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
pOut->slice = m / sliceSizeInBlock;
pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
- ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) const
+ ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
{
ADDR_E_RETURNCODE returnCode = ADDR_OK;
UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
- CoordEq metaEq;
+ MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
+ Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
+ metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
+ compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
- GetMetaEquation(&metaEq, pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
- Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
- metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
- compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2);
+ const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
UINT_32 xb = pIn->x / output.metaBlkWidth;
UINT_32 yb = pIn->y / output.metaBlkHeight;
UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
- UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
+ UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
pOut->addr = address >> 1;
break;
}
+ // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
+ // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
+ ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
+
switch (gbAddrConfig.bits.NUM_BANKS)
{
case ADDR_CONFIG_1_BANK:
ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
+
+ if ((m_rbPerSeLog2 == 1) &&
+ (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
+ ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
+ {
+ ADDR_ASSERT(m_settings.isVega10 == FALSE);
+ ADDR_ASSERT(m_settings.isRaven == FALSE);
+ ADDR_ASSERT(m_settings.isVega20 == FALSE);
+
+ if (m_settings.isVega12)
+ {
+ m_settings.htileCacheRbConflict = 1;
+ }
+ }
}
else
{
case FAMILY_AI:
m_settings.isArcticIsland = 1;
m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
+ m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
+ m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
+ m_settings.isDce12 = 1;
- if (m_settings.isVega10)
+ if (m_settings.isVega10 == 0)
{
- m_settings.isDce12 = 1;
+ m_settings.htileAlignFix = 1;
+ m_settings.applyAliasFix = 1;
}
m_settings.metaBaseAlignFix = 1;
m_settings.depthPipeXorDisable = 1;
break;
-
case FAMILY_RV:
m_settings.isArcticIsland = 1;
- m_settings.isRaven = ASICREV_IS_RAVEN(uChipRevision);
+ m_settings.isRaven = ASICREV_IS_RAVEN(uChipRevision) || ASICREV_IS_RAVEN2(uChipRevision);
if (m_settings.isRaven)
{
m_settings.metaBaseAlignFix = 1;
- m_settings.depthPipeXorDisable = 1;
+ if (ASICREV_IS_RAVEN(uChipRevision))
+ {
+ m_settings.depthPipeXorDisable = 1;
+ }
break;
default:
CoordEq* pRbEq, ///< [out] rb equation
UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
UINT_32 numSeLog2) ///< [in] number of shader engine
+ const
{
// RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
(*pRbEq)[0].add(cy);
cx++;
cy++;
+
+ if (m_settings.applyAliasFix == false)
+ {
+ (*pRbEq)[0].add(cy);
+ }
+
(*pRbEq)[0].add(cy);
start++;
}
pPipeEq->xorin(xorMask);
}
}
-
/**
************************************************************************************************************************
* Gfx9Lib::GetMetaEquation
* @brief
* Get meta equation for cmask/htile/DCC
* @return
+* Pointer to a calculated meta equation
+************************************************************************************************************************
+*/
+const CoordEq* Gfx9Lib::GetMetaEquation(
+ const MetaEqParams& metaEqParams)
+{
+ UINT_32 cachedMetaEqIndex;
+
+ for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
+ {
+ if (memcmp(&metaEqParams,
+ &m_cachedMetaEqKey[cachedMetaEqIndex],
+ static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
+ {
+ break;
+ }
+ }
+
+ CoordEq* pMetaEq = NULL;
+
+ if (cachedMetaEqIndex < MaxCachedMetaEq)
+ {
+ pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
+ }
+ else
+ {
+ m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
+
+ pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
+
+ m_metaEqOverrideIndex %= MaxCachedMetaEq;
+
+ GenMetaEquation(pMetaEq,
+ metaEqParams.maxMip,
+ metaEqParams.elementBytesLog2,
+ metaEqParams.numSamplesLog2,
+ metaEqParams.metaFlag,
+ metaEqParams.dataSurfaceType,
+ metaEqParams.swizzleMode,
+ metaEqParams.resourceType,
+ metaEqParams.metaBlkWidthLog2,
+ metaEqParams.metaBlkHeightLog2,
+ metaEqParams.metaBlkDepthLog2,
+ metaEqParams.compBlkWidthLog2,
+ metaEqParams.compBlkHeightLog2,
+ metaEqParams.compBlkDepthLog2);
+ }
+
+ return pMetaEq;
+}
+
+/**
+************************************************************************************************************************
+* Gfx9Lib::GenMetaEquation
+*
+* @brief
+* Get meta equation for cmask/htile/DCC
+* @return
* N/A
************************************************************************************************************************
*/
-VOID Gfx9Lib::GetMetaEquation(
- CoordEq* pMetaEq, ///< [out] meta equation
- UINT_32 maxMip, ///< [in] max mip Id
- UINT_32 elementBytesLog2, ///< [in] data surface element bytes
- UINT_32 numSamplesLog2, ///< [in] data surface sample count
- ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
- Gfx9DataType dataSurfaceType, ///< [in] data surface type
- AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
- AddrResourceType resourceType, ///< [in] data surface resource type
- UINT_32 metaBlkWidthLog2, ///< [in] meta block width
- UINT_32 metaBlkHeightLog2, ///< [in] meta block height
- UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
- UINT_32 compBlkWidthLog2, ///< [in] compress block width
- UINT_32 compBlkHeightLog2, ///< [in] compress block height
- UINT_32 compBlkDepthLog2) ///< [in] compress block depth
+VOID Gfx9Lib::GenMetaEquation(
+ CoordEq* pMetaEq, ///< [out] meta equation
+ UINT_32 maxMip, ///< [in] max mip Id
+ UINT_32 elementBytesLog2, ///< [in] data surface element bytes
+ UINT_32 numSamplesLog2, ///< [in] data surface sample count
+ ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
+ Gfx9DataType dataSurfaceType, ///< [in] data surface type
+ AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
+ AddrResourceType resourceType, ///< [in] data surface resource type
+ UINT_32 metaBlkWidthLog2, ///< [in] meta block width
+ UINT_32 metaBlkHeightLog2, ///< [in] meta block height
+ UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
+ UINT_32 compBlkWidthLog2, ///< [in] compress block width
+ UINT_32 compBlkHeightLog2, ///< [in] compress block height
+ UINT_32 compBlkDepthLog2) ///< [in] compress block depth
const
{
- UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
+ UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
- //UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
// Get the correct data address and rb equation
CoordEq dataEq;
}
}
- UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
- UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
- CoordEq origRbEquation;
+ const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
+ const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
+ const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
+ CoordEq origRbEquation;
GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
CoordEq rbEquation = origRbEquation;
- UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
-
for (UINT_32 i = 0; i < numRbTotalLog2; i++)
{
for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
}
}
+ if (m_settings.applyAliasFix)
+ {
+ co.set('z', -1);
+ }
+
// Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
for (UINT_32 i = 0; i < numRbTotalLog2; i++)
{
for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
{
- if (rbEquation[i] == pipeEquation[j])
+ BOOL_32 isRbEquationInPipeEquation = FALSE;
+
+ if (m_settings.applyAliasFix)
+ {
+ CoordTerm filteredPipeEq;
+ filteredPipeEq = pipeEquation[j];
+
+ filteredPipeEq.Filter('>', co, 0, 'z');
+
+ isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
+ }
+ else
+ {
+ isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
+ }
+
+ if (isRbEquationInPipeEquation)
{
rbEquation[i].Clear();
}
}
}
+ bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
+
// Loop through each bit of the channel, get the smallest coordinate,
// and remove it from the metaaddr, and rb_equation
for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
if (pipeEquation[i][k] != co)
{
rbEquation[j].add(pipeEquation[i][k]);
+ rbAppendedWithPipeBits[j] = true;
}
}
}
UINT_32 rbBitsLeft = 0;
for (UINT_32 i = 0; i < numRbTotalLog2; i++)
{
- if (rbEquation[i].getsize() > 0)
+ BOOL_32 isRbEqAppended = FALSE;
+
+ if (m_settings.applyAliasFix)
+ {
+ isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
+ }
+ else
+ {
+ isRbEqAppended = (rbEquation[i].getsize() > 0);
+ }
+
+ if (isRbEqAppended)
{
rbBitsLeft++;
rbEquation[i].getsmallest(co);
if (rbEquation[i][k] != co)
{
rbEquation[j].add(rbEquation[i][k]);
+ rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
}
}
}
// Put in remaining rb bits
for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
{
- if (rbEquation[i].getsize() > 0)
+ BOOL_32 isRbEqAppended = FALSE;
+
+ if (m_settings.applyAliasFix)
+ {
+ isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
+ }
+ else
+ {
+ isRbEqAppended = (rbEquation[i].getsize() > 0);
+ }
+
+ if (isRbEqAppended)
{
origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
// Mark any rb bit we add in to the rb mask
// Post validation
if (ret == ADDR_OK)
{
- Dim2d microBlockDim = Block256_2d[elementBytesLog2];
+ MAYBE_UNUSED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
(microBlockDim.w * (1 << elementBytesLog2)));
ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
{
BOOL_32 support = FALSE;
- //const AddrResourceType resourceType = pIn->resourceType;
+ const AddrResourceType resourceType = pIn->resourceType;
+ (void)resourceType;
const AddrSwizzleMode swizzleMode = pIn->swizzleMode;
if (m_settings.isDce12)
AddrBlockSetMacro = AddrBlockSetMacro4KB | AddrBlockSetMacro64KB,
};
+ enum AddrSwSet
+ {
+ AddrSwSetZ = 1 << ADDR_SW_Z,
+ AddrSwSetS = 1 << ADDR_SW_S,
+ AddrSwSetD = 1 << ADDR_SW_D,
+ AddrSwSetR = 1 << ADDR_SW_R,
+
+ AddrSwSetAll = AddrSwSetZ | AddrSwSetS | AddrSwSetD | AddrSwSetR,
+ };
+
ADDR_E_RETURNCODE returnCode = ADDR_OK;
ElemLib* pElemLib = GetElemLib();
pOut->resourceType = pIn->resourceType;
}
- ADDR_ASSERT(bpp >= 8u);
- UINT_64 minSizeAlignInElement = Max(minSizeAlign / (bpp >> 3), 1u);
+ if (bpp < 8)
+ {
+ ADDR_ASSERT_ALWAYS();
- if (IsTex1d(pOut->resourceType))
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+ else if (IsTex1d(pOut->resourceType))
{
pOut->swizzleMode = ADDR_SW_LINEAR;
pOut->validBlockSet.value = AddrBlockSetLinear;
ADDR2_BLOCK_SET blockSet;
blockSet.value = 0;
- AddrSwType swType = ADDR_SW_S;
+ ADDR2_SWTYPE_SET addrPreferredSwSet, addrValidSwSet, clientPreferredSwSet;
+ addrPreferredSwSet.value = AddrSwSetS;
+ addrValidSwSet = addrPreferredSwSet;
+ clientPreferredSwSet = pIn->preferredSwSet;
+
+ if (clientPreferredSwSet.value == 0)
+ {
+ clientPreferredSwSet.value = AddrSwSetAll;
+ }
// prt Xor and non-xor will have less height align requirement for stereo surface
BOOL_32 prtXor = (pIn->flags.prt || pIn->flags.qbStereo) && (pIn->noXor == FALSE);
if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
{
ADDR_ASSERT(IsTex2d(pOut->resourceType));
- blockSet.value = AddrBlockSetMacro;
- swType = ADDR_SW_Z;
+ blockSet.value = AddrBlockSetMacro;
+ addrPreferredSwSet.value = AddrSwSetZ;
+ addrValidSwSet.value = AddrSwSetZ;
- if (pIn->flags.depth && pIn->flags.texture)
+ if (pIn->flags.noMetadata == FALSE)
{
- if (((bpp == 16) && (numFrags >= 4)) ||
- ((bpp == 32) && (numFrags >= 2)))
+ if (pIn->flags.depth &&
+ pIn->flags.texture &&
+ (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
{
// When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
// equation from wrong address within memory range a tile covered and use the
pOut->canXor = FALSE;
prtXor = FALSE;
}
+
+ if (m_settings.htileCacheRbConflict &&
+ (pIn->flags.depth || pIn->flags.stencil) &&
+ (slice > 1) &&
+ (pIn->flags.metaRbUnaligned == FALSE) &&
+ (pIn->flags.metaPipeUnaligned == FALSE))
+ {
+ // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
+ pOut->canXor = FALSE;
+ }
}
}
else if (ElemLib::IsBlockCompressed(pIn->format))
{
- // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes. Not sure
- // under what circumstances "_D" would be appropriate as these formats are not
- // displayable.
+ // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes.
+ // Not sure under what circumstances "_D" would be appropriate as these formats
+ // are not displayable.
blockSet.value = AddrBlockSetMacro;
// This isn't to be used as texture and caller doesn't allow macro tiled.
{
blockSet.value |= AddrBlockSetLinear;
}
- swType = ADDR_SW_D;
+
+ addrPreferredSwSet.value = AddrSwSetD;
+ addrValidSwSet.value = AddrSwSetS | AddrSwSetD;
}
else if (ElemLib::IsMacroPixelPacked(pIn->format))
{
- // macro pixel packed formats (BG_RG, GB_GR) does not support the Z modes. Its not
- // clear under what circumstances the D or R modes would be appropriate since
- // these formats are not displayable.
- blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
- swType = ADDR_SW_S;
+ // macro pixel packed formats (BG_RG, GB_GR) does not support the Z modes.
+ // Its notclear under what circumstances the D or R modes would be appropriate
+ // since these formats are not displayable.
+ blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
+
+ addrPreferredSwSet.value = AddrSwSetS;
+ addrValidSwSet.value = AddrSwSetS | AddrSwSetD | AddrSwSetR;
}
else if (IsTex3d(pOut->resourceType))
{
if (pIn->flags.prt)
{
// PRT cannot use SW_D which gives an unexpected block dimension
- swType = ADDR_SW_Z;
+ addrPreferredSwSet.value = AddrSwSetZ;
+ addrValidSwSet.value = AddrSwSetZ | AddrSwSetS;
}
else if ((numMipLevels > 1) && (slice >= width) && (slice >= height))
{
// When depth (Z) is the maximum dimension then must use one of the SW_*_S
// or SW_*_Z modes if mipmapping is desired on a 3D surface
- swType = ADDR_SW_Z;
+ addrPreferredSwSet.value = AddrSwSetZ;
+ addrValidSwSet.value = AddrSwSetZ | AddrSwSetS;
}
else if (pIn->flags.color)
{
- swType = ADDR_SW_D;
+ addrPreferredSwSet.value = AddrSwSetD;
+ addrValidSwSet.value = AddrSwSetZ | AddrSwSetS | AddrSwSetD;
}
else
{
- swType = ADDR_SW_Z;
+ addrPreferredSwSet.value = AddrSwSetZ;
+ addrValidSwSet.value = AddrSwSetZ | AddrSwSetD;
+ if (bpp != 128)
+ {
+ addrValidSwSet.value |= AddrSwSetS;
+ }
}
}
else
{
- swType = ((pIn->flags.display == TRUE) ||
- (pIn->flags.overlay == TRUE) ||
- (pIn->bpp == 128)) ? ADDR_SW_D : ADDR_SW_S;
+ addrPreferredSwSet.value = ((pIn->flags.display == TRUE) ||
+ (pIn->flags.overlay == TRUE) ||
+ (pIn->bpp == 128)) ? AddrSwSetD : AddrSwSetS;
+
+ addrValidSwSet.value = AddrSwSetS | AddrSwSetD | AddrSwSetR;
if (numMipLevels > 1)
{
if (displayResource)
{
- swType = pIn->flags.rotated ? ADDR_SW_R : ADDR_SW_D;
+ addrPreferredSwSet.value = pIn->flags.rotated ? AddrSwSetR : AddrSwSetD;
if (pIn->bpp > 64)
{
// DCE12 does not support display surface to be _T swizzle mode
prtXor = FALSE;
+
+ addrValidSwSet.value = AddrSwSetD | AddrSwSetR;
}
else if (m_settings.isDcn1)
{
// _R is not supported by Dcn1
if (pIn->bpp == 64)
{
- swType = ADDR_SW_D;
+ addrPreferredSwSet.value = AddrSwSetD;
+ addrValidSwSet.value = AddrSwSetS | AddrSwSetD;
}
else
{
- swType = ADDR_SW_S;
+ addrPreferredSwSet.value = AddrSwSetS;
+ addrValidSwSet.value = AddrSwSetS;
}
blockSet.micro = FALSE;
}
}
- if ((numFrags > 1) &&
- (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
- {
- // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
- blockSet.macro4KB = FALSE;
- }
+ ADDR_ASSERT((addrValidSwSet.value & addrPreferredSwSet.value) == addrPreferredSwSet.value);
- if (pIn->flags.prt)
- {
- blockSet.value &= AddrBlockSetMacro64KB;
- }
+ pOut->clientPreferredSwSet = clientPreferredSwSet;
+
+ // Clamp client preferred set to valid set
+ clientPreferredSwSet.value &= addrValidSwSet.value;
- // Apply customized forbidden setting
- blockSet.value &= ~pIn->forbiddenBlock.value;
+ pOut->validSwTypeSet = addrValidSwSet;
- if (pIn->maxAlign > 0)
+ if (clientPreferredSwSet.value == 0)
{
- if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
+ // Client asks for an invalid swizzle type...
+ ADDR_ASSERT_ALWAYS();
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+ else
+ {
+ if (IsPow2(clientPreferredSwSet.value))
{
- blockSet.macro64KB = FALSE;
+ // Only one swizzle type left, use it directly
+ addrPreferredSwSet.value = clientPreferredSwSet.value;
+ }
+ else if ((clientPreferredSwSet.value & addrPreferredSwSet.value) == 0)
+ {
+ // Client wants 2 or more a valid swizzle type but none of them is addrlib preferred
+ if (clientPreferredSwSet.sw_D)
+ {
+ addrPreferredSwSet.value = AddrSwSetD;
+ }
+ else if (clientPreferredSwSet.sw_Z)
+ {
+ addrPreferredSwSet.value = AddrSwSetZ;
+ }
+ else if (clientPreferredSwSet.sw_R)
+ {
+ addrPreferredSwSet.value = AddrSwSetR;
+ }
+ else
+ {
+ ADDR_ASSERT(clientPreferredSwSet.sw_S);
+ addrPreferredSwSet.value = AddrSwSetS;
+ }
}
- if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
+ if ((numFrags > 1) &&
+ (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
{
+ // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
blockSet.macro4KB = FALSE;
}
- if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
+ if (pIn->flags.prt)
{
- blockSet.micro = FALSE;
+ blockSet.value &= AddrBlockSetMacro64KB;
}
- }
- Dim3d blkAlign[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
- Dim3d paddedDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
- UINT_64 padSize[AddrBlockMaxTiledType] = {0};
+ // Apply customized forbidden setting
+ blockSet.value &= ~pIn->forbiddenBlock.value;
- if (blockSet.micro)
- {
- returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlockMicro].w,
- &blkAlign[AddrBlockMicro].h,
- &blkAlign[AddrBlockMicro].d,
- bpp,
- numFrags,
- pOut->resourceType,
- ADDR_SW_256B);
-
- if (returnCode == ADDR_OK)
+ if (pIn->maxAlign > 0)
{
- if (displayResource)
+ if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
{
- blkAlign[AddrBlockMicro].w = PowTwoAlign(blkAlign[AddrBlockMicro].w, 32);
+ blockSet.macro64KB = FALSE;
}
- else if ((blkAlign[AddrBlockMicro].w >= width) && (blkAlign[AddrBlockMicro].h >= height) &&
- (minSizeAlign <= GetBlockSize(ADDR_SW_256B)))
+
+ if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
{
- // If one 256B block can contain the surface, don't bother bigger block type
blockSet.macro4KB = FALSE;
- blockSet.macro64KB = FALSE;
- blockSet.var = FALSE;
}
- padSize[AddrBlockMicro] = ComputePadSize(&blkAlign[AddrBlockMicro], width, height,
- slice, &paddedDim[AddrBlockMicro]);
+ if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
+ {
+ blockSet.micro = FALSE;
+ }
}
- }
- if ((returnCode == ADDR_OK) && blockSet.macro4KB)
- {
- returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock4KB].w,
- &blkAlign[AddrBlock4KB].h,
- &blkAlign[AddrBlock4KB].d,
- bpp,
- numFrags,
- pOut->resourceType,
- ADDR_SW_4KB);
+ Dim3d blkAlign[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
+ Dim3d paddedDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
+ UINT_64 padSize[AddrBlockMaxTiledType] = {0};
- if (returnCode == ADDR_OK)
+ if (blockSet.micro)
{
- if (displayResource)
+ returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlockMicro].w,
+ &blkAlign[AddrBlockMicro].h,
+ &blkAlign[AddrBlockMicro].d,
+ bpp,
+ numFrags,
+ pOut->resourceType,
+ ADDR_SW_256B);
+
+ if (returnCode == ADDR_OK)
{
- blkAlign[AddrBlock4KB].w = PowTwoAlign(blkAlign[AddrBlock4KB].w, 32);
- }
-
- padSize[AddrBlock4KB] = ComputePadSize(&blkAlign[AddrBlock4KB], width, height,
- slice, &paddedDim[AddrBlock4KB]);
+ if (displayResource)
+ {
+ blkAlign[AddrBlockMicro].w = PowTwoAlign(blkAlign[AddrBlockMicro].w, 32);
+ }
+ else if ((blkAlign[AddrBlockMicro].w >= width) && (blkAlign[AddrBlockMicro].h >= height) &&
+ (minSizeAlign <= GetBlockSize(ADDR_SW_256B)))
+ {
+ // If one 256B block can contain the surface, don't bother bigger block type
+ blockSet.macro4KB = FALSE;
+ blockSet.macro64KB = FALSE;
+ blockSet.var = FALSE;
+ }
- ADDR_ASSERT(padSize[AddrBlock4KB] >= padSize[AddrBlockMicro]);
+ padSize[AddrBlockMicro] = ComputePadSize(&blkAlign[AddrBlockMicro], width, height,
+ slice, &paddedDim[AddrBlockMicro]);
+ }
}
- }
- if ((returnCode == ADDR_OK) && blockSet.macro64KB)
- {
- returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock64KB].w,
- &blkAlign[AddrBlock64KB].h,
- &blkAlign[AddrBlock64KB].d,
- bpp,
- numFrags,
- pOut->resourceType,
- ADDR_SW_64KB);
-
- if (returnCode == ADDR_OK)
+ if ((returnCode == ADDR_OK) && blockSet.macro4KB)
{
- if (displayResource)
+ returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock4KB].w,
+ &blkAlign[AddrBlock4KB].h,
+ &blkAlign[AddrBlock4KB].d,
+ bpp,
+ numFrags,
+ pOut->resourceType,
+ ADDR_SW_4KB);
+
+ if (returnCode == ADDR_OK)
{
- blkAlign[AddrBlock64KB].w = PowTwoAlign(blkAlign[AddrBlock64KB].w, 32);
- }
-
- padSize[AddrBlock64KB] = ComputePadSize(&blkAlign[AddrBlock64KB], width, height,
- slice, &paddedDim[AddrBlock64KB]);
+ if (displayResource)
+ {
+ blkAlign[AddrBlock4KB].w = PowTwoAlign(blkAlign[AddrBlock4KB].w, 32);
+ }
- ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlock4KB]);
- ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlockMicro]);
- }
- }
+ padSize[AddrBlock4KB] = ComputePadSize(&blkAlign[AddrBlock4KB], width, height,
+ slice, &paddedDim[AddrBlock4KB]);
- if (returnCode == ADDR_OK)
- {
- for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
- {
- padSize[i] = PowTwoAlign(padSize[i], minSizeAlignInElement);
+ ADDR_ASSERT(padSize[AddrBlock4KB] >= padSize[AddrBlockMicro]);
+ }
}
- // Use minimum block type which meets all conditions above if flag minimizeAlign was set
- if (pIn->flags.minimizeAlign)
+ if ((returnCode == ADDR_OK) && blockSet.macro64KB)
{
- // If padded size of 64KB block is larger than padded size of 256B block or 4KB
- // block, filter out 64KB block from candidate list
- if (blockSet.macro64KB &&
- ((blockSet.micro && (padSize[AddrBlockMicro] < padSize[AddrBlock64KB])) ||
- (blockSet.macro4KB && (padSize[AddrBlock4KB] < padSize[AddrBlock64KB]))))
+ returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock64KB].w,
+ &blkAlign[AddrBlock64KB].h,
+ &blkAlign[AddrBlock64KB].d,
+ bpp,
+ numFrags,
+ pOut->resourceType,
+ ADDR_SW_64KB);
+
+ if (returnCode == ADDR_OK)
{
- blockSet.macro64KB = FALSE;
- }
+ if (displayResource)
+ {
+ blkAlign[AddrBlock64KB].w = PowTwoAlign(blkAlign[AddrBlock64KB].w, 32);
+ }
- // If padded size of 4KB block is larger than padded size of 256B block,
- // filter out 4KB block from candidate list
- if (blockSet.macro4KB &&
- blockSet.micro &&
- (padSize[AddrBlockMicro] < padSize[AddrBlock4KB]))
- {
- blockSet.macro4KB = FALSE;
+ padSize[AddrBlock64KB] = ComputePadSize(&blkAlign[AddrBlock64KB], width, height,
+ slice, &paddedDim[AddrBlock64KB]);
+
+ ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlock4KB]);
+ ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlockMicro]);
}
}
- // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint
- else if (pIn->flags.opt4space)
- {
- UINT_64 threshold = blockSet.micro ? padSize[AddrBlockMicro] :
- (blockSet.macro4KB ? padSize[AddrBlock4KB] : padSize[AddrBlock64KB]);
- threshold += threshold >> 1;
+ if (returnCode == ADDR_OK)
+ {
+ UINT_64 minSizeAlignInElement = Max(minSizeAlign / (bpp >> 3), 1u);
- if (blockSet.macro64KB && (padSize[AddrBlock64KB] > threshold))
+ for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
{
- blockSet.macro64KB = FALSE;
+ padSize[i] = PowTwoAlign(padSize[i], minSizeAlignInElement);
}
- if (blockSet.macro4KB && (padSize[AddrBlock4KB] > threshold))
+ // Use minimum block type which meets all conditions above if flag minimizeAlign was set
+ if (pIn->flags.minimizeAlign)
{
- blockSet.macro4KB = FALSE;
- }
- }
- else
- {
- if (blockSet.macro64KB &&
- (padSize[AddrBlock64KB] >= static_cast<UINT_64>(width) * height * slice * 2) &&
- ((blockSet.value & ~AddrBlockSetMacro64KB) != 0))
- {
- // If 64KB block waste more than half memory on padding, filter it out from
- // candidate list when it is not the only choice left
- blockSet.macro64KB = FALSE;
- }
- }
-
- if (blockSet.value == 0)
- {
- // Bad things happen, client will not get any useful information from AddrLib.
- // Maybe we should fill in some output earlier instead of outputing nothing?
- ADDR_ASSERT_ALWAYS();
- returnCode = ADDR_INVALIDPARAMS;
- }
- else
- {
- pOut->validBlockSet = blockSet;
- pOut->canXor = pOut->canXor &&
- (blockSet.macro4KB || blockSet.macro64KB || blockSet.var);
-
- if (blockSet.macro64KB || blockSet.macro4KB)
- {
- if (swType == ADDR_SW_Z)
+ // If padded size of 64KB block is larger than padded size of 256B block or 4KB
+ // block, filter out 64KB block from candidate list
+ if (blockSet.macro64KB &&
+ ((blockSet.micro && (padSize[AddrBlockMicro] < padSize[AddrBlock64KB])) ||
+ (blockSet.macro4KB && (padSize[AddrBlock4KB] < padSize[AddrBlock64KB]))))
{
- pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_Z : ADDR_SW_4KB_Z;
+ blockSet.macro64KB = FALSE;
}
- else if (swType == ADDR_SW_S)
- {
- pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_S : ADDR_SW_4KB_S;
- }
- else if (swType == ADDR_SW_D)
+
+ // If padded size of 4KB block is larger than padded size of 256B block,
+ // filter out 4KB block from candidate list
+ if (blockSet.macro4KB &&
+ blockSet.micro &&
+ (padSize[AddrBlockMicro] < padSize[AddrBlock4KB]))
{
- pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_D : ADDR_SW_4KB_D;
+ blockSet.macro4KB = FALSE;
}
- else
+ }
+ // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint
+ else if (pIn->flags.opt4space)
+ {
+ UINT_64 threshold = blockSet.micro ? padSize[AddrBlockMicro] :
+ (blockSet.macro4KB ? padSize[AddrBlock4KB] : padSize[AddrBlock64KB]);
+
+ threshold += threshold >> 1;
+
+ if (blockSet.macro64KB && (padSize[AddrBlock64KB] > threshold))
{
- ADDR_ASSERT(swType == ADDR_SW_R);
- pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_R : ADDR_SW_4KB_R;
+ blockSet.macro64KB = FALSE;
}
- if (prtXor && blockSet.macro64KB)
+ if (blockSet.macro4KB && (padSize[AddrBlock4KB] > threshold))
{
- // Client wants PRTXOR, give back _T swizzle mode if 64KB is available
- const UINT_32 prtGap = ADDR_SW_64KB_Z_T - ADDR_SW_64KB_Z;
- pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + prtGap);
+ blockSet.macro4KB = FALSE;
}
- else if (pOut->canXor)
+ }
+ else
+ {
+ if (blockSet.macro64KB &&
+ (padSize[AddrBlock64KB] >= static_cast<UINT_64>(width) * height * slice * 2) &&
+ ((blockSet.value & ~AddrBlockSetMacro64KB) != 0))
{
- // Client wants XOR and this is allowed, return XOR version swizzle mode
- const UINT_32 xorGap = ADDR_SW_4KB_Z_X - ADDR_SW_4KB_Z;
- pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + xorGap);
+ // If 64KB block waste more than half memory on padding, filter it out from
+ // candidate list when it is not the only choice left
+ blockSet.macro64KB = FALSE;
}
}
- else if (blockSet.micro)
+
+ if (blockSet.value == 0)
{
- if (swType == ADDR_SW_S)
+ // Bad things happen, client will not get any useful information from AddrLib.
+ // Maybe we should fill in some output earlier instead of outputing nothing?
+ ADDR_ASSERT_ALWAYS();
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+ else
+ {
+ pOut->validBlockSet = blockSet;
+ pOut->canXor = pOut->canXor &&
+ (blockSet.macro4KB || blockSet.macro64KB || blockSet.var);
+
+ if (blockSet.macro64KB || blockSet.macro4KB)
{
- pOut->swizzleMode = ADDR_SW_256B_S;
+ if (addrPreferredSwSet.value == AddrSwSetZ)
+ {
+ pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_Z : ADDR_SW_4KB_Z;
+ }
+ else if (addrPreferredSwSet.value == AddrSwSetS)
+ {
+ pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_S : ADDR_SW_4KB_S;
+ }
+ else if (addrPreferredSwSet.value == AddrSwSetD)
+ {
+ pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_D : ADDR_SW_4KB_D;
+ }
+ else
+ {
+ ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR);
+ pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_R : ADDR_SW_4KB_R;
+ }
+
+ if (prtXor && blockSet.macro64KB)
+ {
+ // Client wants PRTXOR, give back _T swizzle mode if 64KB is available
+ const UINT_32 prtGap = ADDR_SW_64KB_Z_T - ADDR_SW_64KB_Z;
+ pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + prtGap);
+ }
+ else if (pOut->canXor)
+ {
+ // Client wants XOR and this is allowed, return XOR version swizzle mode
+ const UINT_32 xorGap = ADDR_SW_4KB_Z_X - ADDR_SW_4KB_Z;
+ pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + xorGap);
+ }
+ }
+ else if (blockSet.micro)
+ {
+ if (addrPreferredSwSet.value == AddrSwSetS)
+ {
+ pOut->swizzleMode = ADDR_SW_256B_S;
+ }
+ else if (addrPreferredSwSet.value == AddrSwSetD)
+ {
+ pOut->swizzleMode = ADDR_SW_256B_D;
+ }
+ else
+ {
+ ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR);
+ pOut->swizzleMode = ADDR_SW_256B_R;
+ }
}
- else if (swType == ADDR_SW_D)
+ else if (blockSet.linear)
{
- pOut->swizzleMode = ADDR_SW_256B_D;
+ // Fall into this branch doesn't mean linear is suitable, only no other choices!
+ pOut->swizzleMode = ADDR_SW_LINEAR;
}
else
{
- ADDR_ASSERT(swType == ADDR_SW_R);
- pOut->swizzleMode = ADDR_SW_256B_R;
- }
- }
- else if (blockSet.linear)
- {
- // Fall into this branch doesn't mean linear is suitable, only no other choices!
- pOut->swizzleMode = ADDR_SW_LINEAR;
- }
- else
- {
- ADDR_ASSERT(blockSet.var);
+ ADDR_ASSERT(blockSet.var);
- // Designer consider VAR swizzle mode is usless for most cases
- ADDR_UNHANDLED_CASE();
+ // Designer consider VAR swizzle mode is usless for most cases
+ ADDR_UNHANDLED_CASE();
- returnCode = ADDR_NOTSUPPORTED;
- }
+ returnCode = ADDR_NOTSUPPORTED;
+ }
#if DEBUG
- // Post sanity check, at least AddrLib should accept the output generated by its own
- if (pOut->swizzleMode != ADDR_SW_LINEAR)
- {
- ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
- localIn.flags = pIn->flags;
- localIn.swizzleMode = pOut->swizzleMode;
- localIn.resourceType = pOut->resourceType;
- localIn.format = pIn->format;
- localIn.bpp = bpp;
- localIn.width = width;
- localIn.height = height;
- localIn.numSlices = slice;
- localIn.numMipLevels = numMipLevels;
- localIn.numSamples = numSamples;
- localIn.numFrags = numFrags;
-
- HwlComputeSurfaceInfoSanityCheck(&localIn);
-
- // TODO : check all valid block type available in validBlockSet?
- }
+ // Post sanity check, at least AddrLib should accept the output generated by its own
+ if (pOut->swizzleMode != ADDR_SW_LINEAR)
+ {
+ ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
+ localIn.flags = pIn->flags;
+ localIn.swizzleMode = pOut->swizzleMode;
+ localIn.resourceType = pOut->resourceType;
+ localIn.format = pIn->format;
+ localIn.bpp = bpp;
+ localIn.width = width;
+ localIn.height = height;
+ localIn.numSlices = slice;
+ localIn.numMipLevels = numMipLevels;
+ localIn.numSamples = numSamples;
+ localIn.numFrags = numFrags;
+
+ HwlComputeSurfaceInfoSanityCheck(&localIn);
+
+ }
#endif
+ }
}
}
}
const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
- const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
+ MAYBE_UNUSED const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
ADDR_ASSERT(maxYCoordBlock256 ==
GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1));
pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
- pOut->epitchIsHeight = FALSE;
- pOut->mipChainInTail = FALSE;
+ pOut->epitchIsHeight = FALSE;
+ pOut->mipChainInTail = FALSE;
+ pOut->firstMipIdInTail = pIn->numMipLevels;
- pOut->mipChainPitch = pOut->pitch;
- pOut->mipChainHeight = pOut->height;
- pOut->mipChainSlice = pOut->numSlices;
+ pOut->mipChainPitch = pOut->pitch;
+ pOut->mipChainHeight = pOut->height;
+ pOut->mipChainSlice = pOut->numSlices;
if (pIn->numMipLevels > 1)
{
- UINT_32 numMipLevel;
- ADDR2_MIP_INFO *pMipInfo;
- ADDR2_MIP_INFO mipInfo[4];
-
- if (pOut->pMipInfo != NULL)
+ pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
+ pIn->swizzleMode,
+ pIn->bpp,
+ pIn->width,
+ pIn->height,
+ pIn->numSlices,
+ pOut->blockWidth,
+ pOut->blockHeight,
+ pOut->blockSlices,
+ pIn->numMipLevels,
+ pOut->pMipInfo);
+
+ const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
+
+ if (endingMipId == 0)
{
- pMipInfo = pOut->pMipInfo;
- numMipLevel = pIn->numMipLevels;
- }
- else
- {
- pMipInfo = mipInfo;
- numMipLevel = Min(pIn->numMipLevels, 4u);
- }
+ const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
+ pIn->swizzleMode,
+ pOut->blockWidth,
+ pOut->blockHeight,
+ pOut->blockSlices);
- UINT_32 endingMip = GetMipChainInfo(pIn->resourceType,
- pIn->swizzleMode,
- pIn->bpp,
- pIn->width,
- pIn->height,
- pIn->numSlices,
- pOut->blockWidth,
- pOut->blockHeight,
- pOut->blockSlices,
- numMipLevel,
- pMipInfo);
-
- if (endingMip == 0)
- {
pOut->epitchIsHeight = TRUE;
- pOut->pitch = pMipInfo[0].pitch;
- pOut->height = pMipInfo[0].height;
- pOut->numSlices = pMipInfo[0].depth;
+ pOut->pitch = tailMaxDim.w;
+ pOut->height = tailMaxDim.h;
+ pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ?
+ tailMaxDim.d : pIn->numSlices;
pOut->mipChainInTail = TRUE;
}
else
{
- UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
+ UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
{
UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
- if ((mip1WidthInBlk == 1) && (endingMip > 2))
+ if ((mip1WidthInBlk == 1) && (endingMipId > 2))
{
mip1WidthInBlk++;
}
{
UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
- if ((mip1HeightInBlk == 1) && (endingMip > 2))
+ if ((mip1HeightInBlk == 1) && (endingMipId > 2))
{
mip1HeightInBlk++;
}
UINT_64 macroBlockOffset =
blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
- pMipInfo[i].macroBlockOffset = macroBlockOffset;
- pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
+ pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
+ pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
}
}
}
else if (pOut->pMipInfo != NULL)
{
- pOut->pMipInfo[0].pitch = pOut->pitch;
+ pOut->pMipInfo[0].pitch = pOut->pitch;
pOut->pMipInfo[0].height = pOut->height;
- pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
+ pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
pOut->pMipInfo[0].offset = 0;
}
pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
(pIn->bpp >> 3) * pIn->numFrags;
- pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
- pOut->baseAlign = HwlComputeSurfaceBaseAlign(pIn->swizzleMode);
+ pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
+ pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
if (pIn->flags.prt)
{
return returnCode;
}
+/**
+************************************************************************************************************************
+* Gfx9Lib::HwlComputeSurfaceInfoLinear
+*
+* @brief
+* Internal function to calculate alignment for linear surface
+*
+* @return
+* ADDR_E_RETURNCODE
+************************************************************************************************************************
+*/
+ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
+ const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
+ ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
+ ) const
+{
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+ UINT_32 pitch = 0;
+ UINT_32 actualHeight = 0;
+ UINT_32 elementBytes = pIn->bpp >> 3;
+ const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256;
+
+ if (IsTex1d(pIn->resourceType))
+ {
+ if (pIn->height > 1)
+ {
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+ else
+ {
+ const UINT_32 pitchAlignInElement = alignment / elementBytes;
+
+ pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
+ actualHeight = pIn->numMipLevels;
+
+ if (pIn->flags.prt == FALSE)
+ {
+ returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
+ &pitch, &actualHeight);
+ }
+
+ if (returnCode == ADDR_OK)
+ {
+ if (pOut->pMipInfo != NULL)
+ {
+ for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
+ {
+ pOut->pMipInfo[i].offset = pitch * elementBytes * i;
+ pOut->pMipInfo[i].pitch = pitch;
+ pOut->pMipInfo[i].height = 1;
+ pOut->pMipInfo[i].depth = 1;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
+ }
+
+ if ((pitch == 0) || (actualHeight == 0))
+ {
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+
+ if (returnCode == ADDR_OK)
+ {
+ pOut->pitch = pitch;
+ pOut->height = pIn->height;
+ pOut->numSlices = pIn->numSlices;
+ pOut->mipChainPitch = pitch;
+ pOut->mipChainHeight = actualHeight;
+ pOut->mipChainSlice = pOut->numSlices;
+ pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
+ pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
+ pOut->surfSize = pOut->sliceSize * pOut->numSlices;
+ pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
+ pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
+ pOut->blockHeight = 1;
+ pOut->blockSlices = 1;
+ }
+
+ // Post calculation validate
+ ADDR_ASSERT(pOut->sliceSize > 0);
+
+ return returnCode;
+}
+
/**
************************************************************************************************************************
* Gfx9Lib::GetMipChainInfo
const Dim3d tailMaxDim =
GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
- UINT_32 mipPitch = mip0Width;
- UINT_32 mipHeight = mip0Height;
- UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
- UINT_32 offset = 0;
- UINT_32 endingMip = numMipLevel - 1;
- BOOL_32 inTail = FALSE;
- BOOL_32 finalDim = FALSE;
-
- BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
- BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
+ UINT_32 mipPitch = mip0Width;
+ UINT_32 mipHeight = mip0Height;
+ UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
+ UINT_32 offset = 0;
+ UINT_32 firstMipIdInTail = numMipLevel;
+ BOOL_32 inTail = FALSE;
+ BOOL_32 finalDim = FALSE;
+ BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
+ BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
{
if (inTail)
{
- endingMip = mipId;
-
- mipPitch = tailMaxDim.w;
- mipHeight = tailMaxDim.h;
+ firstMipIdInTail = mipId;
+ mipPitch = tailMaxDim.w;
+ mipHeight = tailMaxDim.h;
if (is3dThick)
{
}
}
- pMipInfo[mipId].pitch = mipPitch;
- pMipInfo[mipId].height = mipHeight;
- pMipInfo[mipId].depth = mipDepth;
- pMipInfo[mipId].offset = offset;
+ if (pMipInfo != NULL)
+ {
+ pMipInfo[mipId].pitch = mipPitch;
+ pMipInfo[mipId].height = mipHeight;
+ pMipInfo[mipId].depth = mipDepth;
+ pMipInfo[mipId].offset = offset;
+ }
+
offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
if (finalDim)
}
}
- return endingMip;
+ return firstMipIdInTail;
}
/**
Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
) const
{
- BOOL_32 isThick = (pMetaBlkDim->d > 1);
+ BOOL_32 isThick = (pMetaBlkDim->d > 1);
UINT_32 mipWidth = pMetaBlkDim->w;
UINT_32 mipHeight = pMetaBlkDim->h >> 1;
UINT_32 mipDepth = pMetaBlkDim->d;
UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
- UINT_32 macroBlockIndex =
+ UINT_64 macroBlockIndex =
(pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
((pIn->x / localOut.blockWidth) + mipStartPos.w);
- UINT_64 macroBlockOffset = (static_cast<UINT_64>(macroBlockIndex) <<
- GetBlockSizeLog2(pIn->swizzleMode));
-
- pOut->addr = blockOffset | macroBlockOffset;
+ pOut->addr = blockOffset | (macroBlockIndex << log2blkSize);
}
else
{
UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
UINT_32 sliceSizeInBlock =
(localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
- UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
+ UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
pOut->addr = blockOffset | (blockIndex << log2blkSize);
}
return returnCode;
}
+/**
+************************************************************************************************************************
+* Gfx9Lib::ComputeSurfaceInfoLinear
+*
+* @brief
+* Internal function to calculate padding for linear swizzle 2D/3D surface
+*
+* @return
+* N/A
+************************************************************************************************************************
+*/
+ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
+ const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture
+ UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element
+ UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW
+ ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information
+ ) const
+{
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+ UINT_32 elementBytes = pIn->bpp >> 3;
+ UINT_32 pitchAlignInElement = 0;
+
+ if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
+ {
+ ADDR_ASSERT(pIn->numMipLevels <= 1);
+ ADDR_ASSERT(pIn->numSlices <= 1);
+ pitchAlignInElement = 1;
+ }
+ else
+ {
+ pitchAlignInElement = (256 / elementBytes);
+ }
+
+ UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement);
+ UINT_32 slice0PaddedHeight = pIn->height;
+
+ returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
+ &mipChainWidth, &slice0PaddedHeight);
+
+ if (returnCode == ADDR_OK)
+ {
+ UINT_32 mipChainHeight = 0;
+ UINT_32 mipHeight = pIn->height;
+
+ for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
+ {
+ if (pMipInfo != NULL)
+ {
+ pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
+ pMipInfo[i].pitch = mipChainWidth;
+ pMipInfo[i].height = mipHeight;
+ pMipInfo[i].depth = 1;
+ }
+
+ mipChainHeight += mipHeight;
+ mipHeight = RoundHalf(mipHeight);
+ mipHeight = Max(mipHeight, 1u);
+ }
+
+ *pMipmap0PaddedWidth = mipChainWidth;
+ *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
+ }
+
+ return returnCode;
+}
+
} // V2
} // Addr