From a99f4d5382c2a3053c2938f9035b8872ab2c542f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 9 Jun 2020 02:40:20 -0400 Subject: [PATCH] amd/addrlib: don't recompute DCC info for every ComputeDccAddrFromCoord call This decreases the DCC retile map overhead from 23% to 18%. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/addrlib/inc/addrinterface.h | 9 ++ src/amd/addrlib/src/gfx10/gfx10addrlib.cpp | 113 +++++++++------------ src/amd/addrlib/src/gfx9/gfx9addrlib.cpp | 75 +++++--------- src/amd/common/ac_surface.c | 9 ++ 4 files changed, 91 insertions(+), 115 deletions(-) diff --git a/src/amd/addrlib/inc/addrinterface.h b/src/amd/addrlib/inc/addrinterface.h index 5fb3c46e489..b640dee272e 100644 --- a/src/amd/addrlib/inc/addrinterface.h +++ b/src/amd/addrlib/inc/addrinterface.h @@ -3360,6 +3360,15 @@ typedef struct _ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT UINT_32 numFrags; ///< Color surface fragment number UINT_32 pipeXor; ///< pipe Xor setting + UINT_32 pitch; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::pitch + UINT_32 height; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::height + UINT_32 compressBlkWidth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkWidth + UINT_32 compressBlkHeight; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkHeight + UINT_32 compressBlkDepth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkDepth + UINT_32 metaBlkWidth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkWidth + UINT_32 metaBlkHeight; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkHeight + UINT_32 metaBlkDepth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkDepth + UINT_32 dccRamSliceSize; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::dccRamSliceSize } ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT; /** diff --git a/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp b/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp index 49f31550c19..4033c2398d7 100644 --- a/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp +++ b/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp @@ -673,88 +673,67 @@ ADDR_E_RETURNCODE 
Gfx10Lib::HwlComputeDccAddrFromCoord( } else { - ADDR2_COMPUTE_DCCINFO_INPUT input = {0}; - input.size = sizeof(input); - input.dccKeyFlags = pIn->dccKeyFlags; - input.colorFlags = pIn->colorFlags; - input.swizzleMode = pIn->swizzleMode; - input.resourceType = pIn->resourceType; - input.bpp = pIn->bpp; - input.unalignedWidth = Max(pIn->unalignedWidth, 1u); - input.unalignedHeight = Max(pIn->unalignedHeight, 1u); - input.numSlices = Max(pIn->numSlices, 1u); - input.numFrags = Max(pIn->numFrags, 1u); - input.numMipLevels = Max(pIn->numMipLevels, 1u); + const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); + const UINT_32 numPipeLog2 = m_pipesLog2; + const UINT_32 pipeMask = (1 << numPipeLog2) - 1; + UINT_32 index = m_dccBaseIndex + elemLog2; + const UINT_8* patIdxTable; - ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0}; - output.size = sizeof(output); - - returnCode = ComputeDccInfo(&input, &output); - - if (returnCode == ADDR_OK) + if (m_settings.supportRbPlus) { - const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); - const UINT_32 numPipeLog2 = m_pipesLog2; - const UINT_32 pipeMask = (1 << numPipeLog2) - 1; - UINT_32 index = m_dccBaseIndex + elemLog2; - const UINT_8* patIdxTable; + patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX; - if (m_settings.supportRbPlus) + if (pIn->dccKeyFlags.pipeAligned) { - patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX; + index += MaxNumOfBpp; - if (pIn->dccKeyFlags.pipeAligned) + if (m_numPkrLog2 < 2) { - index += MaxNumOfBpp; - - if (m_numPkrLog2 < 2) - { - index += m_pipesLog2 * MaxNumOfBpp; - } - else - { - // 4 groups for "m_numPkrLog2 < 2" case - index += 4 * MaxNumOfBpp; + index += m_pipesLog2 * MaxNumOfBpp; + } + else + { + // 4 groups for "m_numPkrLog2 < 2" case + index += 4 * MaxNumOfBpp; - const UINT_32 dccPipePerPkr = 3; + const UINT_32 dccPipePerPkr = 3; - index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp + - (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp; - } + index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp + + (m_pipesLog2 - m_numPkrLog2) * 
MaxNumOfBpp; } } + } + else + { + patIdxTable = DCC_64K_R_X_PATIDX; + + if (pIn->dccKeyFlags.pipeAligned) + { + index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp; + } else { - patIdxTable = DCC_64K_R_X_PATIDX; - - if (pIn->dccKeyFlags.pipeAligned) - { - index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp; - } - else - { - index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp; - } + index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp; } + } - const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) + elemLog2 - 8; - const UINT_32 blkMask = (1 << blkSizeLog2) - 1; - const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]], - blkSizeLog2 + 1, // +1 for nibble offset - pIn->x, - pIn->y, - pIn->slice, - 0); - const UINT_32 xb = pIn->x / output.metaBlkWidth; - const UINT_32 yb = pIn->y / output.metaBlkHeight; - const UINT_32 pb = output.pitch / output.metaBlkWidth; - const UINT_32 blkIndex = (yb * pb) + xb; - const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask; + const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8; + const UINT_32 blkMask = (1 << blkSizeLog2) - 1; + const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]], + blkSizeLog2 + 1, // +1 for nibble offset + pIn->x, + pIn->y, + pIn->slice, + 0); + const UINT_32 xb = pIn->x / pIn->metaBlkWidth; + const UINT_32 yb = pIn->y / pIn->metaBlkHeight; + const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth; + const UINT_32 blkIndex = (yb * pb) + xb; + const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask; - pOut->addr = (static_cast<UINT_64>(output.dccRamSliceSize) * pIn->slice) + - (blkIndex * (1 << blkSizeLog2)) + - ((blkOffset >> 1) ^ pipeXor); - } + pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) + + (blkIndex * (1 << blkSizeLog2)) + + ((blkOffset >> 1) ^ pipeXor); } 
return returnCode; diff --git a/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp b/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp index ece83592fc9..cc4d5af4c4f 100644 --- a/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp +++ b/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp @@ -987,62 +987,41 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord( } else { - ADDR2_COMPUTE_DCCINFO_INPUT input = {0}; - input.size = sizeof(input); - input.dccKeyFlags = pIn->dccKeyFlags; - input.colorFlags = pIn->colorFlags; - input.swizzleMode = pIn->swizzleMode; - input.resourceType = pIn->resourceType; - input.bpp = pIn->bpp; - input.unalignedWidth = Max(pIn->unalignedWidth, 1u); - input.unalignedHeight = Max(pIn->unalignedHeight, 1u); - input.numSlices = Max(pIn->numSlices, 1u); - input.numFrags = Max(pIn->numFrags, 1u); - input.numMipLevels = Max(pIn->numMipLevels, 1u); - - ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0}; - output.size = sizeof(output); - - returnCode = ComputeDccInfo(&input, &output); + UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); + UINT_32 numSamplesLog2 = Log2(pIn->numFrags); + UINT_32 metaBlkWidthLog2 = Log2(pIn->metaBlkWidth); + UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight); + UINT_32 metaBlkDepthLog2 = Log2(pIn->metaBlkDepth); + UINT_32 compBlkWidthLog2 = Log2(pIn->compressBlkWidth); + UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight); + UINT_32 compBlkDepthLog2 = Log2(pIn->compressBlkDepth); + + MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags, + Gfx9DataColor, pIn->swizzleMode, pIn->resourceType, + metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2, + compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2}; - if (returnCode == ADDR_OK) - { - UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); - UINT_32 numSamplesLog2 = Log2(pIn->numFrags); - UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); - UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); - UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth); - UINT_32 
compBlkWidthLog2 = Log2(output.compressBlkWidth); - UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight); - UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth); - - MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags, - Gfx9DataColor, pIn->swizzleMode, pIn->resourceType, - metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2, - compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2}; + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); - const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); + UINT_32 xb = pIn->x / pIn->metaBlkWidth; + UINT_32 yb = pIn->y / pIn->metaBlkHeight; + UINT_32 zb = pIn->slice / pIn->metaBlkDepth; - UINT_32 xb = pIn->x / output.metaBlkWidth; - UINT_32 yb = pIn->y / output.metaBlkHeight; - UINT_32 zb = pIn->slice / output.metaBlkDepth; - - UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; - UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; - UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; + UINT_32 pitchInBlock = pIn->pitch / pIn->metaBlkWidth; + UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock; + UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; - UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex }; - UINT_64 address = pMetaEq->solve(coords); + UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex }; + UINT_64 address = pMetaEq->solve(coords); - pOut->addr = address >> 1; + pOut->addr = address >> 1; - UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned, - pIn->swizzleMode); + UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned, + pIn->swizzleMode); - UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1)); + UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1)); - pOut->addr ^= (pipeXor << m_pipeInterleaveLog2); - } + pOut->addr ^= (pipeXor << 
m_pipeInterleaveLog2); } return returnCode; diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index c6180865e23..732aea871d6 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -1378,6 +1378,15 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib, addrin.numSlices = 1; addrin.numMipLevels = 1; addrin.numFrags = 1; + addrin.pitch = dout.pitch; + addrin.height = dout.height; + addrin.compressBlkWidth = dout.compressBlkWidth; + addrin.compressBlkHeight = dout.compressBlkHeight; + addrin.compressBlkDepth = dout.compressBlkDepth; + addrin.metaBlkWidth = dout.metaBlkWidth; + addrin.metaBlkHeight = dout.metaBlkHeight; + addrin.metaBlkDepth = dout.metaBlkDepth; + addrin.dccRamSliceSize = dout.dccRamSliceSize; ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT addrout = {}; addrout.size = sizeof(addrout); -- 2.30.2