amd/addrlib: don't recompute DCC info for every ComputeDccAddrFromCoord call
author Marek Olšák <marek.olsak@amd.com>
Tue, 9 Jun 2020 06:40:20 +0000 (02:40 -0400)
committer Marge Bot <eric+marge@anholt.net>
Wed, 10 Jun 2020 15:35:46 +0000 (15:35 +0000)
This decreases the DCC retile map overhead from 23% to 18%.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5398>

src/amd/addrlib/inc/addrinterface.h
src/amd/addrlib/src/gfx10/gfx10addrlib.cpp
src/amd/addrlib/src/gfx9/gfx9addrlib.cpp
src/amd/common/ac_surface.c

index 5fb3c46e489600235a8b7af4ec9e1a9c390e5453..b640dee272eff2ebe8de4830aa651dca137d5535 100644 (file)
@@ -3360,6 +3360,15 @@ typedef struct _ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT
     UINT_32             numFrags;            ///< Color surface fragment number
 
     UINT_32             pipeXor;             ///< pipe Xor setting
+    UINT_32             pitch;               ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::pitch
+    UINT_32             height;              ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::height
+    UINT_32             compressBlkWidth;    ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkWidth
+    UINT_32             compressBlkHeight;   ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkHeight
+    UINT_32             compressBlkDepth;    ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkDepth
+    UINT_32             metaBlkWidth;        ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkWidth
+    UINT_32             metaBlkHeight;       ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkHeight
+    UINT_32             metaBlkDepth;        ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkDepth
+    UINT_32             dccRamSliceSize;     ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::dccRamSliceSize
 } ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT;
 
 /**
index 49f31550c190733a707282d929fdf01120bba17c..4033c2398d7b9e29c23958c17817510a31dbfe4d 100644 (file)
@@ -673,88 +673,67 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccAddrFromCoord(
     }
     else
     {
-        ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
-        input.size            = sizeof(input);
-        input.dccKeyFlags     = pIn->dccKeyFlags;
-        input.colorFlags      = pIn->colorFlags;
-        input.swizzleMode     = pIn->swizzleMode;
-        input.resourceType    = pIn->resourceType;
-        input.bpp             = pIn->bpp;
-        input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
-        input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
-        input.numSlices       = Max(pIn->numSlices,       1u);
-        input.numFrags        = Max(pIn->numFrags,        1u);
-        input.numMipLevels    = Max(pIn->numMipLevels,    1u);
+        const UINT_32  elemLog2    = Log2(pIn->bpp >> 3);
+        const UINT_32  numPipeLog2 = m_pipesLog2;
+        const UINT_32  pipeMask    = (1 << numPipeLog2) - 1;
+        UINT_32        index       = m_dccBaseIndex + elemLog2;
+        const UINT_8*  patIdxTable;
 
-        ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
-        output.size = sizeof(output);
-
-        returnCode = ComputeDccInfo(&input, &output);
-
-        if (returnCode == ADDR_OK)
+        if (m_settings.supportRbPlus)
         {
-            const UINT_32  elemLog2    = Log2(pIn->bpp >> 3);
-            const UINT_32  numPipeLog2 = m_pipesLog2;
-            const UINT_32  pipeMask    = (1 << numPipeLog2) - 1;
-            UINT_32        index       = m_dccBaseIndex + elemLog2;
-            const UINT_8*  patIdxTable;
+            patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX;
 
-            if (m_settings.supportRbPlus)
+            if (pIn->dccKeyFlags.pipeAligned)
             {
-                patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX;
+                index += MaxNumOfBpp;
 
-                if (pIn->dccKeyFlags.pipeAligned)
+                if (m_numPkrLog2 < 2)
                 {
-                    index += MaxNumOfBpp;
-
-                    if (m_numPkrLog2 < 2)
-                    {
-                        index += m_pipesLog2 * MaxNumOfBpp;
-                    }
-                    else
-                    {
-                        // 4 groups for "m_numPkrLog2 < 2" case
-                        index += 4 * MaxNumOfBpp;
+                    index += m_pipesLog2 * MaxNumOfBpp;
+                }
+                else
+                {
+                    // 4 groups for "m_numPkrLog2 < 2" case
+                    index += 4 * MaxNumOfBpp;
 
-                        const UINT_32 dccPipePerPkr = 3;
+                    const UINT_32 dccPipePerPkr = 3;
 
-                        index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
-                                 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
-                    }
+                    index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
+                             (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
                 }
             }
+        }
+        else
+        {
+            patIdxTable = DCC_64K_R_X_PATIDX;
+
+            if (pIn->dccKeyFlags.pipeAligned)
+            {
+                index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
+            }
             else
             {
-                patIdxTable = DCC_64K_R_X_PATIDX;
-
-                if (pIn->dccKeyFlags.pipeAligned)
-                {
-                    index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
-                }
-                else
-                {
-                    index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
-                }
+                index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
             }
+        }
 
-            const UINT_32  blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) + elemLog2 - 8;
-            const UINT_32  blkMask     = (1 << blkSizeLog2) - 1;
-            const UINT_32  blkOffset   = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
-                                                                         blkSizeLog2 + 1, // +1 for nibble offset
-                                                                         pIn->x,
-                                                                         pIn->y,
-                                                                         pIn->slice,
-                                                                         0);
-            const UINT_32 xb       = pIn->x / output.metaBlkWidth;
-            const UINT_32 yb       = pIn->y / output.metaBlkHeight;
-            const UINT_32 pb       = output.pitch / output.metaBlkWidth;
-            const UINT_32 blkIndex = (yb * pb) + xb;
-            const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
+        const UINT_32  blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
+        const UINT_32  blkMask     = (1 << blkSizeLog2) - 1;
+        const UINT_32  blkOffset   = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
+                                                                     blkSizeLog2 + 1, // +1 for nibble offset
+                                                                     pIn->x,
+                                                                     pIn->y,
+                                                                     pIn->slice,
+                                                                     0);
+        const UINT_32 xb       = pIn->x / pIn->metaBlkWidth;
+        const UINT_32 yb       = pIn->y / pIn->metaBlkHeight;
+        const UINT_32 pb       = pIn->pitch / pIn->metaBlkWidth;
+        const UINT_32 blkIndex = (yb * pb) + xb;
+        const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
 
-            pOut->addr = (static_cast<UINT_64>(output.dccRamSliceSize) * pIn->slice) +
-                         (blkIndex * (1 << blkSizeLog2)) +
-                         ((blkOffset >> 1) ^ pipeXor);
-        }
+        pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
+                     (blkIndex * (1 << blkSizeLog2)) +
+                     ((blkOffset >> 1) ^ pipeXor);
     }
 
     return returnCode;
index ece83592fc98ea4d60809a6f5e39f72b2f3100f2..cc4d5af4c4fa8eed41a2f623778fc05b2e0175eb 100644 (file)
@@ -987,62 +987,41 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
     }
     else
     {
-        ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
-        input.size            = sizeof(input);
-        input.dccKeyFlags     = pIn->dccKeyFlags;
-        input.colorFlags      = pIn->colorFlags;
-        input.swizzleMode     = pIn->swizzleMode;
-        input.resourceType    = pIn->resourceType;
-        input.bpp             = pIn->bpp;
-        input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
-        input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
-        input.numSlices       = Max(pIn->numSlices, 1u);
-        input.numFrags        = Max(pIn->numFrags, 1u);
-        input.numMipLevels    = Max(pIn->numMipLevels, 1u);
-
-        ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
-        output.size = sizeof(output);
-
-        returnCode = ComputeDccInfo(&input, &output);
+        UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
+        UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
+        UINT_32 metaBlkWidthLog2  = Log2(pIn->metaBlkWidth);
+        UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
+        UINT_32 metaBlkDepthLog2  = Log2(pIn->metaBlkDepth);
+        UINT_32 compBlkWidthLog2  = Log2(pIn->compressBlkWidth);
+        UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
+        UINT_32 compBlkDepthLog2  = Log2(pIn->compressBlkDepth);
+
+        MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
+                                     Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
+                                     metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
+                                     compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
 
-        if (returnCode == ADDR_OK)
-        {
-            UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
-            UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
-            UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
-            UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
-            UINT_32 metaBlkDepthLog2  = Log2(output.metaBlkDepth);
-            UINT_32 compBlkWidthLog2  = Log2(output.compressBlkWidth);
-            UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
-            UINT_32 compBlkDepthLog2  = Log2(output.compressBlkDepth);
-
-            MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
-                                         Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
-                                         metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
-                                         compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
+        const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 
-            const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
+        UINT_32 xb = pIn->x / pIn->metaBlkWidth;
+        UINT_32 yb = pIn->y / pIn->metaBlkHeight;
+        UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
 
-            UINT_32 xb = pIn->x / output.metaBlkWidth;
-            UINT_32 yb = pIn->y / output.metaBlkHeight;
-            UINT_32 zb = pIn->slice / output.metaBlkDepth;
-
-            UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
-            UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
-            UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
+        UINT_32 pitchInBlock     = pIn->pitch / pIn->metaBlkWidth;
+        UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
+        UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 
-            UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
-            UINT_64 address = pMetaEq->solve(coords);
+        UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
+        UINT_64 address = pMetaEq->solve(coords);
 
-            pOut->addr = address >> 1;
+        pOut->addr = address >> 1;
 
-            UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
-                                                               pIn->swizzleMode);
+        UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
+                                                           pIn->swizzleMode);
 
-            UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
+        UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 
-            pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
-        }
+        pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
     }
 
     return returnCode;
index c6180865e23229252aef57d2d1c077d9f3aa8fa2..732aea871d68cfc78fdd89c679ee772db257d610 100644 (file)
@@ -1378,6 +1378,15 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
                                        addrin.numSlices        = 1;
                                        addrin.numMipLevels     = 1;
                                        addrin.numFrags         = 1;
+                                       addrin.pitch            = dout.pitch;
+                                       addrin.height           = dout.height;
+                                       addrin.compressBlkWidth = dout.compressBlkWidth;
+                                       addrin.compressBlkHeight = dout.compressBlkHeight;
+                                       addrin.compressBlkDepth = dout.compressBlkDepth;
+                                       addrin.metaBlkWidth     = dout.metaBlkWidth;
+                                       addrin.metaBlkHeight    = dout.metaBlkHeight;
+                                       addrin.metaBlkDepth     = dout.metaBlkDepth;
+                                       addrin.dccRamSliceSize  = dout.dccRamSliceSize;
 
                                        ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT addrout = {};
                                        addrout.size = sizeof(addrout);