This decreases the DCC retile map overhead from 23% to 18%.
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5398>
UINT_32 numFrags; ///< Color surface fragment number
UINT_32 pipeXor; ///< pipe Xor setting
+ UINT_32 pitch; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::pitch
+ UINT_32 height; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::height
+ UINT_32 compressBlkWidth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkWidth
+ UINT_32 compressBlkHeight; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkHeight
+ UINT_32 compressBlkDepth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkDepth
+ UINT_32 metaBlkWidth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkWidth
+ UINT_32 metaBlkHeight; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkHeight
+ UINT_32 metaBlkDepth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkDepth
+ UINT_32 dccRamSliceSize; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::dccRamSliceSize
} ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT;
/**
}
else
{
- ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
- input.size = sizeof(input);
- input.dccKeyFlags = pIn->dccKeyFlags;
- input.colorFlags = pIn->colorFlags;
- input.swizzleMode = pIn->swizzleMode;
- input.resourceType = pIn->resourceType;
- input.bpp = pIn->bpp;
- input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
- input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
- input.numSlices = Max(pIn->numSlices, 1u);
- input.numFrags = Max(pIn->numFrags, 1u);
- input.numMipLevels = Max(pIn->numMipLevels, 1u);
+ const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
+ const UINT_32 numPipeLog2 = m_pipesLog2;
+ const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
+ UINT_32 index = m_dccBaseIndex + elemLog2;
+ const UINT_8* patIdxTable;
- ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
- output.size = sizeof(output);
-
- returnCode = ComputeDccInfo(&input, &output);
-
- if (returnCode == ADDR_OK)
+ if (m_settings.supportRbPlus)
{
- const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
- const UINT_32 numPipeLog2 = m_pipesLog2;
- const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
- UINT_32 index = m_dccBaseIndex + elemLog2;
- const UINT_8* patIdxTable;
+ patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX;
- if (m_settings.supportRbPlus)
+ if (pIn->dccKeyFlags.pipeAligned)
{
- patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX;
+ index += MaxNumOfBpp;
- if (pIn->dccKeyFlags.pipeAligned)
+ if (m_numPkrLog2 < 2)
{
- index += MaxNumOfBpp;
-
- if (m_numPkrLog2 < 2)
- {
- index += m_pipesLog2 * MaxNumOfBpp;
- }
- else
- {
- // 4 groups for "m_numPkrLog2 < 2" case
- index += 4 * MaxNumOfBpp;
+ index += m_pipesLog2 * MaxNumOfBpp;
+ }
+ else
+ {
+ // 4 groups for "m_numPkrLog2 < 2" case
+ index += 4 * MaxNumOfBpp;
- const UINT_32 dccPipePerPkr = 3;
+ const UINT_32 dccPipePerPkr = 3;
- index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
- (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
- }
+ index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
+ (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
}
}
+ }
+ else
+ {
+ patIdxTable = DCC_64K_R_X_PATIDX;
+
+ if (pIn->dccKeyFlags.pipeAligned)
+ {
+ index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
+ }
else
{
- patIdxTable = DCC_64K_R_X_PATIDX;
-
- if (pIn->dccKeyFlags.pipeAligned)
- {
- index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
- }
- else
- {
- index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
- }
+ index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
}
+ }
- const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) + elemLog2 - 8;
- const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
- const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
- blkSizeLog2 + 1, // +1 for nibble offset
- pIn->x,
- pIn->y,
- pIn->slice,
- 0);
- const UINT_32 xb = pIn->x / output.metaBlkWidth;
- const UINT_32 yb = pIn->y / output.metaBlkHeight;
- const UINT_32 pb = output.pitch / output.metaBlkWidth;
- const UINT_32 blkIndex = (yb * pb) + xb;
- const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
+ const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
+ const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
+ const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
+ blkSizeLog2 + 1, // +1 for nibble offset
+ pIn->x,
+ pIn->y,
+ pIn->slice,
+ 0);
+ const UINT_32 xb = pIn->x / pIn->metaBlkWidth;
+ const UINT_32 yb = pIn->y / pIn->metaBlkHeight;
+ const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth;
+ const UINT_32 blkIndex = (yb * pb) + xb;
+ const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
- pOut->addr = (static_cast<UINT_64>(output.dccRamSliceSize) * pIn->slice) +
- (blkIndex * (1 << blkSizeLog2)) +
- ((blkOffset >> 1) ^ pipeXor);
- }
+ pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
+ (blkIndex * (1 << blkSizeLog2)) +
+ ((blkOffset >> 1) ^ pipeXor);
}
return returnCode;
}
else
{
- ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
- input.size = sizeof(input);
- input.dccKeyFlags = pIn->dccKeyFlags;
- input.colorFlags = pIn->colorFlags;
- input.swizzleMode = pIn->swizzleMode;
- input.resourceType = pIn->resourceType;
- input.bpp = pIn->bpp;
- input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
- input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
- input.numSlices = Max(pIn->numSlices, 1u);
- input.numFrags = Max(pIn->numFrags, 1u);
- input.numMipLevels = Max(pIn->numMipLevels, 1u);
-
- ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
- output.size = sizeof(output);
-
- returnCode = ComputeDccInfo(&input, &output);
+ UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
+ UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
+ UINT_32 metaBlkWidthLog2 = Log2(pIn->metaBlkWidth);
+ UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
+ UINT_32 metaBlkDepthLog2 = Log2(pIn->metaBlkDepth);
+ UINT_32 compBlkWidthLog2 = Log2(pIn->compressBlkWidth);
+ UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
+ UINT_32 compBlkDepthLog2 = Log2(pIn->compressBlkDepth);
+
+ MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
+ Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
+ metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
+ compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
- if (returnCode == ADDR_OK)
- {
- UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
- UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
- UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
- UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
- UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth);
- UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth);
- UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
- UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
-
- MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
- Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
- metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
- compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
+ const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
- const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
+ UINT_32 xb = pIn->x / pIn->metaBlkWidth;
+ UINT_32 yb = pIn->y / pIn->metaBlkHeight;
+ UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
- UINT_32 xb = pIn->x / output.metaBlkWidth;
- UINT_32 yb = pIn->y / output.metaBlkHeight;
- UINT_32 zb = pIn->slice / output.metaBlkDepth;
-
- UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
- UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
- UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
+ UINT_32 pitchInBlock = pIn->pitch / pIn->metaBlkWidth;
+ UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
+ UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
- UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
- UINT_64 address = pMetaEq->solve(coords);
+ UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
+ UINT_64 address = pMetaEq->solve(coords);
- pOut->addr = address >> 1;
+ pOut->addr = address >> 1;
- UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
- pIn->swizzleMode);
+ UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
+ pIn->swizzleMode);
- UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
+ UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
- pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
- }
+ pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
}
return returnCode;
addrin.numSlices = 1;
addrin.numMipLevels = 1;
addrin.numFrags = 1;
+ addrin.pitch = dout.pitch;
+ addrin.height = dout.height;
+ addrin.compressBlkWidth = dout.compressBlkWidth;
+ addrin.compressBlkHeight = dout.compressBlkHeight;
+ addrin.compressBlkDepth = dout.compressBlkDepth;
+ addrin.metaBlkWidth = dout.metaBlkWidth;
+ addrin.metaBlkHeight = dout.metaBlkHeight;
+ addrin.metaBlkDepth = dout.metaBlkDepth;
+ addrin.dccRamSliceSize = dout.dccRamSliceSize;
ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT addrout = {};
addrout.size = sizeof(addrout);