From: Nicolai Hähnle Date: Wed, 20 Jul 2016 18:25:15 +0000 (+0200) Subject: amdgpu/addrlib: add equation generation X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=10f7d1cb0321c5c15b9eb318c2c59ea0d9ac14db;p=mesa.git amdgpu/addrlib: add equation generation 1. Add a new surface flag, needEquation, for client driver use to force the surface tile setting to be equation compatible. Override 2D/3D macro tile mode to PRT_* tile mode if this flag is TRUE and the number of slices is > 1. 2. Add numEquations and pEquationTable in ADDR_CREATE_OUTPUT structure to return the number of equations and the equation table to the client driver 3. Add equationIndex in ADDR_COMPUTE_SURFACE_INFO_OUTPUT structure to return the equation index to the client driver Please note the use of address equations has the following restrictions: 1) The surface can't be splittable 2) The surface can't have a non-zero tile swizzle value 3) Surfaces with > 1 slices must have PRT tile mode, which disables slice rotation --- diff --git a/src/amd/addrlib/addrinterface.h b/src/amd/addrlib/addrinterface.h index cc1024b34e2..95b187fbebc 100644 --- a/src/amd/addrlib/addrinterface.h +++ b/src/amd/addrlib/addrinterface.h @@ -117,6 +117,65 @@ typedef VOID* ADDR_CLIENT_HANDLE; // Callback functions /////////////////////////////////////////////////////////////////////////////////////////////////// +/** +*************************************************************************************************** +* @brief channel setting structure +*************************************************************************************************** +*/ +typedef union _ADDR_CHANNEL_SETTING +{ + struct + { + UINT_8 valid : 1; ///< Indicate whether this channel setting is valid + UINT_8 channel : 2; ///< 0 for x channel, 1 for y channel, 2 for z channel + UINT_8 index : 5; ///< Channel index + }; + UINT_8 value; ///< Value +} ADDR_CHANNEL_SETTING; + +/** +*************************************************************************************************** +* @brief 
address equation key structure +*************************************************************************************************** +*/ +typedef union _ADDR_EQUATION_KEY +{ + struct + { + UINT_32 log2ElementBytes : 3; ///< Log2 of Bytes per pixel + UINT_32 tileMode : 5; ///< Tile mode + UINT_32 microTileType : 3; ///< Micro tile type + UINT_32 pipeConfig : 5; ///< pipe config + UINT_32 numBanks : 5; ///< Number of banks + UINT_32 bankWidth : 4; ///< Bank width + UINT_32 bankHeight : 4; ///< Bank height + UINT_32 macroAspectRatio : 3; ///< Macro tile aspect ratio + } fields; + UINT_32 value; +} ADDR_EQUATION_KEY; + +/** +*************************************************************************************************** +* @brief address equation structure +*************************************************************************************************** +*/ +#define ADDR_MAX_EQUATION_BIT 20u + +// Invalid equation index +#define ADDR_INVALID_EQUATION_INDEX 0xFFFFFFFF + +typedef struct _ADDR_EQUATION +{ + ADDR_CHANNEL_SETTING addr[ADDR_MAX_EQUATION_BIT]; ///< addr setting + ///< each bit is result of addr ^ xor ^ xor2 + ADDR_CHANNEL_SETTING xor1[ADDR_MAX_EQUATION_BIT]; ///< xor setting + ADDR_CHANNEL_SETTING xor2[ADDR_MAX_EQUATION_BIT]; ///< xor2 setting + UINT_32 numBits; ///< The number of bits in equation + BOOL_32 stackedDepthSlices; ///< TRUE if depth slices are treated as being + ///< stacked vertically prior to swizzling +} ADDR_EQUATION; + + /** *************************************************************************************************** * @brief Alloc system memory flags. 
@@ -322,9 +381,12 @@ typedef struct _ADDR_CREATE_INPUT */ typedef struct _ADDR_CREATE_OUTPUT { - UINT_32 size; ///< Size of this structure in bytes + UINT_32 size; ///< Size of this structure in bytes - ADDR_HANDLE hLib; ///< Address lib handle + ADDR_HANDLE hLib; ///< Address lib handle + + UINT_32 numEquations; ///< Number of equations in the table + const ADDR_EQUATION* pEquationTable; ///< Pointer to the equation table } ADDR_CREATE_OUTPUT; /** @@ -420,33 +482,38 @@ typedef union _ADDR_SURFACE_FLAGS { struct { - UINT_32 color : 1; ///< Flag indicates this is a color buffer - UINT_32 depth : 1; ///< Flag indicates this is a depth/stencil buffer - UINT_32 stencil : 1; ///< Flag indicates this is a stencil buffer - UINT_32 texture : 1; ///< Flag indicates this is a texture - UINT_32 cube : 1; ///< Flag indicates this is a cubemap - UINT_32 volume : 1; ///< Flag indicates this is a volume texture - UINT_32 fmask : 1; ///< Flag indicates this is an fmask - UINT_32 cubeAsArray : 1; ///< Flag indicates if treat cubemap as arrays - UINT_32 compressZ : 1; ///< Flag indicates z buffer is compressed - UINT_32 overlay : 1; ///< Flag indicates this is an overlay surface - UINT_32 noStencil : 1; ///< Flag indicates this depth has no separate stencil - UINT_32 display : 1; ///< Flag indicates this should match display controller req. - UINT_32 opt4Space : 1; ///< Flag indicates this surface should be optimized for space - /// i.e. 
save some memory but may lose performance - UINT_32 prt : 1; ///< Flag for partially resident texture - UINT_32 qbStereo : 1; ///< Quad buffer stereo surface - UINT_32 pow2Pad : 1; ///< SI: Pad to pow2, must set for mipmap (include level0) - UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding - UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable - UINT_32 dispTileType : 1; ///< NI: force display Tiling for 128 bit shared resoruce - UINT_32 dccCompatible : 1; ///< VI: whether to support dcc fast clear - UINT_32 czDispCompatible: 1; ///< SI+: CZ family has a HW bug needs special alignment. - /// This flag indicates we need to follow the alignment with - /// CZ families or other ASICs under PX configuration + CZ. - UINT_32 nonSplit : 1; ///< CI: depth texture should not be split - UINT_32 disableLinearOpt: 1; ///< Disable tile mode optimization to linear - UINT_32 reserved : 9; ///< Reserved bits + UINT_32 color : 1; ///< Flag indicates this is a color buffer + UINT_32 depth : 1; ///< Flag indicates this is a depth/stencil buffer + UINT_32 stencil : 1; ///< Flag indicates this is a stencil buffer + UINT_32 texture : 1; ///< Flag indicates this is a texture + UINT_32 cube : 1; ///< Flag indicates this is a cubemap + UINT_32 volume : 1; ///< Flag indicates this is a volume texture + UINT_32 fmask : 1; ///< Flag indicates this is an fmask + UINT_32 cubeAsArray : 1; ///< Flag indicates if treat cubemap as arrays + UINT_32 compressZ : 1; ///< Flag indicates z buffer is compressed + UINT_32 overlay : 1; ///< Flag indicates this is an overlay surface + UINT_32 noStencil : 1; ///< Flag indicates this depth has no separate stencil + UINT_32 display : 1; ///< Flag indicates this should match display controller req. + UINT_32 opt4Space : 1; ///< Flag indicates this surface should be optimized for space + /// i.e. 
save some memory but may lose performance + UINT_32 prt : 1; ///< Flag for partially resident texture + UINT_32 qbStereo : 1; ///< Quad buffer stereo surface + UINT_32 pow2Pad : 1; ///< SI: Pad to pow2, must set for mipmap (include level0) + UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding + UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable + UINT_32 dispTileType : 1; ///< NI: force display Tiling for 128 bit shared resource + UINT_32 dccCompatible : 1; ///< VI: whether to support dcc fast clear + UINT_32 czDispCompatible : 1; ///< SI+: CZ family has a HW bug that needs special alignment. + /// This flag indicates we need to follow the + /// alignment with CZ families or other ASICs under + /// PX configuration + CZ. + UINT_32 nonSplit : 1; ///< CI: depth texture should not be split + UINT_32 disableLinearOpt : 1; ///< Disable tile mode optimization to linear + UINT_32 needEquation : 1; ///< Make the surface tile setting equation compatible. + /// This flag indicates we need to override tile + /// mode to PRT_* tile mode to disable slice rotation, + /// which is needed by swizzle pattern equation. 
+ UINT_32 reserved : 8; ///< Reserved bits }; UINT_32 value; @@ -474,6 +541,7 @@ typedef struct _ADDR_COMPUTE_SURFACE_INFO_INPUT UINT_32 numSlices; ///< Number of surface slices or depth UINT_32 slice; ///< Slice index UINT_32 mipLevel; ///< Current mipmap level + UINT_32 numMipLevels; ///< Number of mips in mip chain ADDR_SURFACE_FLAGS flags; ///< Surface type flags UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as /// number of samples for normal AA; Set it to the @@ -539,9 +607,15 @@ typedef struct _ADDR_COMPUTE_SURFACE_INFO_OUTPUT UINT_32 last2DLevel : 1; ///< TRUE if this is the last 2D(3D) tiled ///< Only meaningful when create flag checkLast2DLevel is set UINT_32 tcCompatible : 1; ///< If the surface can be shader compatible - UINT_32 reserved :30; ///< Reserved bits + UINT_32 reserved :30; ///< Reserved bits }; + UINT_32 equationIndex; ///< Equation index in the equation table; + + UINT_32 blockWidth; ///< Width in element inside one block(1D->Micro, 2D->Macro) + UINT_32 blockHeight; ///< Height in element inside one block(1D->Micro, 2D->Macro) + UINT_32 blockSlices; ///< Slice number inside one block(1D->Micro, 2D->Macro) + /// Stereo info ADDR_QBSTEREOINFO* pStereoInfo;///< Stereo information, needed when .qbStereo flag is TRUE } ADDR_COMPUTE_SURFACE_INFO_OUTPUT; diff --git a/src/amd/addrlib/core/addrcommon.h b/src/amd/addrlib/core/addrcommon.h index 35320e60c18..9902eb14297 100644 --- a/src/amd/addrlib/core/addrcommon.h +++ b/src/amd/addrlib/core/addrcommon.h @@ -576,5 +576,26 @@ static inline VOID SafeAssign( } } +/** +*************************************************************************************************** +* InitChannel +* +* @brief +* Get channel initialization value +*************************************************************************************************** +*/ +static inline ADDR_CHANNEL_SETTING InitChannel( + UINT_32 valid, ///< [in] valid setting + UINT_32 channel, ///< [in] channel setting + UINT_32 
index) ///< [in] index setting +{ + ADDR_CHANNEL_SETTING t; + t.valid = valid; + t.channel = channel; + t.index = index; + + return t; +} + #endif // __ADDR_COMMON_H__ diff --git a/src/amd/addrlib/core/addrlib.cpp b/src/amd/addrlib/core/addrlib.cpp index ec62f03b4d7..88ff5722b3c 100644 --- a/src/amd/addrlib/core/addrlib.cpp +++ b/src/amd/addrlib/core/addrlib.cpp @@ -266,6 +266,13 @@ ADDR_E_RETURNCODE AddrLib::Create( pCreateOut->hLib = pLib; + if ((pLib != NULL) && + (returnCode == ADDR_OK)) + { + pCreateOut->numEquations = + pLib->HwlGetEquationTableInfo(&pCreateOut->pEquationTable); + } + if ((pLib == NULL) && (returnCode == ADDR_OK)) { diff --git a/src/amd/addrlib/core/addrlib.h b/src/amd/addrlib/core/addrlib.h index d1c5dd755d6..4ba7c2d415f 100644 --- a/src/amd/addrlib/core/addrlib.h +++ b/src/amd/addrlib/core/addrlib.h @@ -196,6 +196,14 @@ protected: /// Pure Virtual function for Hwl converting chip family virtual AddrChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision) = 0; + /// Get equation table pointer and number of equations + virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const + { + *ppEquationTable = NULL; + + return 0; + } + // // Misc helper // diff --git a/src/amd/addrlib/core/addrlib1.cpp b/src/amd/addrlib/core/addrlib1.cpp index 922455bb5b9..81455f0324e 100644 --- a/src/amd/addrlib/core/addrlib1.cpp +++ b/src/amd/addrlib/core/addrlib1.cpp @@ -324,15 +324,11 @@ ADDR_E_RETURNCODE AddrLib1::ComputeSurfaceInfo( if (returnCode == ADDR_OK) { + // HWL layer may override tile mode if necessary + HwlOverrideTileMode(&localIn); + AddrTileMode tileMode = localIn.tileMode; - AddrTileType tileType = localIn.tileType; - // HWL layer may override tile mode if necessary - if (HwlOverrideTileMode(&localIn, &tileMode, &tileType)) - { - localIn.tileMode = tileMode; - localIn.tileType = tileType; - } // Optimize tile mode if possible if (OptimizeTileMode(&localIn, &tileMode)) { @@ -1206,10 +1202,10 @@ 
ADDR_E_RETURNCODE AddrLib1::GetTileIndex( * AddrLib1::Thickness * * @brief -* Compute surface thickness +* Get tile mode thickness * * @return -* Surface thickness +* Tile mode thickness *************************************************************************************************** */ UINT_32 AddrLib1::Thickness( @@ -2734,6 +2730,219 @@ UINT_32 AddrLib1::ComputePipeFromAddr( return pipe; } +/** +*************************************************************************************************** +* AddrLib1::ComputeMicroTileEquation +* +* @brief +* Compute micro tile equation +* +* @return +* If equation can be computed +* +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib1::ComputeMicroTileEquation( + UINT_32 log2BytesPP, ///< [in] log2 of bytes per pixel + AddrTileMode tileMode, ///< [in] tile mode + AddrTileType microTileType, ///< [in] pixel order in display/non-display mode + ADDR_EQUATION* pEquation ///< [out] equation + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + for (UINT_32 i = 0; i < log2BytesPP; i++) + { + pEquation->addr[i].valid = 1; + pEquation->addr[i].channel = 0; + pEquation->addr[i].index = i; + } + + ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[log2BytesPP]; + + ADDR_CHANNEL_SETTING x0 = InitChannel(1, 0, log2BytesPP + 0); + ADDR_CHANNEL_SETTING x1 = InitChannel(1, 0, log2BytesPP + 1); + ADDR_CHANNEL_SETTING x2 = InitChannel(1, 0, log2BytesPP + 2); + ADDR_CHANNEL_SETTING y0 = InitChannel(1, 1, 0); + ADDR_CHANNEL_SETTING y1 = InitChannel(1, 1, 1); + ADDR_CHANNEL_SETTING y2 = InitChannel(1, 1, 2); + ADDR_CHANNEL_SETTING z0 = InitChannel(1, 2, 0); + ADDR_CHANNEL_SETTING z1 = InitChannel(1, 2, 1); + ADDR_CHANNEL_SETTING z2 = InitChannel(1, 2, 2); + + UINT_32 thickness = Thickness(tileMode); + UINT_32 bpp = 1 << (log2BytesPP + 3); + + if (microTileType != ADDR_THICK) + { + if (microTileType == ADDR_DISPLAYABLE) + { + switch (bpp) + { + case 8: + 
pixelBit[0] = x0; + pixelBit[1] = x1; + pixelBit[2] = x2; + pixelBit[3] = y1; + pixelBit[4] = y0; + pixelBit[5] = y2; + break; + case 16: + pixelBit[0] = x0; + pixelBit[1] = x1; + pixelBit[2] = x2; + pixelBit[3] = y0; + pixelBit[4] = y1; + pixelBit[5] = y2; + break; + case 32: + pixelBit[0] = x0; + pixelBit[1] = x1; + pixelBit[2] = y0; + pixelBit[3] = x2; + pixelBit[4] = y1; + pixelBit[5] = y2; + break; + case 64: + pixelBit[0] = x0; + pixelBit[1] = y0; + pixelBit[2] = x1; + pixelBit[3] = x2; + pixelBit[4] = y1; + pixelBit[5] = y2; + break; + case 128: + pixelBit[0] = y0; + pixelBit[1] = x0; + pixelBit[2] = x1; + pixelBit[3] = x2; + pixelBit[4] = y1; + pixelBit[5] = y2; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER) + { + pixelBit[0] = x0; + pixelBit[1] = y0; + pixelBit[2] = x1; + pixelBit[3] = y1; + pixelBit[4] = x2; + pixelBit[5] = y2; + } + else if (microTileType == ADDR_ROTATED) + { + ADDR_ASSERT(thickness == 1); + + switch (bpp) + { + case 8: + pixelBit[0] = y0; + pixelBit[1] = y1; + pixelBit[2] = y2; + pixelBit[3] = x1; + pixelBit[4] = x0; + pixelBit[5] = x2; + break; + case 16: + pixelBit[0] = y0; + pixelBit[1] = y1; + pixelBit[2] = y2; + pixelBit[3] = x0; + pixelBit[4] = x1; + pixelBit[5] = x2; + break; + case 32: + pixelBit[0] = y0; + pixelBit[1] = y1; + pixelBit[2] = x0; + pixelBit[3] = y2; + pixelBit[4] = x1; + pixelBit[5] = x2; + break; + case 64: + pixelBit[0] = y0; + pixelBit[1] = x0; + pixelBit[2] = y1; + pixelBit[3] = x1; + pixelBit[4] = x2; + pixelBit[5] = y2; + break; + default: + retCode = ADDR_NOTSUPPORTED; + break; + } + } + + if (thickness > 1) + { + pixelBit[6] = z0; + pixelBit[7] = z1; + pEquation->numBits = 8 + log2BytesPP; + } + else + { + pEquation->numBits = 6 + log2BytesPP; + } + } + else // ADDR_THICK + { + ADDR_ASSERT(thickness > 1); + + switch (bpp) + { + case 8: + case 16: + pixelBit[0] = x0; + pixelBit[1] = y0; + pixelBit[2] 
= x1; + pixelBit[3] = y1; + pixelBit[4] = z0; + pixelBit[5] = z1; + break; + case 32: + pixelBit[0] = x0; + pixelBit[1] = y0; + pixelBit[2] = x1; + pixelBit[3] = z0; + pixelBit[4] = y1; + pixelBit[5] = z1; + break; + case 64: + case 128: + pixelBit[0] = y0; + pixelBit[1] = x0; + pixelBit[2] = z0; + pixelBit[3] = x1; + pixelBit[4] = y1; + pixelBit[5] = z1; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + pixelBit[6] = x2; + pixelBit[7] = y2; + pEquation->numBits = 8 + log2BytesPP; + } + + if (thickness == 8) + { + pixelBit[8] = z2; + pEquation->numBits = 9 + log2BytesPP; + } + + // stackedDepthSlices is used for addressing mode that a tile block contains multiple slices, + // which is not supported by our address lib + pEquation->stackedDepthSlices = FALSE; + + return retCode; +} + /** *************************************************************************************************** * AddrLib1::ComputePixelIndexWithinMicroTile diff --git a/src/amd/addrlib/core/addrlib1.h b/src/amd/addrlib/core/addrlib1.h index 13d915a7fb5..a852ac25bda 100644 --- a/src/amd/addrlib/core/addrlib1.h +++ b/src/amd/addrlib/core/addrlib1.h @@ -346,13 +346,9 @@ protected: virtual BOOL_32 HwlDegradeBaseLevel( const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const = 0; - virtual BOOL_32 HwlOverrideTileMode( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - AddrTileMode* pTileMode, - AddrTileType* pTileType) const + virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const { - // not supported in hwl layer, FALSE for not-overrided - return FALSE; + // not supported in hwl layer } AddrTileMode DegradeLargeThickTile(AddrTileMode tileMode, UINT_32 bpp) const; @@ -391,6 +387,10 @@ protected: UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const; + ADDR_E_RETURNCODE ComputeMicroTileEquation( + UINT_32 bpp, AddrTileMode tileMode, + AddrTileType microTileType, ADDR_EQUATION* pEquation) const; + UINT_32 
ComputePixelIndexWithinMicroTile( UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 bpp, AddrTileMode tileMode, AddrTileType microTileType) const; @@ -411,6 +411,12 @@ protected: UINT_32 ComputePipeFromAddr( UINT_64 addr, UINT_32 numPipes) const; + virtual ADDR_E_RETURNCODE ComputePipeEquation( + UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const + { + return ADDR_NOTSUPPORTED; + } + /// Pure Virtual function for Hwl computing pipe from coord virtual UINT_32 ComputePipeFromCoord( UINT_32 x, UINT_32 y, UINT_32 slice, AddrTileMode tileMode, diff --git a/src/amd/addrlib/r800/ciaddrlib.cpp b/src/amd/addrlib/r800/ciaddrlib.cpp index 7585e2558b0..3322d952165 100644 --- a/src/amd/addrlib/r800/ciaddrlib.cpp +++ b/src/amd/addrlib/r800/ciaddrlib.cpp @@ -482,6 +482,11 @@ BOOL_32 CiAddrLib::HwlInitGlobalParams( valid = InitMacroTileCfgTable(pRegValue->pMacroTileConfig, pRegValue->noOfMacroEntries); } + if (valid) + { + InitEquationTable(); + } + return valid; } @@ -615,7 +620,7 @@ ADDR_E_RETURNCODE CiAddrLib::HwlSetupTileCfg( } else { - const ADDR_TILECONFIG* pCfgTable = GetTileSetting(index); + const AddrTileConfig* pCfgTable = GetTileSetting(index); if (pInfo != NULL) { @@ -864,18 +869,16 @@ AddrTileMode CiAddrLib::HwlDegradeThickTileMode( * Override THICK to THIN, for specific formats on CI * * @return -* Suitable tile mode +* N/A * *************************************************************************************************** */ -BOOL_32 CiAddrLib::HwlOverrideTileMode( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - AddrTileMode* pTileMode, ///< [in/out] pointer to the tile mode - AddrTileType* pTileType ///< [in/out] pointer to the tile type +VOID CiAddrLib::HwlOverrideTileMode( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in/out] input output structure ) const { - BOOL_32 bOverrided = FALSE; - AddrTileMode tileMode = *pTileMode; + AddrTileMode tileMode = pInOut->tileMode; + AddrTileType 
tileType = pInOut->tileType; // currently, all CI/VI family do not // support ADDR_TM_PRT_2D_TILED_THICK,ADDR_TM_PRT_3D_TILED_THICK and @@ -902,7 +905,7 @@ BOOL_32 CiAddrLib::HwlOverrideTileMode( // tile_thickness = (array_mode == XTHICK) ? 8 : ((array_mode == THICK) ? 4 : 1) if (thickness > 1) { - switch (pIn->format) + switch (pInOut->format) { // see //gfxip/gcB/devel/cds/src/verif/tc/models/csim/tcp.cpp // tcpError("Thick micro tiling is not supported for format... @@ -957,10 +960,10 @@ BOOL_32 CiAddrLib::HwlOverrideTileMode( } // Switch tile type from thick to thin - if (tileMode != *pTileMode) + if (tileMode != pInOut->tileMode) { // see tileIndex: 13-18 - *pTileType = ADDR_NON_DISPLAYABLE; + tileType = ADDR_NON_DISPLAYABLE; } break; @@ -970,13 +973,53 @@ BOOL_32 CiAddrLib::HwlOverrideTileMode( } } - if (tileMode != *pTileMode) + // Override 2D/3D macro tile mode to PRT_* tile mode if + // client driver requests this surface is equation compatible + if ((pInOut->flags.needEquation == TRUE) && + (pInOut->numSamples <= 1) && + (IsMacroTiled(tileMode) == TRUE) && + (IsPrtTileMode(tileMode) == FALSE)) { - *pTileMode = tileMode; - bOverrided = TRUE; + UINT_32 thickness = Thickness(tileMode); + + if (thickness == 1) + { + tileMode = ADDR_TM_PRT_TILED_THIN1; + } + else + { + static const UINT_32 PrtTileBytes = 0x10000; + // First prt thick tile index in the tile mode table + static const UINT_32 PrtThickTileIndex = 22; + ADDR_TILEINFO tileInfo = {0}; + + HwlComputeMacroModeIndex(PrtThickTileIndex, + pInOut->flags, + pInOut->bpp, + pInOut->numSamples, + &tileInfo); + + UINT_32 macroTileBytes = ((pInOut->bpp) >> 3) * 64 * pInOut->numSamples * + thickness * HwlGetPipes(&tileInfo) * + tileInfo.banks * tileInfo.bankWidth * + tileInfo.bankHeight; + + if (macroTileBytes <= PrtTileBytes) + { + tileMode = ADDR_TM_PRT_TILED_THICK; + } + else + { + tileMode = ADDR_TM_PRT_TILED_THIN1; + } + } } - return bOverrided; + if (tileMode != pInOut->tileMode) + { + pInOut->tileMode = 
tileMode; + pInOut->tileType = tileType; + } } /** @@ -1016,7 +1059,10 @@ VOID CiAddrLib::HwlSetupTileInfo( { inTileType = ADDR_NON_DISPLAYABLE; } - else if ((m_allowNonDispThickModes == FALSE) || (inTileType != ADDR_NON_DISPLAYABLE)) + else if ((m_allowNonDispThickModes == FALSE) || + (inTileType != ADDR_NON_DISPLAYABLE) || + // There is no PRT_THICK + THIN entry in tile mode table except Bonaire + (IsPrtTileMode(tileMode) == TRUE)) { inTileType = ADDR_THICK; } @@ -1055,7 +1101,7 @@ VOID CiAddrLib::HwlSetupTileInfo( pOut->tcCompatible = FALSE; } - if (flags.depth && (flags.nonSplit || flags.tcCompatible)) + if (flags.depth && (flags.nonSplit || flags.tcCompatible || flags.needEquation)) { // Texure readable depth surface should not be split switch (tileSize) @@ -1277,7 +1323,7 @@ VOID CiAddrLib::HwlSetupTileInfo( { if (IsMacroTiled(tileMode)) { - UINT_32 tileIndex = static_cast<UINT_32>(pOut->tileIndex); + INT_32 tileIndex = pOut->tileIndex; if ((tileIndex == TileIndexInvalid) && (IsTileInfoAllZero(pTileInfo) == FALSE)) { @@ -1286,7 +1332,7 @@ VOID CiAddrLib::HwlSetupTileInfo( if (tileIndex != TileIndexInvalid) { - ADDR_ASSERT(tileIndex < TileTableSize); + ADDR_ASSERT(static_cast<UINT_32>(tileIndex) < TileTableSize); // Non-depth entries store a split factor UINT_32 sampleSplit = m_tileTable[tileIndex].info.tileSplitBytes; UINT_32 tileBytes1x = BITS_TO_BYTES(bpp * MicroTilePixels * thickness); @@ -1318,7 +1364,7 @@ VOID CiAddrLib::HwlSetupTileInfo( */ VOID CiAddrLib::ReadGbTileMode( UINT_32 regValue, ///< [in] GB_TILE_MODE register - ADDR_TILECONFIG* pCfg ///< [out] output structure + AddrTileConfig* pCfg ///< [out] output structure ) const { GB_TILE_MODE gbTileMode; @@ -1915,3 +1961,4 @@ ADDR_E_RETURNCODE CiAddrLib::HwlGetMaxAlignments( return ADDR_OK; } + diff --git a/src/amd/addrlib/r800/ciaddrlib.h b/src/amd/addrlib/r800/ciaddrlib.h index 750b2b382e1..e959df39231 100644 --- a/src/amd/addrlib/r800/ciaddrlib.h +++ b/src/amd/addrlib/r800/ciaddrlib.h @@ -141,10 +141,7 @@ protected: 
virtual AddrTileMode HwlDegradeThickTileMode( AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const; - virtual BOOL_32 HwlOverrideTileMode( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - AddrTileMode* pTileMode, - AddrTileType* pTileType) const; + virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; virtual ADDR_E_RETURNCODE HwlComputeDccInfo( const ADDR_COMPUTE_DCCINFO_INPUT* pIn, @@ -168,7 +165,7 @@ protected: private: VOID ReadGbTileMode( - UINT_32 regValue, ADDR_TILECONFIG* pCfg) const; + UINT_32 regValue, AddrTileConfig* pCfg) const; VOID ReadGbMacroTileCfg( UINT_32 regValue, ADDR_TILEINFO* pCfg) const; diff --git a/src/amd/addrlib/r800/egbaddrlib.cpp b/src/amd/addrlib/r800/egbaddrlib.cpp index 52cf59b1b04..854d5725df2 100644 --- a/src/amd/addrlib/r800/egbaddrlib.cpp +++ b/src/amd/addrlib/r800/egbaddrlib.cpp @@ -448,7 +448,9 @@ BOOL_32 EgBasedAddrLib::ComputeSurfaceInfoMacroTiled( pOut->pTileInfo, &pOut->baseAlign, &pOut->pitchAlign, - &pOut->heightAlign); + &pOut->heightAlign, + &pOut->blockWidth, + &pOut->blockHeight); if (valid) { @@ -471,23 +473,20 @@ BOOL_32 EgBasedAddrLib::ComputeSurfaceInfoMacroTiled( expHeight, expNumSlices, numSamples, - pOut->pitchAlign, - pOut->heightAlign, + pOut->blockWidth, + pOut->blockHeight, pOut->pTileInfo); if (!IsMacroTiled(expTileMode)) // Downgraded to micro-tiled { return ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, expTileMode); } - else + else if (microTileThickness != Thickness(expTileMode)) { - if (microTileThickness != Thickness(expTileMode)) - { - // - // Re-compute if thickness changed since bank-height may be changed! - // - return ComputeSurfaceInfoMacroTiled(pIn, pOut, padDims, expTileMode); - } + // + // Re-compute if thickness changed since bank-height may be changed! 
+ // + return ComputeSurfaceInfoMacroTiled(pIn, pOut, padDims, expTileMode); } } @@ -507,7 +506,9 @@ BOOL_32 EgBasedAddrLib::ComputeSurfaceInfoMacroTiled( pOut->pTileInfo, &pOut->baseAlign, &pOut->pitchAlign, - &pOut->heightAlign); + &pOut->heightAlign, + &pOut->blockWidth, + &pOut->blockHeight); } // @@ -535,11 +536,51 @@ BOOL_32 EgBasedAddrLib::ComputeSurfaceInfoMacroTiled( } } - // - // Compute the size of a slice. - // - bytesPerSlice = BITS_TO_BYTES(static_cast<UINT_64>(paddedPitch) * - paddedHeight * NextPow2(pIn->bpp) * numSamples); + if ((pIn->flags.needEquation == TRUE) && + (m_chipFamily == ADDR_CHIP_FAMILY_SI) && + (pIn->numMipLevels > 1) && + (pIn->mipLevel == 0)) + { + BOOL_32 convertTo1D = FALSE; + + ADDR_ASSERT(Thickness(expTileMode) == 1); + + for (UINT_32 i = 1; i < pIn->numMipLevels; i++) + { + UINT_32 mipPitch = Max(1u, paddedPitch >> i); + UINT_32 mipHeight = Max(1u, pIn->height >> i); + UINT_32 mipSlices = pIn->flags.volume ? + Max(1u, pIn->numSlices >> i) : pIn->numSlices; + expTileMode = ComputeSurfaceMipLevelTileMode(expTileMode, + pIn->bpp, + mipPitch, + mipHeight, + mipSlices, + numSamples, + pOut->blockWidth, + pOut->blockHeight, + pOut->pTileInfo); + + if (IsMacroTiled(expTileMode)) + { + if (PowTwoAlign(mipPitch, pOut->blockWidth) != + PowTwoAlign(mipPitch, pOut->pitchAlign)) + { + convertTo1D = TRUE; + break; + } + } + else + { + break; + } + } + + if (convertTo1D) + { + return ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, ADDR_TM_1D_TILED_THIN1); + } + } pOut->pitch = paddedPitch; // Put this check right here to workaround special mipmap cases which the original height @@ -555,6 +596,12 @@ BOOL_32 EgBasedAddrLib::ComputeSurfaceInfoMacroTiled( pOut->depth = expNumSlices; + // + // Compute the size of a slice. 
+ // + bytesPerSlice = BITS_TO_BYTES(static_cast<UINT_64>(paddedPitch) * + paddedHeight * NextPow2(pIn->bpp) * numSamples); + pOut->surfSize = bytesPerSlice * expNumSlices; pOut->tileMode = expTileMode; @@ -797,7 +844,9 @@ BOOL_32 EgBasedAddrLib::ComputeSurfaceAlignmentsMacroTiled( ADDR_TILEINFO* pTileInfo, ///< [in/out] bank structure. UINT_32* pBaseAlign, ///< [out] base address alignment in bytes UINT_32* pPitchAlign, ///< [out] pitch alignment in pixels - UINT_32* pHeightAlign ///< [out] height alignment in pixels + UINT_32* pHeightAlign, ///< [out] height alignment in pixels + UINT_32* pMacroTileWidth, ///< [out] macro tile width in pixels + UINT_32* pMacroTileHeight ///< [out] macro tile height in pixels ) const { BOOL_32 valid = SanityCheckMacroTiled(pTileInfo); @@ -858,6 +907,7 @@ BOOL_32 EgBasedAddrLib::ComputeSurfaceAlignmentsMacroTiled( pTileInfo->macroAspectRatio; *pPitchAlign = macroTileWidth; + *pMacroTileWidth = macroTileWidth; AdjustPitchAlignment(flags, pPitchAlign); @@ -868,6 +918,7 @@ BOOL_32 EgBasedAddrLib::ComputeSurfaceAlignmentsMacroTiled( pTileInfo->macroAspectRatio; *pHeightAlign = macroTileHeight; + *pMacroTileHeight = macroTileHeight; // // Compute base alignment @@ -1113,6 +1164,8 @@ BOOL_32 EgBasedAddrLib::HwlDegradeBaseLevel( UINT_32 baseAlign; UINT_32 pitchAlign; UINT_32 heightAlign; + UINT_32 macroTileWidth; + UINT_32 macroTileHeight; ADDR_ASSERT(pIn->pTileInfo); ADDR_TILEINFO tileInfo = *pIn->pTileInfo; @@ -1143,11 +1196,13 @@ BOOL_32 EgBasedAddrLib::HwlDegradeBaseLevel( &tileInfo, &baseAlign, &pitchAlign, - &heightAlign); + &heightAlign, + &macroTileWidth, + &macroTileHeight); if (valid) { - degrade = (pIn->width < pitchAlign || pIn->height < heightAlign); + degrade = ((pIn->width < macroTileWidth) || (pIn->height < macroTileHeight)); // Check whether 2D tiling still has too much footprint if (degrade == FALSE) { @@ -1411,6 +1466,137 @@ UINT_64 EgBasedAddrLib::DispatchComputeSurfaceAddrFromCoord( return addr; } +/** 
+*************************************************************************************************** +* EgBasedAddrLib::ComputeMacroTileEquation +* +* @brief +* Computes the address equation in macro tile +* @return +* If equation can be computed +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedAddrLib::ComputeMacroTileEquation( + UINT_32 log2BytesPP, ///< [in] log2 of bytes per pixel + AddrTileMode tileMode, ///< [in] tile mode + AddrTileType microTileType, ///< [in] micro tiling type + ADDR_TILEINFO* pTileInfo, ///< [in] bank structure + ADDR_EQUATION* pEquation ///< [out] Equation for addressing in macro tile + ) const +{ + ADDR_E_RETURNCODE retCode; + + // Element equation within a tile + retCode = ComputeMicroTileEquation(log2BytesPP, tileMode, microTileType, pEquation); + + if (retCode == ADDR_OK) + { + // Tile equation with single pipe bank + UINT_32 numPipes = HwlGetPipes(pTileInfo); + UINT_32 numPipeBits = Log2(numPipes); + + for (UINT_32 i = 0; i < Log2(pTileInfo->bankWidth); i++) + { + pEquation->addr[pEquation->numBits].valid = 1; + pEquation->addr[pEquation->numBits].channel = 0; + pEquation->addr[pEquation->numBits].index = i + log2BytesPP + 3 + numPipeBits; + pEquation->numBits++; + } + + for (UINT_32 i = 0; i < Log2(pTileInfo->bankHeight); i++) + { + pEquation->addr[pEquation->numBits].valid = 1; + pEquation->addr[pEquation->numBits].channel = 1; + pEquation->addr[pEquation->numBits].index = i + 3; + pEquation->numBits++; + } + + ADDR_EQUATION equation; + memset(&equation, 0, sizeof(ADDR_EQUATION)); + + UINT_32 thresholdX = 32; + UINT_32 thresholdY = 32; + + if (IsPrtNoRotationTileMode(tileMode)) + { + UINT_32 macroTilePitch = + (MicroTileWidth * pTileInfo->bankWidth * numPipes) * pTileInfo->macroAspectRatio; + UINT_32 macroTileHeight = + (MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks) / + pTileInfo->macroAspectRatio; + thresholdX = Log2(macroTilePitch); 
+ thresholdY = Log2(macroTileHeight); + } + + // Pipe equation + retCode = ComputePipeEquation(log2BytesPP, thresholdX, thresholdY, pTileInfo, &equation); + + if (retCode == ADDR_OK) + { + UINT_32 pipeBitStart = Log2(m_pipeInterleaveBytes); + + if (pEquation->numBits > pipeBitStart) + { + UINT_32 numLeftShift = pEquation->numBits - pipeBitStart; + + for (UINT_32 i = 0; i < numLeftShift; i++) + { + pEquation->addr[pEquation->numBits + equation.numBits - i - 1] = + pEquation->addr[pEquation->numBits - i - 1]; + pEquation->xor1[pEquation->numBits + equation.numBits - i - 1] = + pEquation->xor1[pEquation->numBits - i - 1]; + pEquation->xor2[pEquation->numBits + equation.numBits - i - 1] = + pEquation->xor2[pEquation->numBits - i - 1]; + } + } + + for (UINT_32 i = 0; i < equation.numBits; i++) + { + pEquation->addr[pipeBitStart + i] = equation.addr[i]; + pEquation->xor1[pipeBitStart + i] = equation.xor1[i]; + pEquation->xor2[pipeBitStart + i] = equation.xor2[i]; + pEquation->numBits++; + } + + // Bank equation + memset(&equation, 0, sizeof(ADDR_EQUATION)); + + retCode = ComputeBankEquation(log2BytesPP, thresholdX, thresholdY, + pTileInfo, &equation); + + if (retCode == ADDR_OK) + { + UINT_32 bankBitStart = pipeBitStart + numPipeBits + Log2(m_bankInterleave); + + if (pEquation->numBits > bankBitStart) + { + UINT_32 numLeftShift = pEquation->numBits - bankBitStart; + + for (UINT_32 i = 0; i < numLeftShift; i++) + { + pEquation->addr[pEquation->numBits + equation.numBits - i - 1] = + pEquation->addr[pEquation->numBits - i - 1]; + pEquation->xor1[pEquation->numBits + equation.numBits - i - 1] = + pEquation->xor1[pEquation->numBits - i - 1]; + pEquation->xor2[pEquation->numBits + equation.numBits - i - 1] = + pEquation->xor2[pEquation->numBits - i - 1]; + } + } + + for (UINT_32 i = 0; i < equation.numBits; i++) + { + pEquation->addr[bankBitStart + i] = equation.addr[i]; + pEquation->xor1[bankBitStart + i] = equation.xor1[i]; + pEquation->xor2[bankBitStart + i] = 
equation.xor2[i]; + pEquation->numBits++; + } + } + } + } + + return retCode; +} + /** *************************************************************************************************** * EgBasedAddrLib::ComputeSurfaceAddrFromCoordMicroTiled diff --git a/src/amd/addrlib/r800/egbaddrlib.h b/src/amd/addrlib/r800/egbaddrlib.h index d43eca81aae..a4240829fa0 100644 --- a/src/amd/addrlib/r800/egbaddrlib.h +++ b/src/amd/addrlib/r800/egbaddrlib.h @@ -254,6 +254,13 @@ protected: ADDR_TILEINFO* pTileInfo) const; /// Addressing functions + virtual ADDR_E_RETURNCODE ComputeBankEquation( + UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, + ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const + { + return ADDR_NOTSUPPORTED; + } + UINT_32 ComputeBankFromCoord( UINT_32 x, UINT_32 y, UINT_32 slice, AddrTileMode tileMode, UINT_32 bankSwizzle, UINT_32 tileSpitSlice, @@ -281,6 +288,10 @@ protected: UINT_32 pitch, UINT_32 height, UINT_32 bpp, BOOL_32 isLinear, UINT_32 numSlices, UINT_64* sliceBytes, UINT_32 baseAlign) const; + ADDR_E_RETURNCODE ComputeMacroTileEquation( + UINT_32 log2BytesPP, AddrTileMode tileMode, AddrTileType microTileType, + ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const; + // Static functions static BOOL_32 IsTileInfoAllZero(ADDR_TILEINFO* pTileInfo); static UINT_32 ComputeFmaskNumPlanesFromNumSamples(UINT_32 numSamples); @@ -318,7 +329,8 @@ private: AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 mipLevel, UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, - UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const; + UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign, + UINT_32* pMacroTileWidth, UINT_32* pMacroTileHeight) const; /// Surface addressing functions UINT_64 DispatchComputeSurfaceAddrFromCoord( diff --git a/src/amd/addrlib/r800/siaddrlib.cpp b/src/amd/addrlib/r800/siaddrlib.cpp index 694c0f3a4bb..686bb7fc343 100644 --- a/src/amd/addrlib/r800/siaddrlib.cpp +++ 
b/src/amd/addrlib/r800/siaddrlib.cpp @@ -73,7 +73,8 @@ AddrLib* AddrSIHwlInit(const AddrClient* pClient) */ SiAddrLib::SiAddrLib(const AddrClient* pClient) : EgBasedAddrLib(pClient), - m_noOfEntries(0) + m_noOfEntries(0), + m_numEquations(0) { m_class = SI_ADDRLIB; memset(&m_settings, 0, sizeof(m_settings)); @@ -166,6 +167,338 @@ UINT_32 SiAddrLib::GetPipePerSurf( return numPipes; } +/** +*************************************************************************************************** +* SiAddrLib::ComputeBankEquation +* +* @brief +* Compute bank equation +* +* @return +* If equation can be computed +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE SiAddrLib::ComputeBankEquation( + UINT_32 log2BytesPP, ///< [in] log2 of bytes per pixel + UINT_32 threshX, ///< [in] threshold for x channel + UINT_32 threshY, ///< [in] threshold for y channel + ADDR_TILEINFO* pTileInfo, ///< [in] tile info + ADDR_EQUATION* pEquation ///< [out] bank equation + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + UINT_32 pipes = HwlGetPipes(pTileInfo); + UINT_32 bankXStart = 3 + Log2(pipes) + Log2(pTileInfo->bankWidth); + UINT_32 bankYStart = 3 + Log2(pTileInfo->bankHeight); + + ADDR_CHANNEL_SETTING x3 = InitChannel(1, 0, log2BytesPP + bankXStart); + ADDR_CHANNEL_SETTING x4 = InitChannel(1, 0, log2BytesPP + bankXStart + 1); + ADDR_CHANNEL_SETTING x5 = InitChannel(1, 0, log2BytesPP + bankXStart + 2); + ADDR_CHANNEL_SETTING x6 = InitChannel(1, 0, log2BytesPP + bankXStart + 3); + ADDR_CHANNEL_SETTING y3 = InitChannel(1, 1, bankYStart); + ADDR_CHANNEL_SETTING y4 = InitChannel(1, 1, bankYStart + 1); + ADDR_CHANNEL_SETTING y5 = InitChannel(1, 1, bankYStart + 2); + ADDR_CHANNEL_SETTING y6 = InitChannel(1, 1, bankYStart + 3); + + x3.value = (threshX > bankXStart) ? x3.value : 0; + x4.value = (threshX > bankXStart + 1) ? x4.value : 0; + x5.value = (threshX > bankXStart + 2) ? 
x5.value : 0; + x6.value = (threshX > bankXStart + 3) ? x6.value : 0; + y3.value = (threshY > bankYStart) ? y3.value : 0; + y4.value = (threshY > bankYStart + 1) ? y4.value : 0; + y5.value = (threshY > bankYStart + 2) ? y5.value : 0; + y6.value = (threshY > bankYStart + 3) ? y6.value : 0; + + switch (pTileInfo->banks) + { + case 16: + pEquation->addr[0] = y6; + pEquation->xor1[0] = x3; + pEquation->addr[1] = y5; + pEquation->xor1[1] = y6; + pEquation->xor2[1] = x4; + pEquation->addr[2] = y4; + pEquation->xor1[2] = x5; + pEquation->addr[3] = y3; + pEquation->xor1[3] = x6; + pEquation->numBits = 4; + break; + case 8: + pEquation->addr[0] = y5; + pEquation->xor1[0] = x3; + pEquation->addr[1] = y4; + pEquation->xor1[1] = y5; + pEquation->xor2[1] = x4; + pEquation->addr[2] = y3; + pEquation->xor1[2] = x5; + pEquation->numBits = 3; + break; + case 4: + pEquation->addr[0] = y4; + pEquation->xor1[0] = x3; + pEquation->addr[1] = y3; + pEquation->xor1[1] = x4; + pEquation->numBits = 2; + break; + case 2: + pEquation->addr[0] = y3; + pEquation->xor1[0] = x3; + pEquation->numBits = 1; + break; + default: + pEquation->numBits = 0; + retCode = ADDR_NOTSUPPORTED; + ADDR_ASSERT_ALWAYS(); + break; + } + + for (UINT_32 i = 0; i < pEquation->numBits; i++) + { + if (pEquation->addr[i].value == 0) + { + if (pEquation->xor1[i].value == 0) + { + // 00X -> X00 + pEquation->addr[i].value = pEquation->xor2[i].value; + pEquation->xor2[i].value = 0; + } + else + { + pEquation->addr[i].value = pEquation->xor1[i].value; + + if (pEquation->xor2[i].value != 0) + { + // 0XY -> XY0 + pEquation->xor1[i].value = pEquation->xor2[i].value; + pEquation->xor2[i].value = 0; + } + else + { + // 0X0 -> X00 + pEquation->xor1[i].value = 0; + } + } + } + else if (pEquation->xor1[i].value == 0) + { + if (pEquation->xor2[i].value != 0) + { + // X0Y -> XY0 + pEquation->xor1[i].value = pEquation->xor2[i].value; + pEquation->xor2[i].value = 0; + } + } + } + + if ((pTileInfo->bankWidth == 1) && + 
((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) || + (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32))) + { + retCode = ADDR_NOTSUPPORTED; + } + + return retCode; +} + +/** +*************************************************************************************************** +* SiAddrLib::ComputePipeEquation +* +* @brief +* Compute pipe equation +* +* @return +* If equation can be computed +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE SiAddrLib::ComputePipeEquation( + UINT_32 log2BytesPP, ///< [in] Log2 of bytes per pixel + UINT_32 threshX, ///< [in] Threshold for X channel + UINT_32 threshY, ///< [in] Threshold for Y channel + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + ADDR_EQUATION* pEquation ///< [out] Pipe configure + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + ADDR_CHANNEL_SETTING* pAddr = pEquation->addr; + ADDR_CHANNEL_SETTING* pXor1 = pEquation->xor1; + ADDR_CHANNEL_SETTING* pXor2 = pEquation->xor2; + + ADDR_CHANNEL_SETTING x3 = InitChannel(1, 0, 3 + log2BytesPP); + ADDR_CHANNEL_SETTING x4 = InitChannel(1, 0, 4 + log2BytesPP); + ADDR_CHANNEL_SETTING x5 = InitChannel(1, 0, 5 + log2BytesPP); + ADDR_CHANNEL_SETTING x6 = InitChannel(1, 0, 6 + log2BytesPP); + ADDR_CHANNEL_SETTING y3 = InitChannel(1, 1, 3); + ADDR_CHANNEL_SETTING y4 = InitChannel(1, 1, 4); + ADDR_CHANNEL_SETTING y5 = InitChannel(1, 1, 5); + ADDR_CHANNEL_SETTING y6 = InitChannel(1, 1, 6); + + x3.value = (threshX > 3) ? x3.value : 0; + x4.value = (threshX > 4) ? x4.value : 0; + x5.value = (threshX > 5) ? x5.value : 0; + x6.value = (threshX > 6) ? x6.value : 0; + y3.value = (threshY > 3) ? y3.value : 0; + y4.value = (threshY > 4) ? y4.value : 0; + y5.value = (threshY > 5) ? y5.value : 0; + y6.value = (threshY > 6) ? 
y6.value : 0; + + switch (pTileInfo->pipeConfig) + { + case ADDR_PIPECFG_P2: + pAddr[0] = x3; + pXor1[0] = y3; + pEquation->numBits = 1; + break; + case ADDR_PIPECFG_P4_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pAddr[1] = x3; + pXor1[1] = y4; + pEquation->numBits = 2; + break; + case ADDR_PIPECFG_P4_16x16: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y4; + pEquation->numBits = 2; + break; + case ADDR_PIPECFG_P4_16x32: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y5; + pEquation->numBits = 2; + break; + case ADDR_PIPECFG_P4_32x32: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x5; + pXor1[1] = y5; + pEquation->numBits = 2; + break; + case ADDR_PIPECFG_P8_16x16_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x3; + pXor1[1] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_16x32_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x3; + pXor1[1] = y4; + pAddr[2] = x4; + pXor1[2] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_16x32_16x16: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x5; + pXor1[1] = y4; + pAddr[2] = x4; + pXor1[2] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_32x32_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x3; + pXor1[1] = y4; + pAddr[2] = x5; + pXor1[2] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_32x32_16x16: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y4; + pAddr[2] = x5; + pXor1[2] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_32x32_16x32: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y6; + pAddr[2] = x5; + pXor1[2] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_32x64_32x32: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x6; + pXor1[1] = y5; + pAddr[2] = x5; + pXor1[2] = y6; + 
pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P16_32x32_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pAddr[1] = x3; + pXor1[1] = y4; + pAddr[2] = x5; + pXor1[2] = y6; + pAddr[3] = x6; + pXor1[3] = y5; + pEquation->numBits = 4; + break; + case ADDR_PIPECFG_P16_32x32_16x16: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y4; + pAddr[2] = x5; + pXor1[2] = y6; + pAddr[3] = x6; + pXor1[3] = y5; + pEquation->numBits = 4; + break; + default: + ADDR_UNHANDLED_CASE(); + pEquation->numBits = 0; + retCode = ADDR_NOTSUPPORTED; + break; + } + + for (UINT_32 i = 0; i < pEquation->numBits; i++) + { + if (pAddr[i].value == 0) + { + if (pXor1[i].value == 0) + { + pAddr[i].value = pXor2[i].value; + } + else + { + pAddr[i].value = pXor1[i].value; + pXor1[i].value = 0; + } + } + } + + return retCode; +} + /** *************************************************************************************************** * SiAddrLib::ComputePipeFromCoord @@ -1889,6 +2222,11 @@ BOOL_32 SiAddrLib::HwlInitGlobalParams( valid = InitTileSettingTable(pRegValue->pTileConfig, pRegValue->noOfEntries); + if (valid) + { + InitEquationTable(); + } + m_maxSamples = 16; } @@ -2174,7 +2512,31 @@ ADDR_E_RETURNCODE SiAddrLib::HwlComputeSurfaceInfo( { pOut->tileIndex = pIn->tileIndex; - return EgBasedAddrLib::HwlComputeSurfaceInfo(pIn,pOut); + ADDR_E_RETURNCODE retCode = EgBasedAddrLib::HwlComputeSurfaceInfo(pIn, pOut); + + UINT_32 tileIndex = static_cast<UINT_32>(pOut->tileIndex); + + if ((pIn->flags.needEquation == TRUE) && + (pIn->numSamples <= 1) && + (tileIndex < TileTableSize)) + { + pOut->equationIndex = m_equationLookupTable[Log2(pIn->bpp >> 3)][tileIndex]; + + if (pOut->equationIndex != ADDR_INVALID_EQUATION_INDEX) + { + pOut->blockWidth = m_blockWidth[pOut->equationIndex]; + + pOut->blockHeight = m_blockHeight[pOut->equationIndex]; + + pOut->blockSlices = m_blockSlices[pOut->equationIndex]; + } + } + else + { + pOut->equationIndex = ADDR_INVALID_EQUATION_INDEX; + } + + return 
retCode; } /** @@ -2282,8 +2644,8 @@ VOID SiAddrLib::HwlCheckLastMacroTiledLvl( nextHeight, nextSlices, pIn->numSamples, - pOut->pitchAlign, - pOut->heightAlign, + pOut->blockWidth, + pOut->blockHeight, pOut->pTileInfo); pOut->last2DLevel = IsMicroTiled(nextTileMode); @@ -2345,7 +2707,7 @@ BOOL_32 SiAddrLib::HwlTileInfoEqual( * Tile setting info. *************************************************************************************************** */ -const ADDR_TILECONFIG* SiAddrLib::GetTileSetting( +const AddrTileConfig* SiAddrLib::GetTileSetting( UINT_32 index ///< [in] Tile index ) const { @@ -2484,7 +2846,7 @@ ADDR_E_RETURNCODE SiAddrLib::HwlSetupTileCfg( } else { - const ADDR_TILECONFIG* pCfgTable = GetTileSetting(index); + const AddrTileConfig* pCfgTable = GetTileSetting(index); if (pInfo) { @@ -2525,7 +2887,7 @@ ADDR_E_RETURNCODE SiAddrLib::HwlSetupTileCfg( */ VOID SiAddrLib::ReadGbTileMode( UINT_32 regValue, ///< [in] GB_TILE_MODE register - ADDR_TILECONFIG* pCfg ///< [out] output structure + AddrTileConfig* pCfg ///< [out] output structure ) const { GB_TILE_MODE gbTileMode; @@ -2773,18 +3135,15 @@ UINT_32 SiAddrLib::HwlComputeFmaskBits( * Override tile modes (for PRT only, avoid client passes in an invalid PRT mode for SI. 
* * @return -* Suitable tile mode +* N/A * *************************************************************************************************** */ -BOOL_32 SiAddrLib::HwlOverrideTileMode( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - AddrTileMode* pTileMode, ///< [in/out] pointer to the tile mode - AddrTileType* pTileType ///< [in/out] pointer to the tile type +void SiAddrLib::HwlOverrideTileMode( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in/out] input output structure ) const { - BOOL_32 bOverrided = FALSE; - AddrTileMode tileMode = *pTileMode; + AddrTileMode tileMode = pInOut->tileMode; switch (tileMode) { @@ -2808,14 +3167,34 @@ BOOL_32 SiAddrLib::HwlOverrideTileMode( break; } - if (tileMode != *pTileMode) + if ((pInOut->flags.needEquation == TRUE) && + (IsMacroTiled(tileMode) == TRUE) && + (pInOut->numSamples <= 1)) { - *pTileMode = tileMode; - bOverrided = TRUE; - ADDR_ASSERT(pIn->flags.prt == TRUE); + UINT_32 thickness = Thickness(tileMode); + + pInOut->flags.prt = TRUE; + + if (thickness > 1) + { + tileMode = ADDR_TM_1D_TILED_THICK; + } + else if (pInOut->numSlices > 1) + { + tileMode = ADDR_TM_1D_TILED_THIN1; + } + else + { + tileMode = ADDR_TM_2D_TILED_THIN1; + } } - return bOverrided; + if (tileMode != pInOut->tileMode) + { + pInOut->tileMode = tileMode; + + ADDR_ASSERT(pInOut->flags.prt == TRUE); + } } /** @@ -2864,3 +3243,249 @@ ADDR_E_RETURNCODE SiAddrLib::HwlGetMaxAlignments( return ADDR_OK; } +/** +*************************************************************************************************** +* SiAddrLib::InitEquationTable +* +* @brief +* Initialize Equation table. 
+* +* @return +* N/A +*************************************************************************************************** +*/ +VOID SiAddrLib::InitEquationTable() +{ + ADDR_EQUATION_KEY equationKeyTable[EquationTableSize]; + memset(equationKeyTable, 0, sizeof(equationKeyTable)); + + memset(m_equationTable, 0, sizeof(m_equationTable)); + + memset(m_blockWidth, 0, sizeof(m_blockWidth)); + + memset(m_blockHeight, 0, sizeof(m_blockHeight)); + + memset(m_blockSlices, 0, sizeof(m_blockSlices)); + + // Loop all possible bpp + for (UINT_32 log2ElementBytes = 0; log2ElementBytes < MaxNumElementBytes; log2ElementBytes++) + { + // Get bits per pixel + UINT_32 bpp = 1 << (log2ElementBytes + 3); + + // Loop all possible tile index + for (INT_32 tileIndex = 0; tileIndex < m_noOfEntries; tileIndex++) + { + UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX; + + AddrTileConfig tileConfig = m_tileTable[tileIndex]; + + ADDR_SURFACE_FLAGS flags = {{0}}; + + // Compute tile info, hardcode numSamples to 1 because MSAA is not supported + // in swizzle pattern equation + HwlComputeMacroModeIndex(tileIndex, flags, bpp, 1, &tileConfig.info, NULL, NULL); + + // Check if the input is supported + if (IsEquationSupported(bpp, tileConfig, tileIndex) == TRUE) + { + ADDR_EQUATION_KEY key = {{0}}; + + // Generate swizzle equation key from bpp and tile config + key.fields.log2ElementBytes = log2ElementBytes; + key.fields.tileMode = tileConfig.mode; + // Treat depth micro tile type and non-display micro tile type as the same key + // because they have the same equation actually + key.fields.microTileType = (tileConfig.type == ADDR_DEPTH_SAMPLE_ORDER) ? 
+ ADDR_NON_DISPLAYABLE : tileConfig.type; + key.fields.pipeConfig = tileConfig.info.pipeConfig; + key.fields.numBanks = tileConfig.info.banks; + key.fields.bankWidth = tileConfig.info.bankWidth; + key.fields.bankHeight = tileConfig.info.bankHeight; + key.fields.macroAspectRatio = tileConfig.info.macroAspectRatio; + + // Find in the table if the equation has been built based on the key + for (UINT_32 i = 0; i < m_numEquations; i++) + { + if (key.value == equationKeyTable[i].value) + { + equationIndex = i; + break; + } + } + + // If found, just fill the index into the lookup table and no need + // to generate the equation again. Otherwise, generate the equation. + if (equationIndex == ADDR_INVALID_EQUATION_INDEX) + { + ADDR_EQUATION equation; + ADDR_E_RETURNCODE retCode; + + memset(&equation, 0, sizeof(ADDR_EQUATION)); + + // Generate the equation + if (IsMicroTiled(tileConfig.mode)) + { + retCode = ComputeMicroTileEquation(log2ElementBytes, + tileConfig.mode, + tileConfig.type, + &equation); + } + else + { + retCode = ComputeMacroTileEquation(log2ElementBytes, + tileConfig.mode, + tileConfig.type, + &tileConfig.info, + &equation); + } + // Only fill the equation into the table if the return code is ADDR_OK, + // otherwise if the return code is not ADDR_OK, it indicates this is not + // a valid input, we do nothing but just fill invalid equation index + // into the lookup table. 
+ if (retCode == ADDR_OK) + { + equationIndex = m_numEquations; + ADDR_ASSERT(equationIndex < EquationTableSize); + + m_blockSlices[equationIndex] = Thickness(tileConfig.mode); + + if (IsMicroTiled(tileConfig.mode)) + { + m_blockWidth[equationIndex] = MicroTileWidth; + m_blockHeight[equationIndex] = MicroTileHeight; + } + else + { + const ADDR_TILEINFO* pTileInfo = &tileConfig.info; + + m_blockWidth[equationIndex] = + HwlGetPipes(pTileInfo) * MicroTileWidth * pTileInfo->bankWidth * + pTileInfo->macroAspectRatio; + m_blockHeight[equationIndex] = + MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks / + pTileInfo->macroAspectRatio; + + if (m_chipFamily == ADDR_CHIP_FAMILY_SI) + { + static const UINT_32 PrtTileSize = 0x10000; + + UINT_32 macroTileSize = + m_blockWidth[equationIndex] * m_blockHeight[equationIndex] * + bpp / 8; + + if (macroTileSize < PrtTileSize) + { + UINT_32 numMacroTiles = PrtTileSize / macroTileSize; + + ADDR_ASSERT(macroTileSize == (1u << equation.numBits)); + ADDR_ASSERT((PrtTileSize % macroTileSize) == 0); + + UINT_32 numBits = Log2(numMacroTiles); + + UINT_32 xStart = Log2(m_blockWidth[equationIndex]) + + log2ElementBytes; + + m_blockWidth[equationIndex] *= numMacroTiles; + + for (UINT_32 i = 0; i < numBits; i++) + { + equation.addr[equation.numBits + i].valid = 1; + equation.addr[equation.numBits + i].index = xStart + i; + } + + equation.numBits += numBits; + } + } + } + + equationKeyTable[equationIndex] = key; + m_equationTable[equationIndex] = equation; + + m_numEquations++; + } + } + } + + // Fill the index into the lookup table, if the combination is not supported + // fill the invalid equation index + m_equationLookupTable[log2ElementBytes][tileIndex] = equationIndex; + } + } +} + +/** +*************************************************************************************************** +* SiAddrLib::IsEquationSupported +* +* @brief +* Check if it is supported for given bpp and tile config to generate a equation. 
+* +* @return +* TRUE if supported +*************************************************************************************************** +*/ +BOOL_32 SiAddrLib::IsEquationSupported( + UINT_32 bpp, ///< Bits per pixel + AddrTileConfig tileConfig, ///< Tile config + INT_32 tileIndex ///< Tile index + ) const +{ + BOOL_32 supported = TRUE; + + // Linear tile mode is not supported in swizzle pattern equation + if (IsLinear(tileConfig.mode)) + { + supported = FALSE; + } + // These tile modes are for Tex2DArray and Tex3D which has depth (num_slice > 1) use, + // which is not supported in swizzle pattern equation due to slice rotation + else if ((tileConfig.mode == ADDR_TM_2D_TILED_THICK) || + (tileConfig.mode == ADDR_TM_2D_TILED_XTHICK) || + (tileConfig.mode == ADDR_TM_3D_TILED_THIN1) || + (tileConfig.mode == ADDR_TM_3D_TILED_THICK) || + (tileConfig.mode == ADDR_TM_3D_TILED_XTHICK)) + { + supported = FALSE; + } + // Only 8bpp(stencil), 16bpp and 32bpp is supported for depth + else if ((tileConfig.type == ADDR_DEPTH_SAMPLE_ORDER) && (bpp > 32)) + { + supported = FALSE; + } + // Tile split is not supported in swizzle pattern equation + else if (IsMacroTiled(tileConfig.mode)) + { + UINT_32 thickness = Thickness(tileConfig.mode); + if (((bpp >> 3) * MicroTilePixels * thickness) > tileConfig.info.tileSplitBytes) + { + supported = FALSE; + } + + if ((supported == TRUE) && (m_chipFamily == ADDR_CHIP_FAMILY_SI)) + { + // Please refer to SiAddrLib::HwlSetupTileInfo for PRT tile index selecting + // Tile index 3, 6, 21-25 are for PRT single sample + if (tileIndex == 3) + { + supported = (bpp == 16); + } + else if (tileIndex == 6) + { + supported = (bpp == 32); + } + else if ((tileIndex >= 21) && (tileIndex <= 25)) + { + supported = (bpp == 8u * (1u << (static_cast<UINT_32>(tileIndex) - 21u))); + } + else + { + supported = FALSE; + } + } + } + + return supported; +} + + diff --git a/src/amd/addrlib/r800/siaddrlib.h b/src/amd/addrlib/r800/siaddrlib.h index 9201fb220fd..814cd0095db 100644 --- 
a/src/amd/addrlib/r800/siaddrlib.h +++ b/src/amd/addrlib/r800/siaddrlib.h @@ -42,7 +42,7 @@ * @brief Describes the information in tile mode table *************************************************************************************************** */ -struct ADDR_TILECONFIG +struct AddrTileConfig { AddrTileMode mode; AddrTileType type; @@ -131,6 +131,14 @@ protected: UINT_32 pitch, UINT_32 height, UINT_32 bpp, BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const; + virtual ADDR_E_RETURNCODE ComputeBankEquation( + UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, + ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const; + + virtual ADDR_E_RETURNCODE ComputePipeEquation( + UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, + ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const; + virtual UINT_32 ComputePipeFromCoord( UINT_32 x, UINT_32 y, UINT_32 slice, AddrTileMode tileMode, UINT_32 pipeSwizzle, BOOL_32 ignoreSE, @@ -173,10 +181,7 @@ protected: virtual AddrTileMode HwlDegradeThickTileMode( AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const; - virtual BOOL_32 HwlOverrideTileMode( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - AddrTileMode* pTileMode, - AddrTileType* pTileType) const; + virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; virtual BOOL_32 HwlSanityCheckMacroTiled( ADDR_TILEINFO* pTileInfo) const @@ -229,6 +234,18 @@ protected: virtual ADDR_E_RETURNCODE HwlGetMaxAlignments(ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) const; + // Get equation table pointer and number of equations + virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const + { + *ppEquationTable = m_equationTable; + + return m_numEquations; + } + + // Check if it is supported for given bpp and tile config to generate an equation + BOOL_32 IsEquationSupported( + UINT_32 bpp, AddrTileConfig tileConfig, INT_32 tileIndex) const; + // Protected non-virtual functions VOID 
ComputeTileCoordFromPipeAndElemIdx( UINT_32 elemIdx, UINT_32 pipe, AddrPipeCfg pipeCfg, UINT_32 pitchInMacroTile, @@ -241,19 +258,36 @@ protected: BOOL_32 DecodeGbRegs( const ADDR_REGISTER_VALUE* pRegValue); - const ADDR_TILECONFIG* GetTileSetting( + const AddrTileConfig* GetTileSetting( UINT_32 index) const; + // Initialize equation table + VOID InitEquationTable(); + static const UINT_32 TileTableSize = 32; - ADDR_TILECONFIG m_tileTable[TileTableSize]; + AddrTileConfig m_tileTable[TileTableSize]; UINT_32 m_noOfEntries; + // Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp) + static const UINT_32 MaxNumElementBytes = 5; + // More than half slots in tile mode table can't support equation + static const UINT_32 EquationTableSize = (MaxNumElementBytes * TileTableSize) / 2; + // Equation table + ADDR_EQUATION m_equationTable[EquationTableSize]; + UINT_32 m_blockWidth[EquationTableSize]; + UINT_32 m_blockHeight[EquationTableSize]; + UINT_32 m_blockSlices[EquationTableSize]; + // Number of equation entries in the table + UINT_32 m_numEquations; + // Equation lookup table according to bpp and tile index + UINT_32 m_equationLookupTable[MaxNumElementBytes][TileTableSize]; + private: UINT_32 GetPipePerSurf(AddrPipeCfg pipeConfig) const; VOID ReadGbTileMode( - UINT_32 regValue, ADDR_TILECONFIG* pCfg) const; + UINT_32 regValue, AddrTileConfig* pCfg) const; BOOL_32 InitTileSettingTable( const UINT_32 *pSetting, UINT_32 noOfEntries);