From 3614999878fd1335e69ecb0d181a9f6d2b91e3f8 Mon Sep 17 00:00:00 2001 From: Xavi Zhang Date: Wed, 9 Jul 2014 02:46:00 -0400 Subject: [PATCH] amdgpu/addrlib: Rewrite tile mode optimization code Note: remove reference to degrade4Space and use opt4Space instead. --- src/amd/addrlib/addrinterface.h | 6 +-- src/amd/addrlib/core/addrcommon.h | 3 +- src/amd/addrlib/core/addrlib.cpp | 47 ++++++++++++------- src/amd/addrlib/core/addrlib.h | 2 +- src/amd/addrlib/r800/egbaddrlib.cpp | 16 +++++++ .../winsys/amdgpu/radv_amdgpu_surface.c | 5 +- .../winsys/amdgpu/drm/amdgpu_surface.c | 12 ++--- 7 files changed, 57 insertions(+), 34 deletions(-) diff --git a/src/amd/addrlib/addrinterface.h b/src/amd/addrlib/addrinterface.h index d05c6ef08db..a50717c1ceb 100644 --- a/src/amd/addrlib/addrinterface.h +++ b/src/amd/addrlib/addrinterface.h @@ -246,9 +246,8 @@ typedef union _ADDR_CREATE_FLAGS UINT_32 useCombinedSwizzle : 1; ///< Use combined tile swizzle UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment - UINT_32 degradeBaseLevel : 1; ///< Degrade to 1D modes automatically for base level UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize - UINT_32 reserved : 24; ///< Reserved bits for future use + UINT_32 reserved : 25; ///< Reserved bits for future use }; UINT_32 value; @@ -440,7 +439,6 @@ typedef union _ADDR_SURFACE_FLAGS UINT_32 qbStereo : 1; ///< Quad buffer stereo surface UINT_32 pow2Pad : 1; ///< SI: Pad to pow2, must set for mipmap (include level0) UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding - UINT_32 degrade4Space : 1; ///< Degrade base level's tile mode to save memory UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable UINT_32 dispTileType : 1; ///< NI: force display Tiling for 128 bit shared resoruce UINT_32 dccCompatible : 1; ///< VI: whether to support dcc fast clear @@ -448,7 +446,7 @@ typedef 
union _ADDR_SURFACE_FLAGS /// This flag indicates we need to follow the alignment with /// CZ families or other ASICs under PX configuration + CZ. UINT_32 nonSplit : 1; ///< CI: depth texture should not be split - UINT_32 reserved : 9; ///< Reserved bits + UINT_32 reserved : 10; ///< Reserved bits }; UINT_32 value; diff --git a/src/amd/addrlib/core/addrcommon.h b/src/amd/addrlib/core/addrcommon.h index f996c9a3402..88cbad0b3ba 100644 --- a/src/amd/addrlib/core/addrcommon.h +++ b/src/amd/addrlib/core/addrcommon.h @@ -132,9 +132,8 @@ union ADDR_CONFIG_FLAGS UINT_32 useCombinedSwizzle : 1; ///< Use combined swizzle UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment - UINT_32 degradeBaseLevel : 1; ///< Degrade to 1D modes automatically for base level UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize - UINT_32 reserved : 22; ///< Reserved bits for future use + UINT_32 reserved : 23; ///< Reserved bits for future use }; UINT_32 value; diff --git a/src/amd/addrlib/core/addrlib.cpp b/src/amd/addrlib/core/addrlib.cpp index 8cf4a245229..b92568ec183 100644 --- a/src/amd/addrlib/core/addrlib.cpp +++ b/src/amd/addrlib/core/addrlib.cpp @@ -264,7 +264,6 @@ ADDR_E_RETURNCODE AddrLib::Create( pLib->m_configFlags.useCombinedSwizzle = pCreateIn->createFlags.useCombinedSwizzle; pLib->m_configFlags.checkLast2DLevel = pCreateIn->createFlags.checkLast2DLevel; pLib->m_configFlags.useHtileSliceAlign = pCreateIn->createFlags.useHtileSliceAlign; - pLib->m_configFlags.degradeBaseLevel = pCreateIn->createFlags.degradeBaseLevel; pLib->m_configFlags.allowLargeThickTile = pCreateIn->createFlags.allowLargeThickTile; pLib->SetAddrChipFamily(pCreateIn->chipFamily, pCreateIn->chipRevision); @@ -559,8 +558,8 @@ ADDR_E_RETURNCODE AddrLib::ComputeSurfaceInfo( localIn.tileMode = tileMode; localIn.tileType = tileType; } - // Degrade base level if applicable - if (DegradeBaseLevel(&localIn, 
&tileMode)) + // Optimize tile mode if possible + if (OptimizeTileMode(&localIn, &tileMode)) { localIn.tileMode = tileMode; } @@ -3493,34 +3492,44 @@ VOID AddrLib::ComputeMipLevel( /** *************************************************************************************************** -* AddrLib::DegradeBaseLevel +* AddrLib::OptimizeTileMode * * @brief -* Check if base level's tile mode can be degraded +* Check if base level's tile mode can be optimized (degraded) * @return * TRUE if degraded, also returns degraded tile mode (unchanged if not degraded) *************************************************************************************************** */ -BOOL_32 AddrLib::DegradeBaseLevel( +BOOL_32 AddrLib::OptimizeTileMode( const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure for surface info AddrTileMode* pTileMode ///< [out] Degraded tile mode ) const { - BOOL_32 degraded = FALSE; AddrTileMode tileMode = pIn->tileMode; UINT_32 thickness = ComputeSurfaceThickness(tileMode); - if (m_configFlags.degradeBaseLevel) // This is a global setting + // Optimization can only be done on level 0 and samples <= 1 + if ((pIn->flags.opt4Space == TRUE) && + (pIn->mipLevel == 0) && + (pIn->numSamples <= 1) && + (pIn->flags.display == FALSE) && + (IsPrtTileMode(tileMode) == FALSE) && + (pIn->flags.prt == FALSE)) { - if (pIn->flags.degrade4Space && // Degradation per surface - pIn->mipLevel == 0 && - pIn->numSamples == 1 && - IsMacroTiled(tileMode)) + // Check if linear mode is optimal + if ((pIn->height == 1) && + (IsLinear(tileMode) == FALSE) && + (AddrElemLib::IsBlockCompressed(pIn->format) == FALSE) && + (pIn->flags.depth == FALSE) && + (pIn->flags.stencil == FALSE)) + { + tileMode = ADDR_TM_LINEAR_ALIGNED; + } + else if (IsMacroTiled(tileMode)) { if (HwlDegradeBaseLevel(pIn)) { - *pTileMode = thickness == 1 ? ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; - degraded = TRUE; + tileMode = (thickness == 1) ? 
ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; } else if (thickness > 1) { @@ -3534,15 +3543,19 @@ BOOL_32 AddrLib::DegradeBaseLevel( input.tileMode = tileMode; if (HwlDegradeBaseLevel(&input)) { - *pTileMode = ADDR_TM_1D_TILED_THICK; - degraded = TRUE; + tileMode = ADDR_TM_1D_TILED_THICK; } } } } } - return degraded; + BOOL_32 optimized = (tileMode != pIn->tileMode); + if (optimized) + { + *pTileMode = tileMode; + } + return optimized; } /** diff --git a/src/amd/addrlib/core/addrlib.h b/src/amd/addrlib/core/addrlib.h index 43c55ff32ff..d693fd2bcbf 100644 --- a/src/amd/addrlib/core/addrlib.h +++ b/src/amd/addrlib/core/addrlib.h @@ -652,7 +652,7 @@ private: VOID SetMinPitchAlignPixels(UINT_32 minPitchAlignPixels); - BOOL_32 DegradeBaseLevel( + BOOL_32 OptimizeTileMode( const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, AddrTileMode* pTileMode) const; protected: diff --git a/src/amd/addrlib/r800/egbaddrlib.cpp b/src/amd/addrlib/r800/egbaddrlib.cpp index abd1a79ed80..5d80906aea3 100644 --- a/src/amd/addrlib/r800/egbaddrlib.cpp +++ b/src/amd/addrlib/r800/egbaddrlib.cpp @@ -1158,6 +1158,22 @@ BOOL_32 EgBasedAddrLib::HwlDegradeBaseLevel( if (valid) { degrade = (pIn->width < pitchAlign || pIn->height < heightAlign); + // Check whether 2D tiling still has too much footprint + if (degrade == FALSE) + { + // Only check width and height as slices are aligned to thickness + UINT_64 unalignedSize = pIn->width * pIn->height; + + UINT_32 alignedPitch = PowTwoAlign(pIn->width, pitchAlign); + UINT_32 alignedHeight = PowTwoAlign(pIn->height, heightAlign); + UINT_64 alignedSize = alignedPitch * alignedHeight; + + // alignedSize > 1.5 * unalignedSize + if (2 * alignedSize > 3 * unalignedSize) + { + degrade = TRUE; + } + } } else { diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c index 89e84d60a3a..0433952e749 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c +++ 
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c @@ -140,7 +140,6 @@ ADDR_HANDLE radv_amdgpu_addr_create(struct amdgpu_gpu_info *amdinfo, int family, createFlags.value = 0; createFlags.useTileIndex = 1; - createFlags.degradeBaseLevel = 1; addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND; addrCreateInput.chipFamily = family; @@ -398,7 +397,7 @@ static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws, AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP; AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0; AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0; - AddrSurfInfoIn.flags.degrade4Space = 1; + AddrSurfInfoIn.flags.opt4Space = 1; /* DCC notes: * - If we add MSAA support, keep in mind that CB can't decompress 8bpp @@ -437,7 +436,7 @@ static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws, AddrTileInfoIn.macroAspectRatio = surf->mtilea; AddrTileInfoIn.tileSplitBytes = surf->tile_split; AddrTileInfoIn.pipeConfig = surf->pipe_config + 1; /* +1 compared to GB_TILE_MODE */ - AddrSurfInfoIn.flags.degrade4Space = 0; + AddrSurfInfoIn.flags.opt4Space = 0; AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn; /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c index abe2b2a67af..8632f0687fc 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c @@ -124,7 +124,6 @@ ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws) createFlags.value = 0; createFlags.useTileIndex = 1; - createFlags.degradeBaseLevel = 1; createFlags.useHtileSliceAlign = 1; addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND; @@ -401,11 +400,10 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been * requested, because TC-compatible HTILE requires 2D tiling. 
*/ - AddrSurfInfoIn.flags.degrade4Space = !AddrSurfInfoIn.flags.tcCompatible && - !AddrSurfInfoIn.flags.fmask && - tex->nr_samples <= 1 && - (flags & RADEON_SURF_OPTIMIZE_FOR_SPACE); - AddrSurfInfoIn.flags.opt4Space = AddrSurfInfoIn.flags.degrade4Space; + AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible && + !AddrSurfInfoIn.flags.fmask && + tex->nr_samples <= 1 && + (flags & RADEON_SURF_OPTIMIZE_FOR_SPACE); /* DCC notes: * - If we add MSAA support, keep in mind that CB can't decompress 8bpp @@ -447,7 +445,7 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, AddrTileInfoIn.macroAspectRatio = surf->mtilea; AddrTileInfoIn.tileSplitBytes = surf->tile_split; AddrTileInfoIn.pipeConfig = surf->pipe_config + 1; /* +1 compared to GB_TILE_MODE */ - AddrSurfInfoIn.flags.degrade4Space = 0; + AddrSurfInfoIn.flags.opt4Space = 0; AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn; /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set -- 2.30.2