From 982d99490f228aa03ad3e06db9149f7baf1a0a58 Mon Sep 17 00:00:00 2001 From: Jan Zielinski Date: Tue, 30 Jul 2019 15:33:53 +0200 Subject: [PATCH] swr/rasterizer: do not mark tiles dirty until actually rendered Reviewed-by: Alok Hota --- src/gallium/drivers/swr/rasterizer/core/api.h | 1 + .../drivers/swr/rasterizer/core/backend.cpp | 2 +- .../swr/rasterizer/core/backend_impl.h | 24 +++++++++++++++++++ .../swr/rasterizer/core/backend_sample.cpp | 12 ++++++++++ .../rasterizer/core/backend_singlesample.cpp | 13 ++++++++++ .../drivers/swr/rasterizer/core/context.h | 5 ++++ .../swr/rasterizer/core/rasterizer_impl.h | 4 +++- .../drivers/swr/rasterizer/core/tilemgr.cpp | 4 ++-- .../drivers/swr/rasterizer/core/tilemgr.h | 3 ++- .../drivers/swr/rasterizer/jitter/builder.cpp | 3 ++- .../drivers/swr/rasterizer/jitter/builder.h | 1 + .../swr/rasterizer/jitter/builder_mem.cpp | 5 +++- .../swr/rasterizer/jitter/builder_mem.h | 3 ++- 13 files changed, 72 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h index 4d523bb346d..e1ba893296e 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.h +++ b/src/gallium/drivers/swr/rasterizer/core/api.h @@ -588,6 +588,7 @@ SWR_FUNC(void, uint32_t threadGroupCountY, uint32_t threadGroupCountZ); +/// @note this enum needs to be kept in sync with HOTTILE_STATE! enum SWR_TILE_STATE { SWR_TILE_INVALID = 0, // tile is in unitialized state and should be loaded with surface contents diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp index 8cf50879726..49b163ee6ae 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp @@ -185,7 +185,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT* pDC, pHotTile->pBuffer); } - if (pHotTile->state == HOTTILE_DIRTY || pHotTile->state == HOTTILE_RESOLVED) + if (pHotTile->state == HOTTILE_DIRTY || pHotTile->state == HOTTILE_RESOLVED) { if (!(pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY && pHotTile->state == HOTTILE_RESOLVED)) diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h index ad6b78a665f..9e74e2cee8e 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h +++ b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h @@ -28,6 +28,8 @@ ******************************************************************************/ #pragma once +#include "tilemgr.h" + void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_INPUT_COVERAGE_COUNT][2][2]); void InitBackendSampleFuncTable( PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2]); @@ -624,6 +626,19 @@ inline void SetupRenderBuffers(uint8_t* pColorBuffer[SWR_NUM_RENDERT } } +INLINE void SetRenderHotTilesDirty(DRAW_CONTEXT* pDC, RenderOutputBuffers& renderBuffers) +{ + const API_STATE& state = GetApiState(pDC); + + unsigned long rtSlot = 0; + uint32_t colorHottileEnableMask = state.colorHottileEnable; + while (_BitScanForward(&rtSlot, colorHottileEnableMask)) + { + colorHottileEnableMask &= ~(1 << rtSlot); + renderBuffers.pColorHotTile[rtSlot]->state = HOTTILE_DIRTY; + } +} + template void SetupPixelShaderContext(SWR_PS_CONTEXT* psContext, const SWR_MULTISAMPLE_POS& samplePos, @@ -1029,6 +1044,8 @@ void BackendPixelRate(DRAW_CONTEXT* pDC, state.colorHottileEnable, renderBuffers); + bool isTileDirty = false; + RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 0); PixelRateZTestLoop PixelRateZTest(pDC, @@ -1139,6 +1156,8 @@ void BackendPixelRate(DRAW_CONTEXT* pDC, goto Endtile; }; + isTileDirty = true; + // late-Z if (!T::bCanEarlyZ && !T::bForcedSampleCount) { @@ -1252,6 +1271,11 @@ void BackendPixelRate(DRAW_CONTEXT* pDC, psContext.vY.center = _simd_add_ps(psContext.vY.center, dy); } + if (isTileDirty) + { + SetRenderHotTilesDirty(pDC, renderBuffers); + } + RDTSC_END(pDC->pContext->pBucketMgr, BEPixelRateBackend, 0); } diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp index 04e5e3d58bc..39e078bf401 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp @@ -65,6 +65,8 @@ void BackendSampleRate(DRAW_CONTEXT* pDC, state.colorHottileEnable, renderBuffers); + bool isTileDirty = false; + RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 0); psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast(y))); @@ -211,6 +213,11 @@ void BackendSampleRate(DRAW_CONTEXT* pDC, vCoverageMask = _simd_castsi_ps(psContext.activeMask); + if (_simd_movemask_ps(vCoverageMask)) + { + isTileDirty = true; + } + // late-Z if (!T::bCanEarlyZ) { @@ -319,6 +326,11 @@ void BackendSampleRate(DRAW_CONTEXT* pDC, psContext.vY.center = _simd_add_ps(psContext.vY.center, dy); } + if (isTileDirty) + { + SetRenderHotTilesDirty(pDC, renderBuffers); + } + RDTSC_END(pDC->pContext->pBucketMgr, BESampleRateBackend, 0); } diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp index 2b868269477..01cb26bb253 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp @@ -66,6 +66,9 @@ void BackendSingleSample(DRAW_CONTEXT* pDC, state.colorHottileEnable, renderBuffers); + // Indicates backend rendered something to the color buffer + bool isTileDirty = false; + RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 1); psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast(y))); @@ -192,6 +195,11 @@ void BackendSingleSample(DRAW_CONTEXT* pDC, vCoverageMask = _simd_castsi_ps(psContext.activeMask); + if (_simd_movemask_ps(vCoverageMask)) + { + isTileDirty = true; + } + // late-Z if (!T::bCanEarlyZ) { @@ -301,6 +309,11 @@ void BackendSingleSample(DRAW_CONTEXT* pDC, psContext.vY.center = _simd_add_ps(psContext.vY.center, dy); } + if (isTileDirty) + { + SetRenderHotTilesDirty(pDC, renderBuffers); + } + RDTSC_END(pDC->pContext->pBucketMgr, BESingleSampleBackend, 0); } diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index d17baea94a0..5a8656dcfba 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -330,12 +330,17 @@ OSALIGNLINE(struct) API_STATE class MacroTileMgr; class DispatchQueue; +class HOTTILE; struct RenderOutputBuffers { uint8_t* pColor[SWR_NUM_RENDERTARGETS]; uint8_t* pDepth; uint8_t* pStencil; + + HOTTILE* pColorHotTile[SWR_NUM_RENDERTARGETS]; + HOTTILE* pDepthHotTile; + HOTTILE* pStencilHotTile; }; // Plane equation A/B/C coeffs used to evaluate I/J barycentric coords diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h b/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h index 4a0fd0934b0..b81baa98191 100644 --- a/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h +++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h @@ -1459,8 +1459,8 @@ void GetRenderHotTiles(DRAW_CONTEXT* pDC, true, numSamples, renderTargetArrayIndex); - pColor->state = HOTTILE_DIRTY; renderBuffers.pColor[rtSlot] = pColor->pBuffer + offset; + renderBuffers.pColorHotTile[rtSlot] = pColor; colorHottileEnableMask &= ~(1 << rtSlot); } @@ -1483,6 +1483,7 @@ void GetRenderHotTiles(DRAW_CONTEXT* pDC, pDepth->state = HOTTILE_DIRTY; SWR_ASSERT(pDepth->pBuffer != nullptr); renderBuffers.pDepth = pDepth->pBuffer + offset; + renderBuffers.pDepthHotTile = pDepth; } if (state.stencilHottileEnable) { @@ -1503,6 +1504,7 @@ void GetRenderHotTiles(DRAW_CONTEXT* pDC, pStencil->state = HOTTILE_DIRTY; SWR_ASSERT(pStencil->pBuffer != nullptr); renderBuffers.pStencil = pStencil->pBuffer + offset; + renderBuffers.pStencilHotTile = pStencil; } } diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp index 13f4e37f027..a23de56a0a5 100644 --- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp @@ -204,7 +204,7 @@ HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, hotTile.pBuffer); hotTile.renderTargetArrayIndex = renderTargetArrayIndex; - hotTile.state = HOTTILE_DIRTY; + hotTile.state = HOTTILE_RESOLVED; } } return &tile.Attachment[attachment]; @@ -378,7 +378,7 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); - pHotTile->state = HOTTILE_DIRTY; + pHotTile->state = HOTTILE_RESOLVED; RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0); } else if (pHotTile->state == HOTTILE_CLEAR) diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h index ea6e28c4b05..8e3b41ea1e7 100644 --- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h +++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h @@ -233,13 +233,14 @@ public: OSALIGNLINE(volatile long) mTasksOutstanding{0}; }; +/// @note this enum needs to be kept in sync with SWR_TILE_STATE! enum HOTTILE_STATE { HOTTILE_INVALID, // tile is in unitialized state and should be loaded with surface contents // before rendering HOTTILE_CLEAR, // tile should be cleared HOTTILE_DIRTY, // tile has been rendered to - HOTTILE_RESOLVED, // tile has been stored to memory + HOTTILE_RESOLVED, // tile is consistent with memory (either loaded or stored) }; struct HOTTILE diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp index ef95e0103f8..53f11d66db1 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp @@ -56,10 +56,11 @@ namespace SwrJit mInt8Ty = Type::getInt8Ty(pJitMgr->mContext); mInt16Ty = Type::getInt16Ty(pJitMgr->mContext); mInt32Ty = Type::getInt32Ty(pJitMgr->mContext); + mInt64Ty = Type::getInt64Ty(pJitMgr->mContext); mInt8PtrTy = PointerType::get(mInt8Ty, 0); mInt16PtrTy = PointerType::get(mInt16Ty, 0); mInt32PtrTy = PointerType::get(mInt32Ty, 0); - mInt64Ty = Type::getInt64Ty(pJitMgr->mContext); + mInt64PtrTy = PointerType::get(mInt64Ty, 0); mSimd4FP64Ty = VectorType::get(mDoubleTy, 4); diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.h b/src/gallium/drivers/swr/rasterizer/jitter/builder.h index d252482a1d9..97550fad23d 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.h @@ -91,6 +91,7 @@ namespace SwrJit Type* mInt8PtrTy; Type* mInt16PtrTy; Type* mInt32PtrTy; + Type* mInt64PtrTy; Type* mSimd4FP64Ty; diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp index b32686c7583..3ec2cb32522 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp @@ -646,7 +646,10 @@ namespace SwrJit Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage) { AssertMemoryUsageParams(pDst, usage); - +// if (vSrc->getType() != mSimdFP32Ty) +// { +// vSrc = BITCAST(vSrc, mSimdFP32Ty); +// } SWR_ASSERT(vSrc->getType()->getVectorElementType()->isFloatTy()); VSCATTERPS(pDst, vMask, vOffsets, vSrc, C(1)); return; diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h index 49e132e3756..e548d8dd138 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h @@ -36,7 +36,8 @@ enum class MEM_CLIENT GFX_MEM_CLIENT_FETCH, GFX_MEM_CLIENT_SAMPLER, GFX_MEM_CLIENT_SHADER, - GFX_MEM_CLIENT_STREAMOUT + GFX_MEM_CLIENT_STREAMOUT, + GFX_MEM_CLIENT_URB }; protected: -- 2.30.2