swr/rasterizer: do not mark tiles dirty until actually rendered
author Jan Zielinski <jan.zielinski@intel.com>
Tue, 30 Jul 2019 13:33:53 +0000 (15:33 +0200)
committer Jan Zielinski <jan.zielinski@intel.com>
Thu, 8 Aug 2019 08:16:20 +0000 (10:16 +0200)
Reviewed-by: Alok Hota <alok.hota@intel.com>
13 files changed:
src/gallium/drivers/swr/rasterizer/core/api.h
src/gallium/drivers/swr/rasterizer/core/backend.cpp
src/gallium/drivers/swr/rasterizer/core/backend_impl.h
src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
src/gallium/drivers/swr/rasterizer/core/context.h
src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h
src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
src/gallium/drivers/swr/rasterizer/core/tilemgr.h
src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
src/gallium/drivers/swr/rasterizer/jitter/builder.h
src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h

index 4d523bb346d73ea98c692aac8eda5d20d032aedf..e1ba893296e9427cc02324dd04a88d918de68511 100644 (file)
@@ -588,6 +588,7 @@ SWR_FUNC(void,
          uint32_t threadGroupCountY,
          uint32_t threadGroupCountZ);
 
+/// @note this enum needs to be kept in sync with HOTTILE_STATE!
 enum SWR_TILE_STATE
 {
     SWR_TILE_INVALID = 0, // tile is in unitialized state and should be loaded with surface contents
index 8cf508797260d99b994cf00434379e3831ff6cbe..49b163ee6aed8a0190ce0b1878780189db4216de 100644 (file)
@@ -185,7 +185,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT*               pDC,
                                    pHotTile->pBuffer);
         }
 
-        if (pHotTile->state == HOTTILE_DIRTY || pHotTile->state == HOTTILE_RESOLVED)
+        if (pHotTile->state == HOTTILE_DIRTY || pHotTile->state == HOTTILE_RESOLVED) 
         {
             if (!(pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY &&
                   pHotTile->state == HOTTILE_RESOLVED))
index ad6b78a665f4816a849176a70d726971beebdaa8..9e74e2cee8e2e826becd2764d4df31671e63fcc3 100644 (file)
@@ -28,6 +28,8 @@
  ******************************************************************************/
 #pragma once
 
+#include "tilemgr.h"
+
 void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_INPUT_COVERAGE_COUNT][2][2]);
 void InitBackendSampleFuncTable(
     PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2]);
@@ -624,6 +626,19 @@ inline void SetupRenderBuffers(uint8_t*             pColorBuffer[SWR_NUM_RENDERT
     }
 }
 
+INLINE void SetRenderHotTilesDirty(DRAW_CONTEXT* pDC, RenderOutputBuffers& renderBuffers)
+{ // Sets state = HOTTILE_DIRTY on the color hot tile of each slot enabled in colorHottileEnable; depth/stencil tiles are not touched here.
+    const API_STATE& state = GetApiState(pDC);
+
+    unsigned long rtSlot                 = 0;
+    uint32_t      colorHottileEnableMask = state.colorHottileEnable; // working copy of the enable mask; consumed bit by bit below
+    while (_BitScanForward(&rtSlot, colorHottileEnableMask))
+    {
+        colorHottileEnableMask &= ~(1 << rtSlot); // clear the slot just found so the scan advances and the loop terminates
+        renderBuffers.pColorHotTile[rtSlot]->state = HOTTILE_DIRTY; // NOTE(review): dereferenced unchecked; assumes the pointer was filled in for every enabled slot (GetRenderHotTiles does this) - confirm for all callers
+    }
+}
+
 template <typename T>
 void SetupPixelShaderContext(SWR_PS_CONTEXT*            psContext,
                              const SWR_MULTISAMPLE_POS& samplePos,
@@ -1029,6 +1044,8 @@ void BackendPixelRate(DRAW_CONTEXT*        pDC,
                        state.colorHottileEnable,
                        renderBuffers);
 
+    bool isTileDirty = false;
+
     RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 0);
 
     PixelRateZTestLoop<T> PixelRateZTest(pDC,
@@ -1139,6 +1156,8 @@ void BackendPixelRate(DRAW_CONTEXT*        pDC,
                 goto Endtile;
             };
 
+            isTileDirty = true;
+
             // late-Z
             if (!T::bCanEarlyZ && !T::bForcedSampleCount)
             {
@@ -1252,6 +1271,11 @@ void BackendPixelRate(DRAW_CONTEXT*        pDC,
         psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
     }
 
+    if (isTileDirty)
+    {
+        SetRenderHotTilesDirty(pDC, renderBuffers);
+    }
+
     RDTSC_END(pDC->pContext->pBucketMgr, BEPixelRateBackend, 0);
 }
 
index 04e5e3d58bc23454f438d862a2ae4bb58a39cd32..39e078bf401726d12c7cc7900d7cea242e013d44 100644 (file)
@@ -65,6 +65,8 @@ void BackendSampleRate(DRAW_CONTEXT*        pDC,
                        state.colorHottileEnable,
                        renderBuffers);
 
+    bool isTileDirty = false;
+
     RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 0);
 
     psContext.vY.UL     = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
@@ -211,6 +213,11 @@ void BackendSampleRate(DRAW_CONTEXT*        pDC,
 
                     vCoverageMask = _simd_castsi_ps(psContext.activeMask);
 
+                    if (_simd_movemask_ps(vCoverageMask))
+                    {
+                        isTileDirty = true;
+                    }
+
                     // late-Z
                     if (!T::bCanEarlyZ)
                     {
@@ -319,6 +326,11 @@ void BackendSampleRate(DRAW_CONTEXT*        pDC,
         psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
     }
 
+    if (isTileDirty)
+    {
+        SetRenderHotTilesDirty(pDC, renderBuffers);
+    }
+
     RDTSC_END(pDC->pContext->pBucketMgr, BESampleRateBackend, 0);
 }
 
index 2b868269477955af32af3ab77db420761596b241..01cb26bb2533eddc4ec2f33962e7a8c60b71aee0 100644 (file)
@@ -66,6 +66,9 @@ void BackendSingleSample(DRAW_CONTEXT*        pDC,
                        state.colorHottileEnable,
                        renderBuffers);
 
+    // Indicates backend rendered something to the color buffer
+    bool isTileDirty = false;
+
     RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 1);
 
     psContext.vY.UL     = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
@@ -192,6 +195,11 @@ void BackendSingleSample(DRAW_CONTEXT*        pDC,
 
                 vCoverageMask = _simd_castsi_ps(psContext.activeMask);
 
+                if (_simd_movemask_ps(vCoverageMask))
+                {
+                    isTileDirty = true;
+                }
+
                 // late-Z
                 if (!T::bCanEarlyZ)
                 {
@@ -301,6 +309,11 @@ void BackendSingleSample(DRAW_CONTEXT*        pDC,
         psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
     }
 
+    if (isTileDirty)
+    {
+        SetRenderHotTilesDirty(pDC, renderBuffers);
+    }
+
     RDTSC_END(pDC->pContext->pBucketMgr, BESingleSampleBackend, 0);
 }
 
index d17baea94a08f779d7e0008985b9c3713914e56a..5a8656dcfba67a42e08c1a301f2819dc1486f849 100644 (file)
@@ -330,12 +330,17 @@ OSALIGNLINE(struct) API_STATE
 
 class MacroTileMgr;
 class DispatchQueue;
+class HOTTILE;
 
 struct RenderOutputBuffers
 {
     uint8_t* pColor[SWR_NUM_RENDERTARGETS];
     uint8_t* pDepth;
     uint8_t* pStencil;
+
+    HOTTILE* pColorHotTile[SWR_NUM_RENDERTARGETS]; // hot tile whose buffer pColor[i] points into; set together with pColor[i] in GetRenderHotTiles
+    HOTTILE* pDepthHotTile; // hot tile whose buffer pDepth points into
+    HOTTILE* pStencilHotTile; // hot tile whose buffer pStencil points into
 };
 
 // Plane equation A/B/C coeffs used to evaluate I/J barycentric coords
index 4a0fd0934b0525f898fb617c22cb5a4422291c1f..b81baa9819134f9be8fe3485e2897226ded32cbb 100644 (file)
@@ -1459,8 +1459,8 @@ void GetRenderHotTiles(DRAW_CONTEXT*        pDC,
             true,
             numSamples,
             renderTargetArrayIndex);
-        pColor->state                = HOTTILE_DIRTY;
         renderBuffers.pColor[rtSlot] = pColor->pBuffer + offset;
+        renderBuffers.pColorHotTile[rtSlot] = pColor;
 
         colorHottileEnableMask &= ~(1 << rtSlot);
     }
@@ -1483,6 +1483,7 @@ void GetRenderHotTiles(DRAW_CONTEXT*        pDC,
         pDepth->state   = HOTTILE_DIRTY;
         SWR_ASSERT(pDepth->pBuffer != nullptr);
         renderBuffers.pDepth = pDepth->pBuffer + offset;
+        renderBuffers.pDepthHotTile = pDepth;
     }
     if (state.stencilHottileEnable)
     {
@@ -1503,6 +1504,7 @@ void GetRenderHotTiles(DRAW_CONTEXT*        pDC,
         pStencil->state   = HOTTILE_DIRTY;
         SWR_ASSERT(pStencil->pBuffer != nullptr);
         renderBuffers.pStencil = pStencil->pBuffer + offset;
+        renderBuffers.pStencilHotTile = pStencil;
     }
 }
 
index 13f4e37f027f1372769563fea47b3d7ed94e84c8..a23de56a0a5d9878bc27017bb5218318da4a8d1d 100644 (file)
@@ -204,7 +204,7 @@ HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT*                pContext,
                                   hotTile.pBuffer);
 
             hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
-            hotTile.state                  = HOTTILE_DIRTY;
+            hotTile.state = HOTTILE_RESOLVED;
         }
     }
     return &tile.Attachment[attachment];
@@ -378,7 +378,7 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT*  pContext,
                                   y,
                                   pHotTile->renderTargetArrayIndex,
                                   pHotTile->pBuffer);
-            pHotTile->state = HOTTILE_DIRTY;
+            pHotTile->state = HOTTILE_RESOLVED;
             RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
         }
         else if (pHotTile->state == HOTTILE_CLEAR)
index ea6e28c4b05d0a59529585d2d8c9fc497fd08345..8e3b41ea1e743686d1df263e57d9ee15281a6e9e 100644 (file)
@@ -233,13 +233,14 @@ public:
     OSALIGNLINE(volatile long) mTasksOutstanding{0};
 };
 
+/// @note this enum needs to be kept in sync with SWR_TILE_STATE!
 enum HOTTILE_STATE
 {
     HOTTILE_INVALID,  // tile is in uninitialized state and should be loaded with surface contents
                       // before rendering
     HOTTILE_CLEAR,    // tile should be cleared
     HOTTILE_DIRTY,    // tile has been rendered to
-    HOTTILE_RESOLVED, // tile has been stored to memory
+    HOTTILE_RESOLVED, // tile is consistent with memory (either loaded or stored)
 };
 
 struct HOTTILE
index ef95e0103f8e0881013cc21a2b74aec761d43fe0..53f11d66db1932e195904776c193d9aef51efa7b 100644 (file)
@@ -56,10 +56,11 @@ namespace SwrJit
         mInt8Ty     = Type::getInt8Ty(pJitMgr->mContext);
         mInt16Ty    = Type::getInt16Ty(pJitMgr->mContext);
         mInt32Ty    = Type::getInt32Ty(pJitMgr->mContext);
+        mInt64Ty    = Type::getInt64Ty(pJitMgr->mContext);
         mInt8PtrTy  = PointerType::get(mInt8Ty, 0);
         mInt16PtrTy = PointerType::get(mInt16Ty, 0);
         mInt32PtrTy = PointerType::get(mInt32Ty, 0);
-        mInt64Ty    = Type::getInt64Ty(pJitMgr->mContext);
+        mInt64PtrTy = PointerType::get(mInt64Ty, 0);
 
         mSimd4FP64Ty = VectorType::get(mDoubleTy, 4);
 
index d252482a1d9b037a294c267448b4793cd5810a64..97550fad23d0f82a24b1243cefe619ff721168c4 100644 (file)
@@ -91,6 +91,7 @@ namespace SwrJit
         Type* mInt8PtrTy;
         Type* mInt16PtrTy;
         Type* mInt32PtrTy;
+        Type* mInt64PtrTy;
 
         Type* mSimd4FP64Ty;
 
index b32686c7583d0e4a23819d8777e9b788d2a69bd3..3ec2cb32522131120ced68e9cb68f2af899ef149 100644 (file)
@@ -646,7 +646,10 @@ namespace SwrJit
         Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage)
     {
         AssertMemoryUsageParams(pDst, usage);
-
+//        if (vSrc->getType() != mSimdFP32Ty)
+//        {
+//            vSrc = BITCAST(vSrc, mSimdFP32Ty);
+//        }                                               
         SWR_ASSERT(vSrc->getType()->getVectorElementType()->isFloatTy());
         VSCATTERPS(pDst, vMask, vOffsets, vSrc, C(1));
         return;
index 49e132e3756537164ff99417135aea710e652fae..e548d8dd1381453f966e667972532fcee14a7fcb 100644 (file)
@@ -36,7 +36,8 @@ enum class MEM_CLIENT
     GFX_MEM_CLIENT_FETCH,
     GFX_MEM_CLIENT_SAMPLER,
     GFX_MEM_CLIENT_SHADER,
-    GFX_MEM_CLIENT_STREAMOUT
+    GFX_MEM_CLIENT_STREAMOUT,
+    GFX_MEM_CLIENT_URB
 };
 
 protected: