From 0767e820fd96e8bac2943fa8942bea3ff81b8bd9 Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Tue, 22 Mar 2016 17:28:06 -0600 Subject: [PATCH] swr: [rasterizer core] Fix Compute workitem retirement --- .../drivers/swr/rasterizer/core/api.cpp | 37 +++++++------------ .../drivers/swr/rasterizer/core/arena.h | 7 +--- .../drivers/swr/rasterizer/core/threads.cpp | 7 ++-- .../drivers/swr/rasterizer/core/threads.h | 2 + 4 files changed, 22 insertions(+), 31 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 591342239d3..7ca182242e5 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -160,20 +160,12 @@ void WakeAllThreads(SWR_CONTEXT *pContext) template void QueueWork(SWR_CONTEXT *pContext) { - if (IsDraw) - { - // Each worker thread looks at a DC for both FE and BE work at different times and so we - // multiply threadDone by 2. When the threadDone counter has reached 0 then all workers - // have moved past this DC. (i.e. Each worker has checked this DC for both FE and BE work and - // then moved on if all work is done.) - pContext->pCurDrawContext->threadsDone = - pContext->NumWorkerThreads ? pContext->NumWorkerThreads * 2 : 2; - } - else - { - pContext->pCurDrawContext->threadsDone = - pContext->NumWorkerThreads ? pContext->NumWorkerThreads : 1; - } + // Each worker thread looks at a DC for both FE and BE work at different times and so we + // multiply threadDone by 2. When the threadDone counter has reached 0 then all workers + // have moved past this DC. (i.e. Each worker has checked this DC for both FE and BE work and + // then moved on if all work is done.) + pContext->pCurDrawContext->threadsDone = + pContext->NumWorkerThreads ? pContext->NumWorkerThreads * 2 : 2; _ReadWriteBarrier(); { @@ -201,10 +193,7 @@ void QueueWork(SWR_CONTEXT *pContext) } // Dequeue the work here, if not already done, since we're single threaded (i.e. no workers). - if (!pContext->dcRing.IsEmpty()) - { - pContext->dcRing.Dequeue(); - } + while (CompleteDrawContext(pContext, pContext->pCurDrawContext) > 0) {} // restore csr _mm_setcsr(mxcsr); @@ -252,8 +241,6 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false) uint32_t dsIndex = pContext->curStateId % KNOB_MAX_DRAWS_IN_FLIGHT; pCurDrawContext->pState = &pContext->dsRing[dsIndex]; - auto& stateArena = *(pCurDrawContext->pState->pArena); - // Copy previous state to current state. if (pContext->pPrevDrawContext) { @@ -266,7 +253,9 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false) { CopyState(*pCurDrawContext->pState, *pPrevDrawContext->pState); - stateArena.Reset(true); // Reset memory. + // Should have been cleaned up previously + SWR_ASSERT(pCurDrawContext->pState->pArena->IsEmpty() == true); + pCurDrawContext->pState->pPrivateState = nullptr; pContext->curStateId++; // Progress state ring index forward. @@ -276,16 +265,18 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false) // If its a split draw then just copy the state pointer over // since its the same draw. pCurDrawContext->pState = pPrevDrawContext->pState; + SWR_ASSERT(pPrevDrawContext->cleanupState == false); } } else { - stateArena.Reset(); // Reset memory. + SWR_ASSERT(pCurDrawContext->pState->pArena->IsEmpty() == true); pContext->curStateId++; // Progress state ring index forward. } + SWR_ASSERT(pCurDrawContext->pArena->IsEmpty() == true); + pCurDrawContext->dependency = 0; - pCurDrawContext->pArena->Reset(); pCurDrawContext->pContext = pContext; pCurDrawContext->isCompute = false; // Dispatch has to set this to true. diff --git a/src/gallium/drivers/swr/rasterizer/core/arena.h b/src/gallium/drivers/swr/rasterizer/core/arena.h index d777c20a4ee..67d81a44347 100644 --- a/src/gallium/drivers/swr/rasterizer/core/arena.h +++ b/src/gallium/drivers/swr/rasterizer/core/arena.h @@ -51,10 +51,7 @@ public: } }; -static const size_t ARENA_BLOCK_SHIFT = 5; -static const size_t ARENA_BLOCK_ALIGN = KNOB_SIMD_WIDTH * 4; -static_assert((1U << ARENA_BLOCK_SHIFT) == ARENA_BLOCK_ALIGN, - "Invalid value for ARENA_BLOCK_ALIGN/SHIFT"); +static const size_t ARENA_BLOCK_ALIGN = 64; struct ArenaBlock { @@ -65,7 +62,7 @@ static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN, "Increase BLOCK_ALIGN size"); // Caching Allocator for Arena -template +template struct CachingAllocatorT : DefaultAllocator { static uint32_t GetBucketId(size_t blockSize) diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp index 521a306b96e..845e28ea497 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp @@ -279,11 +279,10 @@ bool CheckDependency(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint64_t lastReti return (pDC->dependency > lastRetiredDraw); } - - -INLINE void CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC) +INLINE int64_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC) { int64_t result = InterlockedDecrement64(&pDC->threadsDone); + SWR_ASSERT(result >= 0); if (result == 0) { @@ -299,6 +298,8 @@ INLINE void CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC) pContext->dcRing.Dequeue(); // Remove from tail } + + return result; } INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint64_t& curDrawBE) diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.h b/src/gallium/drivers/swr/rasterizer/core/threads.h index 6b37e3ac179..6cc8c96a00f 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.h +++ b/src/gallium/drivers/swr/rasterizer/core/threads.h @@ -34,6 +34,7 @@ typedef std::thread* THREAD_PTR; struct SWR_CONTEXT; +struct DRAW_CONTEXT; struct THREAD_DATA { @@ -63,3 +64,4 @@ void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool); void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE, int numaNode); void WorkOnFifoBE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE, TileSet &usedTiles); void WorkOnCompute(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE); +int64_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC); \ No newline at end of file -- 2.30.2