From: Tim Rowley Date: Tue, 17 Oct 2017 20:02:53 +0000 (-0500) Subject: swr/rast: Add api to override draws in flight X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=028ffa5e1820707ef0cab52853e36a259b00c849;p=mesa.git swr/rast: Add api to override draws in flight Allow draws in flight to be overridden via SWR_CREATECONTEXT_INFO. Patch by Jan Zielinski. Reviewed-by: Bruce Cherniak --- diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 632309821f8..20eeb296815 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -74,13 +74,19 @@ HANDLE SwrCreateContext( pContext->privateStateSize = pCreateInfo->privateStateSize; - pContext->dcRing.Init(KNOB_MAX_DRAWS_IN_FLIGHT); - pContext->dsRing.Init(KNOB_MAX_DRAWS_IN_FLIGHT); + pContext->MAX_DRAWS_IN_FLIGHT = KNOB_MAX_DRAWS_IN_FLIGHT; + if (pCreateInfo->MAX_DRAWS_IN_FLIGHT != 0) + { + pContext->MAX_DRAWS_IN_FLIGHT = pCreateInfo->MAX_DRAWS_IN_FLIGHT; + } + + pContext->dcRing.Init(pContext->MAX_DRAWS_IN_FLIGHT); + pContext->dsRing.Init(pContext->MAX_DRAWS_IN_FLIGHT); - pContext->pMacroTileManagerArray = (MacroTileMgr*)AlignedMalloc(sizeof(MacroTileMgr) * KNOB_MAX_DRAWS_IN_FLIGHT, 64); - pContext->pDispatchQueueArray = (DispatchQueue*)AlignedMalloc(sizeof(DispatchQueue) * KNOB_MAX_DRAWS_IN_FLIGHT, 64); + pContext->pMacroTileManagerArray = (MacroTileMgr*)AlignedMalloc(sizeof(MacroTileMgr) * pContext->MAX_DRAWS_IN_FLIGHT, 64); + pContext->pDispatchQueueArray = (DispatchQueue*)AlignedMalloc(sizeof(DispatchQueue) * pContext->MAX_DRAWS_IN_FLIGHT, 64); - for (uint32_t dc = 0; dc < KNOB_MAX_DRAWS_IN_FLIGHT; ++dc) + for (uint32_t dc = 0; dc < pContext->MAX_DRAWS_IN_FLIGHT; ++dc) { pContext->dcRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator); new (&pContext->pMacroTileManagerArray[dc]) MacroTileMgr(*pContext->dcRing[dc].pArena); @@ -173,7 +179,7 @@ template void QueueWork(SWR_CONTEXT *pContext) { DRAW_CONTEXT* pDC = pContext->pCurDrawContext; - uint32_t dcIndex = pDC->drawId % KNOB_MAX_DRAWS_IN_FLIGHT; + uint32_t dcIndex = pDC->drawId % pContext->MAX_DRAWS_IN_FLIGHT; if (IsDraw) { @@ -257,7 +263,7 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false) } uint64_t curDraw = pContext->dcRing.GetHead(); - uint32_t dcIndex = curDraw % KNOB_MAX_DRAWS_IN_FLIGHT; + uint32_t dcIndex = curDraw % pContext->MAX_DRAWS_IN_FLIGHT; if ((pContext->frameCount - pContext->lastFrameChecked) > 2 || (curDraw - pContext->lastDrawChecked) > 0x10000) @@ -273,7 +279,7 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false) pContext->pCurDrawContext = pCurDrawContext; // Assign next available entry in DS ring to this DC. - uint32_t dsIndex = pContext->curStateId % KNOB_MAX_DRAWS_IN_FLIGHT; + uint32_t dsIndex = pContext->curStateId % pContext->MAX_DRAWS_IN_FLIGHT; pCurDrawContext->pState = &pContext->dsRing[dsIndex]; // Copy previous state to current state. @@ -361,7 +367,7 @@ void SwrDestroyContext(HANDLE hContext) DestroyThreadPool(pContext, &pContext->threadPool); // free the fifos - for (uint32_t i = 0; i < KNOB_MAX_DRAWS_IN_FLIGHT; ++i) + for (uint32_t i = 0; i < pContext->MAX_DRAWS_IN_FLIGHT; ++i) { AlignedFree(pContext->dcRing[i].dynState.pStats); delete pContext->dcRing[i].pArena; @@ -1481,7 +1487,7 @@ void SwrDispatch( pTaskData->threadGroupCountZ = threadGroupCountZ; uint32_t totalThreadGroups = threadGroupCountX * threadGroupCountY * threadGroupCountZ; - uint32_t dcIndex = pDC->drawId % KNOB_MAX_DRAWS_IN_FLIGHT; + uint32_t dcIndex = pDC->drawId % pContext->MAX_DRAWS_IN_FLIGHT; pDC->pDispatch = &pContext->pDispatchQueueArray[dcIndex]; pDC->pDispatch->initialize(totalThreadGroups, pTaskData, &ProcessComputeBE); diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h index 577cfb157a5..60f56c6d76f 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.h +++ b/src/gallium/drivers/swr/rasterizer/core/api.h @@ -215,6 +215,10 @@ struct SWR_CREATECONTEXT_INFO // Input (optional): Threading info that overrides any set KNOB values. SWR_THREADING_INFO* pThreadInfo; + + // Input: if set to non-zero value, overrides KNOB value for maximum + // number of draws in flight + uint32_t MAX_DRAWS_IN_FLIGHT; }; ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index bcd5801a3b6..ae942f182d7 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -481,6 +481,8 @@ struct SWR_CONTEXT THREAD_POOL threadPool; // Thread pool associated with this context SWR_THREADING_INFO threadInfo; + uint32_t MAX_DRAWS_IN_FLIGHT; + std::condition_variable FifosNotEmpty; std::mutex WaitLock; diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp index 9ece0649b9c..f4ddc212264 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp @@ -318,7 +318,7 @@ uint32_t GetEnqueuedDraw(SWR_CONTEXT *pContext) INLINE DRAW_CONTEXT *GetDC(SWR_CONTEXT *pContext, uint32_t drawId) { - return &pContext->dcRing[(drawId-1) % KNOB_MAX_DRAWS_IN_FLIGHT]; + return &pContext->dcRing[(drawId-1) % pContext->MAX_DRAWS_IN_FLIGHT]; } INLINE @@ -421,7 +421,7 @@ INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint32_t workerId, ui drawEnqueued = GetEnqueuedDraw(pContext); while (IDComparesLess(curDrawBE, drawEnqueued)) { - DRAW_CONTEXT *pDC = &pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT]; + DRAW_CONTEXT *pDC = &pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT]; // If its not compute and FE is not done then break out of loop. if (!pDC->doneFE && !pDC->isCompute) break; @@ -478,7 +478,7 @@ bool WorkOnFifoBE( return false; } - uint32_t lastRetiredDraw = pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT].drawId - 1; + uint32_t lastRetiredDraw = pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT].drawId - 1; // Reset our history for locked tiles. We'll have to re-learn which tiles are locked. lockedTiles.clear(); @@ -490,7 +490,7 @@ bool WorkOnFifoBE( // maintain order. The locked tiles provides the history to ensures this. for (uint32_t i = curDrawBE; IDComparesLess(i, drawEnqueued); ++i) { - DRAW_CONTEXT *pDC = &pContext->dcRing[i % KNOB_MAX_DRAWS_IN_FLIGHT]; + DRAW_CONTEXT *pDC = &pContext->dcRing[i % pContext->MAX_DRAWS_IN_FLIGHT]; if (pDC->isCompute) return false; // We don't look at compute work. @@ -636,7 +636,7 @@ void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE) uint32_t drawEnqueued = GetEnqueuedDraw(pContext); while (IDComparesLess(curDrawFE, drawEnqueued)) { - uint32_t dcSlot = curDrawFE % KNOB_MAX_DRAWS_IN_FLIGHT; + uint32_t dcSlot = curDrawFE % pContext->MAX_DRAWS_IN_FLIGHT; DRAW_CONTEXT *pDC = &pContext->dcRing[dcSlot]; if (pDC->isCompute || pDC->doneFE) { @@ -653,7 +653,7 @@ void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE) uint32_t curDraw = curDrawFE; while (IDComparesLess(curDraw, drawEnqueued)) { - uint32_t dcSlot = curDraw % KNOB_MAX_DRAWS_IN_FLIGHT; + uint32_t dcSlot = curDraw % pContext->MAX_DRAWS_IN_FLIGHT; DRAW_CONTEXT *pDC = &pContext->dcRing[dcSlot]; if (!pDC->isCompute && !pDC->FeLock) @@ -694,11 +694,11 @@ void WorkOnCompute( return; } - uint32_t lastRetiredDraw = pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT].drawId - 1; + uint32_t lastRetiredDraw = pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT].drawId - 1; for (uint64_t i = curDrawBE; IDComparesLess(i, drawEnqueued); ++i) { - DRAW_CONTEXT *pDC = &pContext->dcRing[i % KNOB_MAX_DRAWS_IN_FLIGHT]; + DRAW_CONTEXT *pDC = &pContext->dcRing[i % pContext->MAX_DRAWS_IN_FLIGHT]; if (pDC->isCompute == false) return; // check dependencies @@ -972,7 +972,7 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool) } // Initialize DRAW_CONTEXT's per-thread stats - for (uint32_t dc = 0; dc < KNOB_MAX_DRAWS_IN_FLIGHT; ++dc) + for (uint32_t dc = 0; dc < pContext->MAX_DRAWS_IN_FLIGHT; ++dc) { pContext->dcRing[dc].dynState.pStats = (SWR_STATS*)AlignedMalloc(sizeof(SWR_STATS) * numThreads, 64); memset(pContext->dcRing[dc].dynState.pStats, 0, sizeof(SWR_STATS) * numThreads);