return result;
}
-INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint64_t& curDrawBE)
+INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint64_t& curDrawBE, uint64_t& drawEnqueued)
{
// increment our current draw id to the first incomplete draw
- uint64_t drawEnqueued = GetEnqueuedDraw(pContext);
+ drawEnqueued = GetEnqueuedDraw(pContext);
while (curDrawBE < drawEnqueued)
{
DRAW_CONTEXT *pDC = &pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT];
// If its not compute and FE is not done then break out of loop.
if (!pDC->doneFE && !pDC->isCompute) break;
- bool isWorkComplete = (pDC->isCompute) ?
- pDC->pDispatch->isWorkComplete() : pDC->pTileMgr->isWorkComplete();
+ bool isWorkComplete = pDC->isCompute ?
+ pDC->pDispatch->isWorkComplete() :
+ pDC->pTileMgr->isWorkComplete();
if (isWorkComplete)
{
{
// Find the first incomplete draw that has pending work. If no such draw is found then
// return. FindFirstIncompleteDraw is responsible for incrementing the curDrawBE.
- if (FindFirstIncompleteDraw(pContext, curDrawBE) == false)
+ uint64_t drawEnqueued = 0;
+ if (FindFirstIncompleteDraw(pContext, curDrawBE, drawEnqueued) == false)
{
return;
}
// 2. If we're trying to work on draws after curDrawBE, we are restricted to
// working on those macrotiles that are known to be complete in the prior draw to
// maintain order. The locked tiles provides the history to ensures this.
- for (uint64_t i = curDrawBE; i < GetEnqueuedDraw(pContext); ++i)
+ for (uint64_t i = curDrawBE; i < drawEnqueued; ++i)
{
DRAW_CONTEXT *pDC = &pContext->dcRing[i % KNOB_MAX_DRAWS_IN_FLIGHT];
}
}
-void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE, uint32_t numaNode)
+void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE)
{
// Try to grab the next DC from the ring
uint64_t drawEnqueued = GetEnqueuedDraw(pContext);
uint32_t workerId,
uint64_t& curDrawBE)
{
- if (FindFirstIncompleteDraw(pContext, curDrawBE) == false)
+ uint64_t drawEnqueued = 0;
+ if (FindFirstIncompleteDraw(pContext, curDrawBE, drawEnqueued) == false)
{
return;
}
uint64_t lastRetiredDraw = pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT].drawId - 1;
- DRAW_CONTEXT *pDC = &pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT];
- if (pDC->isCompute == false) return;
-
- // check dependencies
- if (CheckDependency(pContext, pDC, lastRetiredDraw))
+ // Walk every draw id from the first incomplete draw (curDrawBE) up to, but not
+ // including, drawEnqueued. Each id selects a ring slot via
+ // i % KNOB_MAX_DRAWS_IN_FLIGHT. The walk returns at the first draw that is not
+ // compute or for which CheckDependency() reports true.
+ //
+ // The bound must be tested against the induction variable i: curDrawBE is not
+ // modified inside this loop, so comparing curDrawBE to drawEnqueued is
+ // loop-invariant and would let i run past drawEnqueued.
+ for (uint64_t i = curDrawBE; i < drawEnqueued; ++i)
{
- return;
- }
+ DRAW_CONTEXT *pDC = &pContext->dcRing[i % KNOB_MAX_DRAWS_IN_FLIGHT];
+ if (pDC->isCompute == false) return;
+
+ // check dependencies
+ if (CheckDependency(pContext, pDC, lastRetiredDraw))
+ {
+ return;
+ }
- SWR_ASSERT(pDC->pDispatch != nullptr);
- DispatchQueue& queue = *pDC->pDispatch;
+ SWR_ASSERT(pDC->pDispatch != nullptr);
+ DispatchQueue& queue = *pDC->pDispatch;
- // Is there any work remaining?
- if (queue.getNumQueued() > 0)
- {
- uint32_t threadGroupId = 0;
- while (queue.getWork(threadGroupId))
+ // Is there any work remaining?
+ if (queue.getNumQueued() > 0)
{
- ProcessComputeBE(pDC, workerId, threadGroupId);
+ uint32_t threadGroupId = 0;
+ // Pull thread groups until getWork() reports none are left, processing
+ // each one and then reporting it finished to the queue.
+ while (queue.getWork(threadGroupId))
+ {
+ ProcessComputeBE(pDC, workerId, threadGroupId);
- queue.finishedWork();
+ queue.finishedWork();
+ }
}
}
}
+template<bool IsFEThread, bool IsBEThread>
DWORD workerThreadMain(LPVOID pData)
{
THREAD_DATA *pThreadData = (THREAD_DATA*)pData;
}
}
- RDTSC_START(WorkerWorkOnFifoBE);
- WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask);
- RDTSC_STOP(WorkerWorkOnFifoBE, 0, 0);
+ if (IsBEThread)
+ {
+ RDTSC_START(WorkerWorkOnFifoBE);
+ WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask);
+ RDTSC_STOP(WorkerWorkOnFifoBE, 0, 0);
- WorkOnCompute(pContext, workerId, curDrawBE);
+ WorkOnCompute(pContext, workerId, curDrawBE);
+ }
+
+ if (IsFEThread)
+ {
+ WorkOnFifoFE(pContext, workerId, curDrawFE);
- WorkOnFifoFE(pContext, workerId, curDrawFE, numaNode);
+ if (!IsBEThread)
+ {
+ curDrawBE = curDrawFE;
+ }
+ }
}
return 0;
}
+template<> DWORD workerThreadMain<false, false>(LPVOID) = delete;
+template <bool IsFEThread, bool IsBEThread>
DWORD workerThreadInit(LPVOID pData)
{
#if defined(_WIN32)
__try
#endif // _WIN32
{
- return workerThreadMain(pData);
+ return workerThreadMain<IsFEThread, IsBEThread>(pData);
}
#if defined(_WIN32)
return 1;
}
+template<> DWORD workerThreadInit<false, false>(LPVOID pData) = delete;
void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
{
uint32_t numCoresPerNode = numHWCoresPerNode;
uint32_t numHyperThreads = numHWHyperThreads;
+ if (KNOB_MAX_WORKER_THREADS)
+ {
+ SET_KNOB(HYPERTHREADED_FE, false);
+ }
+
+ if (KNOB_HYPERTHREADED_FE)
+ {
+ SET_KNOB(MAX_THREADS_PER_CORE, 0);
+ }
+
if (KNOB_MAX_NUMA_NODES)
{
numNodes = std::min(numNodes, KNOB_MAX_NUMA_NODES);
numHyperThreads = std::min(numHyperThreads, KNOB_MAX_THREADS_PER_CORE);
}
+ if (numHyperThreads < 2)
+ {
+ SET_KNOB(HYPERTHREADED_FE, false);
+ }
+
// Calculate numThreads
uint32_t numThreads = numNodes * numCoresPerNode * numHyperThreads;
pPool->pThreadData[workerId].procGroupId = workerId % numProcGroups;
pPool->pThreadData[workerId].threadId = 0;
pPool->pThreadData[workerId].numaId = 0;
+ pPool->pThreadData[workerId].coreId = 0;
+ pPool->pThreadData[workerId].htId = 0;
pPool->pThreadData[workerId].pContext = pContext;
pPool->pThreadData[workerId].forceBindProcGroup = bForceBindProcGroup;
- pPool->threads[workerId] = new std::thread(workerThreadInit, &pPool->pThreadData[workerId]);
+ pPool->threads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
+
+ pContext->NumBEThreads++;
+ pContext->NumFEThreads++;
}
}
else
pPool->pThreadData[workerId].procGroupId = core.procGroup;
pPool->pThreadData[workerId].threadId = core.threadIds[t];
pPool->pThreadData[workerId].numaId = n;
+ pPool->pThreadData[workerId].coreId = c;
+ pPool->pThreadData[workerId].htId = t;
pPool->pThreadData[workerId].pContext = pContext;
- pPool->threads[workerId] = new std::thread(workerThreadInit, &pPool->pThreadData[workerId]);
+
+ if (KNOB_HYPERTHREADED_FE)
+ {
+ if (t == 0)
+ {
+ pContext->NumBEThreads++;
+ pPool->threads[workerId] = new std::thread(workerThreadInit<false, true>, &pPool->pThreadData[workerId]);
+ }
+ else
+ {
+ pContext->NumFEThreads++;
+ pPool->threads[workerId] = new std::thread(workerThreadInit<true, false>, &pPool->pThreadData[workerId]);
+ }
+ }
+ else
+ {
+ pPool->threads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
+ pContext->NumBEThreads++;
+ pContext->NumFEThreads++;
+ }
++workerId;
}