static std::mutex m;
std::lock_guard<std::mutex> l(m);
- static SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer[KNOB_MAX_NUM_THREADS];
- DWORD bufSize = sizeof(buffer);
+ DWORD bufSize = 0;
- BOOL ret = GetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufSize);
+ BOOL ret = GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &bufSize);
+ SWR_ASSERT(ret == FALSE && GetLastError() == ERROR_INSUFFICIENT_BUFFER);
+
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pBufferMem = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(bufSize);
+ SWR_ASSERT(pBufferMem);
+
+ ret = GetLogicalProcessorInformationEx(RelationProcessorCore, pBufferMem, &bufSize);
SWR_ASSERT(ret != FALSE, "Failed to get Processor Topology Information");
- uint32_t count = bufSize / buffer->Size;
- PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pBuffer = buffer;
+ uint32_t count = bufSize / pBufferMem->Size;
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pBuffer = pBufferMem;
for (uint32_t i = 0; i < count; ++i)
{
pBuffer = PtrAdd(pBuffer, pBuffer->Size);
}
+ free(pBufferMem);
+
#elif defined(__linux__) || defined (__gnu_linux__)
// Sum up stats across all workers before sending to client.
for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
{
- stats.DepthPassCount += dynState.stats[i].DepthPassCount;
+ stats.DepthPassCount += dynState.pStats[i].DepthPassCount;
- stats.PsInvocations += dynState.stats[i].PsInvocations;
- stats.CsInvocations += dynState.stats[i].CsInvocations;
+ stats.PsInvocations += dynState.pStats[i].PsInvocations;
+ stats.CsInvocations += dynState.pStats[i].CsInvocations;
}
pContext->pfnUpdateStats(GetPrivateState(pDC), &stats);
numThreads = std::min(pContext->threadInfo.MAX_WORKER_THREADS, maxHWThreads);
}
- if (numThreads > KNOB_MAX_NUM_THREADS)
- {
- printf("WARNING: system thread count %u exceeds max %u, "
- "performance will be degraded\n",
- numThreads, KNOB_MAX_NUM_THREADS);
- }
-
uint32_t numAPIReservedThreads = 1;
else
{
pPool->numThreads = 0;
- SET_KNOB(SINGLE_THREADED, true);
- return;
+ numThreads = 1;
+ pContext->threadInfo.SINGLE_THREADED = true;
}
}
else
}
}
+ // Initialize DRAW_CONTEXT's per-thread stats
+ for (uint32_t dc = 0; dc < KNOB_MAX_DRAWS_IN_FLIGHT; ++dc)
+ {
+ pContext->dcRing[dc].dynState.pStats = new SWR_STATS[numThreads];
+ memset(pContext->dcRing[dc].dynState.pStats, 0, sizeof(SWR_STATS) * numThreads);
+ }
+
+ if (pContext->threadInfo.SINGLE_THREADED)
+ {
+ return;
+ }
+
+
pPool->numThreads = numThreads;
pContext->NumWorkerThreads = pPool->numThreads;
pPool->pThreadData = (THREAD_DATA *)malloc(pPool->numThreads * sizeof(THREAD_DATA));
pPool->numaMask = 0;
+ pPool->pThreads = new THREAD_PTR[pPool->numThreads];
+
if (pContext->threadInfo.MAX_WORKER_THREADS)
{
bool bForceBindProcGroup = (numThreads > numThreadsPerProcGroup);
pPool->pThreadData[workerId].htId = 0;
pPool->pThreadData[workerId].pContext = pContext;
pPool->pThreadData[workerId].forceBindProcGroup = bForceBindProcGroup;
- pPool->threads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
+ pPool->pThreads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
pContext->NumBEThreads++;
pContext->NumFEThreads++;
pPool->pThreadData[workerId].htId = t;
pPool->pThreadData[workerId].pContext = pContext;
- pPool->threads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
+ pPool->pThreads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
pContext->NumBEThreads++;
pContext->NumFEThreads++;
// Wait for threads to finish and destroy them
for (uint32_t t = 0; t < pPool->numThreads; ++t)
{
- pPool->threads[t]->join();
- delete(pPool->threads[t]);
+ pPool->pThreads[t]->join();
+ delete(pPool->pThreads[t]);
}
+ delete [] pPool->pThreads;
+
// Clean up data used by threads
free(pPool->pThreadData);
}