swr: [rasterizer core] remove KNOB_MAX_NUM_THREADS
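
Drop the compile-time KNOB_MAX_NUM_THREADS limit: the Windows topology query no
longer uses a static buffer sized by the knob (it asks GetLogicalProcessorInformationEx
for the required size, heap-allocates exactly that much, and frees it afterwards), the
"system thread count exceeds max" warning goes away, per-draw stats (dynState.pStats)
and the worker thread handles (pPool->pThreads) are allocated from the runtime thread
count, and single-threaded fallback is tracked in pContext->threadInfo.SINGLE_THREADED
instead of SET_KNOB.

For reference, a minimal standalone sketch (illustrative only, not the driver code;
error handling reduced to early returns) of the query-size / allocate / query-again
pattern the topology code switches to:

    // Sketch of the two-call sizing pattern for GetLogicalProcessorInformationEx.
    #include <windows.h>
    #include <cstdio>
    #include <cstdlib>

    int main()
    {
        DWORD bufSize = 0;

        // First call with a null buffer fails with ERROR_INSUFFICIENT_BUFFER
        // and reports the required size in bufSize.
        if (GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &bufSize) ||
            GetLastError() != ERROR_INSUFFICIENT_BUFFER)
        {
            return 1;
        }

        PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pBuf =
            (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(bufSize);
        if (!pBuf)
        {
            return 1;
        }

        // Second call fills the exactly-sized buffer.
        if (!GetLogicalProcessorInformationEx(RelationProcessorCore, pBuf, &bufSize))
        {
            free(pBuf);
            return 1;
        }

        // Entries are variable-sized; advance by each entry's Size field.
        DWORD cores = 0;
        for (DWORD offset = 0; offset < bufSize; )
        {
            PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX p =
                (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)((BYTE*)pBuf + offset);
            ++cores;
            offset += p->Size;
        }

        printf("physical cores reported: %lu\n", (unsigned long)cores);
        free(pBuf);
        return 0;
    }

The stats and thread arrays below follow the same idea: size allocations from the
runtime thread count (numThreads) rather than a compile-time knob.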
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index 9665f09e2c852feac23daffda0e0d3d3353c44e6..ed03d70a1f931b18812575a66a13629b101dca3c 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -73,14 +73,19 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
     static std::mutex m;
     std::lock_guard<std::mutex> l(m);
 
-    static SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer[KNOB_MAX_NUM_THREADS];
-    DWORD bufSize = sizeof(buffer);
+    DWORD bufSize = 0;
 
-    BOOL ret = GetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufSize);
+    BOOL ret = GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &bufSize);
+    SWR_ASSERT(ret == FALSE && GetLastError() == ERROR_INSUFFICIENT_BUFFER);
+
+    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pBufferMem = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(bufSize);
+    SWR_ASSERT(pBufferMem);
+
+    ret = GetLogicalProcessorInformationEx(RelationProcessorCore, pBufferMem, &bufSize);
     SWR_ASSERT(ret != FALSE, "Failed to get Processor Topology Information");
 
-    uint32_t count = bufSize / buffer->Size;
-    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pBuffer = buffer;
+    uint32_t count = bufSize / pBufferMem->Size;
+    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pBuffer = pBufferMem;
 
     for (uint32_t i = 0; i < count; ++i)
     {
@@ -150,6 +155,8 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
         pBuffer = PtrAdd(pBuffer, pBuffer->Size);
     }
 
+    free(pBufferMem);
+
 
 #elif defined(__linux__) || defined (__gnu_linux__)
 
@@ -321,10 +328,10 @@ INLINE void UpdateClientStats(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
     // Sum up stats across all workers before sending to client.
     for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
     {
-        stats.DepthPassCount += dynState.stats[i].DepthPassCount;
+        stats.DepthPassCount += dynState.pStats[i].DepthPassCount;
 
-        stats.PsInvocations  += dynState.stats[i].PsInvocations;
-        stats.CsInvocations  += dynState.stats[i].CsInvocations;
+        stats.PsInvocations  += dynState.pStats[i].PsInvocations;
+        stats.CsInvocations  += dynState.pStats[i].CsInvocations;
     }
 
     pContext->pfnUpdateStats(GetPrivateState(pDC), &stats);
@@ -849,13 +856,6 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
         numThreads = std::min(pContext->threadInfo.MAX_WORKER_THREADS, maxHWThreads);
     }
 
-    if (numThreads > KNOB_MAX_NUM_THREADS)
-    {
-        printf("WARNING: system thread count %u exceeds max %u, "
-            "performance will be degraded\n",
-            numThreads, KNOB_MAX_NUM_THREADS);
-    }
-
     uint32_t numAPIReservedThreads = 1;
 
 
@@ -878,8 +878,8 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
         else
         {
             pPool->numThreads = 0;
-            SET_KNOB(SINGLE_THREADED, true);
-            return;
+            numThreads = 1;
+            pContext->threadInfo.SINGLE_THREADED = true;
         }
     }
     else
@@ -895,6 +895,19 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
         }
     }
 
+    // Initialize DRAW_CONTEXT's per-thread stats
+    for (uint32_t dc = 0; dc < KNOB_MAX_DRAWS_IN_FLIGHT; ++dc)
+    {
+        pContext->dcRing[dc].dynState.pStats = new SWR_STATS[numThreads];
+        memset(pContext->dcRing[dc].dynState.pStats, 0, sizeof(SWR_STATS) * numThreads);
+    }
+
+    if (pContext->threadInfo.SINGLE_THREADED)
+    {
+        return;
+    }
+
+
     pPool->numThreads = numThreads;
     pContext->NumWorkerThreads = pPool->numThreads;
 
@@ -902,6 +915,8 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
     pPool->pThreadData = (THREAD_DATA *)malloc(pPool->numThreads * sizeof(THREAD_DATA));
     pPool->numaMask = 0;
 
+    pPool->pThreads = new THREAD_PTR[pPool->numThreads];
+
     if (pContext->threadInfo.MAX_WORKER_THREADS)
     {
         bool bForceBindProcGroup = (numThreads > numThreadsPerProcGroup);
@@ -918,7 +933,7 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
             pPool->pThreadData[workerId].htId = 0;
             pPool->pThreadData[workerId].pContext = pContext;
             pPool->pThreadData[workerId].forceBindProcGroup = bForceBindProcGroup;
-            pPool->threads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
+            pPool->pThreads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
 
             pContext->NumBEThreads++;
             pContext->NumFEThreads++;
@@ -964,7 +979,7 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
                     pPool->pThreadData[workerId].htId = t;
                     pPool->pThreadData[workerId].pContext = pContext;
 
-                    pPool->threads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
+                    pPool->pThreads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
                     pContext->NumBEThreads++;
                     pContext->NumFEThreads++;
 
@@ -989,10 +1004,12 @@ void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
         // Wait for threads to finish and destroy them
         for (uint32_t t = 0; t < pPool->numThreads; ++t)
         {
-            pPool->threads[t]->join();
-            delete(pPool->threads[t]);
+            pPool->pThreads[t]->join();
+            delete(pPool->pThreads[t]);
         }
 
+        delete [] pPool->pThreads;
+
         // Clean up data used by threads
         free(pPool->pThreadData);
     }