swr: [rasterizer core] Support sparse numa id values on all OSes
authorTim Rowley <timothy.o.rowley@intel.com>
Mon, 6 Feb 2017 23:25:57 +0000 (15:25 -0800)
committerTim Rowley <timothy.o.rowley@intel.com>
Mon, 20 Mar 2017 23:04:53 +0000 (18:04 -0500)
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
src/gallium/drivers/swr/rasterizer/core/threads.cpp

index f1c3030b851f0d98b54148610427a22e2e3fa858..e11291bb83e9c9f1a090f9b97607fd7776f01cad 100644 (file)
@@ -56,6 +56,7 @@ struct Core
 
 struct NumaNode
 {
+    uint32_t          numaId;
     std::vector<Core> cores;
 };
 
@@ -134,8 +135,12 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
                 SWR_ASSERT(ret);
 
                 // Store data
-                if (out_nodes.size() <= numaId) out_nodes.resize(numaId + 1);
+                if (out_nodes.size() <= numaId)
+                {
+                    out_nodes.resize(numaId + 1);
+                }
                 auto& numaNode = out_nodes[numaId];
+                numaNode.numaId = numaId;
 
                 uint32_t coreId = 0;
 
@@ -175,11 +180,18 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
             if (threadId != uint32_t(-1))
             {
                 // Save information.
-                if (out_nodes.size() <= numaId) out_nodes.resize(numaId + 1);
+                if (out_nodes.size() <= numaId)
+                {
+                    out_nodes.resize(numaId + 1);
+                }
+
                 auto& numaNode = out_nodes[numaId];
-                if (numaNode.cores.size() <= coreId) numaNode.cores.resize(coreId + 1);
-                auto& core = numaNode.cores[coreId];
+                if (numaNode.cores.size() <= coreId)
+                {
+                    numaNode.cores.resize(coreId + 1);
+                }
 
+                auto& core = numaNode.cores[coreId];
                 core.procGroup = coreId;
                 core.threadIds.push_back(threadId);
 
@@ -207,9 +219,16 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
     if (threadId != uint32_t(-1))
     {
         // Save information.
-        if (out_nodes.size() <= numaId) out_nodes.resize(numaId + 1);
+        if (out_nodes.size() <= numaId)
+        {
+            out_nodes.resize(numaId + 1);
+        }
         auto& numaNode = out_nodes[numaId];
-        if (numaNode.cores.size() <= coreId) numaNode.cores.resize(coreId + 1);
+        numaNode.numaId = numaId;
+        if (numaNode.cores.size() <= coreId)
+        {
+            numaNode.cores.resize(coreId + 1);
+        }
         auto& core = numaNode.cores[coreId];
 
         core.procGroup = coreId;
@@ -217,31 +236,38 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
         out_numThreadsPerProcGroup++;
     }
 
-    /* Prune empty numa nodes */
-    for (auto it = out_nodes.begin(); it != out_nodes.end(); ) {
-       if ((*it).cores.size() == 0)
-          it = out_nodes.erase(it);
-       else
-          ++it;
-    }
-
-    /* Prune empty core nodes */
-    for (uint32_t node = 0; node < out_nodes.size(); node++) {
-        auto& numaNode = out_nodes[node];
-        auto it = numaNode.cores.begin();
-        for ( ; it != numaNode.cores.end(); ) {
-            if (it->threadIds.size() == 0)
-                numaNode.cores.erase(it);
-            else
-                ++it;
-        }
-    }
-
 #else
 
 #error Unsupported platform
 
 #endif
+
+    // Prune empty cores and numa nodes
+    for (auto node_it = out_nodes.begin(); node_it != out_nodes.end(); )
+    {
+        // Erase empty cores (first)
+        for (auto core_it = node_it->cores.begin(); core_it != node_it->cores.end(); )
+        {
+            if (core_it->threadIds.size() == 0)
+            {
+                core_it = node_it->cores.erase(core_it);
+            }
+            else
+            {
+                ++core_it;
+            }
+        }
+
+        // Erase empty numa nodes (second)
+        if (node_it->cores.size() == 0)
+        {
+            node_it = out_nodes.erase(node_it);
+        }
+        else
+        {
+            ++node_it;
+        }
+    }
 }
 
 
@@ -1022,7 +1048,7 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
                     pPool->pThreadData[workerId].workerId = workerId;
                     pPool->pThreadData[workerId].procGroupId = core.procGroup;
                     pPool->pThreadData[workerId].threadId = core.threadIds[t];
-                    pPool->pThreadData[workerId].numaId = n;
+                    pPool->pThreadData[workerId].numaId = node.numaId;
                     pPool->pThreadData[workerId].coreId = c;
                     pPool->pThreadData[workerId].htId = t;
                     pPool->pThreadData[workerId].pContext = pContext;