From ead0dfe31ec7a1b1928e4abbfa99d59e0e5e929a Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Wed, 6 Sep 2017 14:59:33 -0500 Subject: [PATCH] swr/rast: adjust linux cpu topology identification code Make more robust to handle strange configurations like a vmware exported 4-way numa X 1-core configuration. Reviewed-by: Bruce Cherniak --- .../drivers/swr/rasterizer/core/threads.cpp | 81 +++++++++---------- 1 file changed, 38 insertions(+), 43 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp index b704d23f547..4bb395dec34 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp @@ -169,37 +169,16 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread std::ifstream input("/proc/cpuinfo"); std::string line; char* c; - uint32_t threadId = uint32_t(-1); + uint32_t procId = uint32_t(-1); uint32_t coreId = uint32_t(-1); - uint32_t numaId = uint32_t(-1); + uint32_t physId = uint32_t(-1); while (std::getline(input, line)) { if (line.find("processor") != std::string::npos) { - if (threadId != uint32_t(-1)) - { - // Save information. 
- if (out_nodes.size() <= numaId) - { - out_nodes.resize(numaId + 1); - } - - auto& numaNode = out_nodes[numaId]; - if (numaNode.cores.size() <= coreId) - { - numaNode.cores.resize(coreId + 1); - } - - auto& core = numaNode.cores[coreId]; - core.procGroup = coreId; - core.threadIds.push_back(threadId); - - out_numThreadsPerProcGroup++; - } - auto data_start = line.find(": ") + 2; - threadId = std::strtoul(&line.c_str()[data_start], &c, 10); + procId = std::strtoul(&line.c_str()[data_start], &c, 10); continue; } if (line.find("core id") != std::string::npos) @@ -211,29 +190,32 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread if (line.find("physical id") != std::string::npos) { auto data_start = line.find(": ") + 2; - numaId = std::strtoul(&line.c_str()[data_start], &c, 10); + physId = std::strtoul(&line.c_str()[data_start], &c, 10); continue; } + if (line.length() == 0) + { + if (physId + 1 > out_nodes.size()) + out_nodes.resize(physId + 1); + auto& numaNode = out_nodes[physId]; + numaNode.numaId = physId; + + if (coreId + 1 > numaNode.cores.size()) + numaNode.cores.resize(coreId + 1); + auto& core = numaNode.cores[coreId]; + core.procGroup = coreId; + core.threadIds.push_back(procId); + } } - if (threadId != uint32_t(-1)) + out_numThreadsPerProcGroup = 0; + for (auto &node : out_nodes) { - // Save information. 
- if (out_nodes.size() <= numaId) + for (auto &core : node.cores) { - out_nodes.resize(numaId + 1); + out_numThreadsPerProcGroup = std::max((size_t)out_numThreadsPerProcGroup, + core.threadIds.size()); } - auto& numaNode = out_nodes[numaId]; - numaNode.numaId = numaId; - if (numaNode.cores.size() <= coreId) - { - numaNode.cores.resize(coreId + 1); - } - auto& core = numaNode.cores[coreId]; - - core.procGroup = coreId; - core.threadIds.push_back(threadId); - out_numThreadsPerProcGroup++; } #else @@ -316,7 +298,11 @@ void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t procGroupId = CPU_ZERO(&cpuset); CPU_SET(threadId, &cpuset); - pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset); + int err = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset); + if (err != 0) + { + fprintf(stderr, "pthread_setaffinity_np failure for tid %u: %s\n", threadId, strerror(err)); + } #endif } @@ -1031,7 +1017,16 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool) } else { - pPool->numaMask = numNodes - 1; // Only works for 2**n numa nodes (1, 2, 4, etc.) + // numa distribution assumes workers on all nodes + bool useNuma = true; + if (numCoresPerNode * numHyperThreads == 1) + useNuma = false; + + if (useNuma) { + pPool->numaMask = numNodes - 1; // Only works for 2**n numa nodes (1, 2, 4, etc.) + } else { + pPool->numaMask = 0; + } uint32_t workerId = 0; for (uint32_t n = 0; n < numNodes; ++n) @@ -1064,7 +1059,7 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool) pPool->pThreadData[workerId].workerId = workerId; pPool->pThreadData[workerId].procGroupId = core.procGroup; pPool->pThreadData[workerId].threadId = core.threadIds[t]; - pPool->pThreadData[workerId].numaId = node.numaId; + pPool->pThreadData[workerId].numaId = useNuma ? n : 0; pPool->pThreadData[workerId].coreId = c; pPool->pThreadData[workerId].htId = t; pPool->pThreadData[workerId].pContext = pContext; -- 2.30.2