std::ifstream input("/proc/cpuinfo");
std::string line;
char* c;
- uint32_t threadId = uint32_t(-1);
+ uint32_t procId = uint32_t(-1);
uint32_t coreId = uint32_t(-1);
- uint32_t numaId = uint32_t(-1);
+ uint32_t physId = uint32_t(-1);
while (std::getline(input, line))
{
if (line.find("processor") != std::string::npos)
{
- if (threadId != uint32_t(-1))
- {
- // Save information.
- if (out_nodes.size() <= numaId)
- {
- out_nodes.resize(numaId + 1);
- }
-
- auto& numaNode = out_nodes[numaId];
- if (numaNode.cores.size() <= coreId)
- {
- numaNode.cores.resize(coreId + 1);
- }
-
- auto& core = numaNode.cores[coreId];
- core.procGroup = coreId;
- core.threadIds.push_back(threadId);
-
- out_numThreadsPerProcGroup++;
- }
-
auto data_start = line.find(": ") + 2;
- threadId = std::strtoul(&line.c_str()[data_start], &c, 10);
+ procId = std::strtoul(&line.c_str()[data_start], &c, 10);
continue;
}
if (line.find("core id") != std::string::npos)
if (line.find("physical id") != std::string::npos)
{
auto data_start = line.find(": ") + 2;
- numaId = std::strtoul(&line.c_str()[data_start], &c, 10);
+ physId = std::strtoul(&line.c_str()[data_start], &c, 10);
continue;
}
+ if (line.length() == 0)
+ {
+ if (physId + 1 > out_nodes.size())
+ out_nodes.resize(physId + 1);
+ auto& numaNode = out_nodes[physId];
+ numaNode.numaId = physId;
+
+ if (coreId + 1 > numaNode.cores.size())
+ numaNode.cores.resize(coreId + 1);
+ auto& core = numaNode.cores[coreId];
+ core.procGroup = coreId;
+ core.threadIds.push_back(procId);
+ }
}
- if (threadId != uint32_t(-1))
+ out_numThreadsPerProcGroup = 0;
+ for (auto &node : out_nodes)
{
- // Save information.
- if (out_nodes.size() <= numaId)
+ for (auto &core : node.cores)
{
- out_nodes.resize(numaId + 1);
+ out_numThreadsPerProcGroup = std::max((size_t)out_numThreadsPerProcGroup,
+ core.threadIds.size());
}
- auto& numaNode = out_nodes[numaId];
- numaNode.numaId = numaId;
- if (numaNode.cores.size() <= coreId)
- {
- numaNode.cores.resize(coreId + 1);
- }
- auto& core = numaNode.cores[coreId];
-
- core.procGroup = coreId;
- core.threadIds.push_back(threadId);
- out_numThreadsPerProcGroup++;
}
#else
CPU_ZERO(&cpuset);
CPU_SET(threadId, &cpuset);
- pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
+ int err = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
+ if (err != 0)
+ {
+ fprintf(stderr, "pthread_setaffinity_np failure for tid %u: %s\n", threadId, strerror(err));
+ }
#endif
}
}
else
{
- pPool->numaMask = numNodes - 1; // Only works for 2**n numa nodes (1, 2, 4, etc.)
+ // numa distribution assumes workers on all nodes
+ bool useNuma = true;
+ if (numCoresPerNode * numHyperThreads == 1)
+ useNuma = false;
+
+ if (useNuma) {
+ pPool->numaMask = numNodes - 1; // Only works for 2**n numa nodes (1, 2, 4, etc.)
+ } else {
+ pPool->numaMask = 0;
+ }
uint32_t workerId = 0;
for (uint32_t n = 0; n < numNodes; ++n)
pPool->pThreadData[workerId].workerId = workerId;
pPool->pThreadData[workerId].procGroupId = core.procGroup;
pPool->pThreadData[workerId].threadId = core.threadIds[t];
- pPool->pThreadData[workerId].numaId = node.numaId;
+ pPool->pThreadData[workerId].numaId = useNuma ? n : 0;
pPool->pThreadData[workerId].coreId = c;
pPool->pThreadData[workerId].htId = t;
pPool->pThreadData[workerId].pContext = pContext;