#ifndef U_THREAD_H_
#define U_THREAD_H_
+#include <stdint.h>
+#include <stdbool.h>
+#include <assert.h>
+
#include "c11/threads.h"
+#include "detect_os.h"
#ifdef HAVE_PTHREAD
#include <signal.h>
+#ifdef PTHREAD_SETAFFINITY_IN_NP_HEADER
+#include <pthread_np.h>
+#endif
+#endif
+
+#ifdef __HAIKU__
+#include <OS.h>
#endif
+#ifdef __FreeBSD__
+/* pthread_np.h -> sys/param.h -> machine/param.h
+ * - defines ALIGN which clashes with our ALIGN
+ */
+#undef ALIGN
+#define cpu_set_t cpuset_t
+#endif
static inline thrd_t u_thread_create(int (*routine)(void *), void *param)
{
   thrd_t thread;
#ifdef HAVE_PTHREAD
   sigset_t saved_set, new_set;
   int ret;

+   /* Block signals in the parent so the new thread inherits them blocked;
+    * SIGSYS is left unblocked so seccomp filters keep working. */
   sigfillset(&new_set);
-   pthread_sigmask(SIG_SETMASK, &new_set, &saved_set);
+   sigdelset(&new_set, SIGSYS);
+   pthread_sigmask(SIG_BLOCK, &new_set, &saved_set);
   ret = thrd_create( &thread, routine, param );
   pthread_sigmask(SIG_SETMASK, &saved_set, NULL);
#else
   int ret = thrd_create( &thread, routine, param );
#endif
   return (ret == thrd_success) ? thread : 0;
}
static inline void u_thread_setname( const char *name )
{
#if defined(HAVE_PTHREAD)
-# if defined(__GNU_LIBRARY__) && defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \
- (__GLIBC__ >= 3 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 12))
+#if DETECT_OS_LINUX || DETECT_OS_CYGWIN || DETECT_OS_SOLARIS
pthread_setname_np(pthread_self(), name);
-# endif
+#elif DETECT_OS_FREEBSD || DETECT_OS_OPENBSD
+ pthread_set_name_np(pthread_self(), name);
+#elif DETECT_OS_NETBSD
+ pthread_setname_np(pthread_self(), "%s", (void *)name);
+#elif DETECT_OS_APPLE
+ pthread_setname_np(name);
+#elif DETECT_OS_HAIKU
+ rename_thread(find_thread(NULL), name);
+#else
+#warning Not sure how to call pthread_setname_np
+#endif
#endif
(void)name;
}
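+
+/* Usage sketch (illustrative, not part of the patch): every platform branch
+ * above names the *calling* thread, so invoke this from the thread's own
+ * entry point, e.g.
+ *
+ *    static int worker_entry(void *param)
+ *    {
+ *       u_thread_setname("gallium-worker");
+ *       return do_work(param);   // do_work is a hypothetical job function
+ *    }
+ */
+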
+/**
+ * An AMD Zen CPU consists of multiple modules where each module has its own L3
+ * cache. Inter-thread communication such as locks and atomics between modules
+ * is very expensive. It's desirable to pin a group of closely cooperating
+ * threads to one group of cores sharing L3.
+ *
+ * \param thread the thread to pin
+ * \param L3_index index of the L3 cache
+ * \param cores_per_L3 number of CPU cores shared by one L3
+ */
+static inline void
+util_pin_thread_to_L3(thrd_t thread, unsigned L3_index, unsigned cores_per_L3)
+{
+#if defined(HAVE_PTHREAD_SETAFFINITY)
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ for (unsigned i = 0; i < cores_per_L3; i++)
+ CPU_SET(L3_index * cores_per_L3 + i, &cpuset);
+ pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
+#endif
+}
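+
+/* Usage sketch (hypothetical values): pin two cooperating threads to the
+ * first L3 so their lock and atomic traffic stays on one module. The value
+ * 4 for cores_per_L3 is only an assumption here; a real caller should query
+ * the CPU topology at runtime.
+ *
+ *    util_pin_thread_to_L3(producer, 0, 4);
+ *    util_pin_thread_to_L3(consumer, 0, 4);
+ */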
+
+/**
+ * Return the index of the L3 cache that the thread is pinned to. If the
+ * thread is pinned to multiple L3 caches (or affinity is unsupported),
+ * return -1.
+ *
+ * \param thread the pinned thread
+ * \param cores_per_L3 number of CPU cores shared by one L3
+ */
+static inline int
+util_get_L3_for_pinned_thread(thrd_t thread, unsigned cores_per_L3)
+{
+#if defined(HAVE_PTHREAD_SETAFFINITY)
+ cpu_set_t cpuset;
+
+ if (pthread_getaffinity_np(thread, sizeof(cpuset), &cpuset) == 0) {
+ int L3_index = -1;
+
+ for (unsigned i = 0; i < CPU_SETSIZE; i++) {
+ if (CPU_ISSET(i, &cpuset)) {
+ int x = i / cores_per_L3;
+
+ if (L3_index != x) {
+ if (L3_index == -1)
+ L3_index = x;
+ else
+ return -1; /* multiple L3s are set */
+ }
+ }
+ }
+ return L3_index;
+ }
+#endif
+ return -1;
+}
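+
+/* Usage sketch (hypothetical): move a helper thread next to a thread that
+ * is already pinned; again, 4 cores per L3 is an assumed topology value.
+ *
+ *    int L3 = util_get_L3_for_pinned_thread(main_thread, 4);
+ *    if (L3 >= 0)
+ *       util_pin_thread_to_L3(helper, L3, 4);
+ *
+ * A result of -1 means the affinity mask spans several L3s (or affinity is
+ * unsupported), so no single cache can be inferred.
+ */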
+
/*
* Thread statistics.
*/
static inline int64_t
u_thread_get_time_nano(thrd_t thread)
{
-#if defined(__linux__) && defined(HAVE_PTHREAD)
+#if defined(HAVE_PTHREAD) && !defined(__APPLE__) && !defined(__HAIKU__)
   struct timespec ts;
   clockid_t cid;

+   /* Per-thread CPU-time clock; not available on macOS or Haiku. */
   pthread_getcpuclockid(thread, &cid);
   clock_gettime(cid, &ts);
   return (int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
#else
   (void)thread;
   return 0;
#endif
}
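+
+/* Usage sketch (illustrative): measure how much CPU time a worker burned
+ * while a job ran. Note this is per-thread CPU time, not wall-clock time.
+ *
+ *    int64_t t0 = u_thread_get_time_nano(worker);
+ *    ...wait for the job to finish...
+ *    int64_t busy_ns = u_thread_get_time_nano(worker) - t0;
+ */
+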
+static inline bool u_thread_is_self(thrd_t thread)
+{
+#if defined(HAVE_PTHREAD)
+ return pthread_equal(pthread_self(), thread);
+#endif
+ return false;
+}
+
+/*
+ * util_barrier
+ */
+
+#if defined(HAVE_PTHREAD) && !defined(__APPLE__)
+
+typedef pthread_barrier_t util_barrier;
+
+static inline void util_barrier_init(util_barrier *barrier, unsigned count)
+{
+ pthread_barrier_init(barrier, NULL, count);
+}
+
+static inline void util_barrier_destroy(util_barrier *barrier)
+{
+ pthread_barrier_destroy(barrier);
+}
+
+static inline void util_barrier_wait(util_barrier *barrier)
+{
+ pthread_barrier_wait(barrier);
+}
+
+#else /* If the OS doesn't have its own, implement barriers using a mutex and a condvar */
+
+typedef struct {
+ unsigned count;
+ unsigned waiters;
+ uint64_t sequence;
+ mtx_t mutex;
+ cnd_t condvar;
+} util_barrier;
+
+static inline void util_barrier_init(util_barrier *barrier, unsigned count)
+{
+ barrier->count = count;
+ barrier->waiters = 0;
+ barrier->sequence = 0;
+ (void) mtx_init(&barrier->mutex, mtx_plain);
+ cnd_init(&barrier->condvar);
+}
+
+static inline void util_barrier_destroy(util_barrier *barrier)
+{
+ assert(barrier->waiters == 0);
+ mtx_destroy(&barrier->mutex);
+ cnd_destroy(&barrier->condvar);
+}
+
+static inline void util_barrier_wait(util_barrier *barrier)
+{
+ mtx_lock(&barrier->mutex);
+
+ assert(barrier->waiters < barrier->count);
+ barrier->waiters++;
+
+ if (barrier->waiters < barrier->count) {
+ uint64_t sequence = barrier->sequence;
+
+ do {
+ cnd_wait(&barrier->condvar, &barrier->mutex);
+ } while (sequence == barrier->sequence);
+ } else {
+ barrier->waiters = 0;
+ barrier->sequence++;
+ cnd_broadcast(&barrier->condvar);
+ }
+
+ mtx_unlock(&barrier->mutex);
+}
+
+#endif
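+
+/* Usage sketch (illustrative): release N workers at once. util_barrier_wait
+ * returns only after `count` threads have arrived; in the fallback, the
+ * sequence counter protects against spurious condvar wakeups.
+ *
+ *    util_barrier barrier;
+ *    util_barrier_init(&barrier, num_workers);   // num_workers is hypothetical
+ *    // each worker calls util_barrier_wait(&barrier) once per phase
+ *    util_barrier_destroy(&barrier);             // after all workers exit
+ */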
+
#endif /* U_THREAD_H_ */