X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Futil%2Fu_thread.h;h=b91d05e4cfd2aefa28cdb8e8aef9d25cdebbaec4;hb=96cfc684e63238a7aeabc8893fb04fe5f3781a66;hp=6b5458af86a4085989819635ee15a2c618b852a1;hpb=6884c95ab4e06a418add29052b0a633a7fdad6ae;p=mesa.git diff --git a/src/util/u_thread.h b/src/util/u_thread.h index 6b5458af86a..b91d05e4cfd 100644 --- a/src/util/u_thread.h +++ b/src/util/u_thread.h @@ -31,11 +31,26 @@ #include #include "c11/threads.h" +#include "detect_os.h" #ifdef HAVE_PTHREAD #include +#ifdef PTHREAD_SETAFFINITY_IN_NP_HEADER +#include +#endif #endif +#ifdef __HAIKU__ +#include +#endif + +#ifdef __FreeBSD__ +/* pthread_np.h -> sys/param.h -> machine/param.h + * - defines ALIGN which clashes with our ALIGN + */ +#undef ALIGN +#define cpu_set_t cpuset_t +#endif static inline thrd_t u_thread_create(int (*routine)(void *), void *param) { @@ -45,7 +60,8 @@ static inline thrd_t u_thread_create(int (*routine)(void *), void *param) int ret; sigfillset(&new_set); - pthread_sigmask(SIG_SETMASK, &new_set, &saved_set); + sigdelset(&new_set, SIGSYS); + pthread_sigmask(SIG_BLOCK, &new_set, &saved_set); ret = thrd_create( &thread, routine, param ); pthread_sigmask(SIG_SETMASK, &saved_set, NULL); #else @@ -61,14 +77,80 @@ static inline thrd_t u_thread_create(int (*routine)(void *), void *param) static inline void u_thread_setname( const char *name ) { #if defined(HAVE_PTHREAD) -# if defined(__GNU_LIBRARY__) && defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \ - (__GLIBC__ >= 3 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 12)) +#if DETECT_OS_LINUX || DETECT_OS_CYGWIN || DETECT_OS_SOLARIS pthread_setname_np(pthread_self(), name); -# endif +#elif DETECT_OS_FREEBSD || DETECT_OS_OPENBSD + pthread_set_name_np(pthread_self(), name); +#elif DETECT_OS_NETBSD + pthread_setname_np(pthread_self(), "%s", (void *)name); +#elif DETECT_OS_APPLE + pthread_setname_np(name); +#elif DETECT_OS_HAIKU + rename_thread(find_thread(NULL), name); +#else +#warning Not sure how to call pthread_setname_np +#endif #endif (void)name; } +/** + * An AMD Zen CPU consists of multiple modules where each module has its own L3 + * cache. Inter-thread communication such as locks and atomics between modules + * is very expensive. It's desirable to pin a group of closely cooperating + * threads to one group of cores sharing L3. + * + * \param thread thread + * \param L3_index index of the L3 cache + * \param cores_per_L3 number of CPU cores shared by one L3 + */ +static inline void +util_pin_thread_to_L3(thrd_t thread, unsigned L3_index, unsigned cores_per_L3) +{ +#if defined(HAVE_PTHREAD_SETAFFINITY) + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + for (unsigned i = 0; i < cores_per_L3; i++) + CPU_SET(L3_index * cores_per_L3 + i, &cpuset); + pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset); +#endif +} + +/** + * Return the index of L3 that the thread is pinned to. If the thread is + * pinned to multiple L3 caches, return -1. + * + * \param thread thread + * \param cores_per_L3 number of CPU cores shared by one L3 + */ +static inline int +util_get_L3_for_pinned_thread(thrd_t thread, unsigned cores_per_L3) +{ +#if defined(HAVE_PTHREAD_SETAFFINITY) + cpu_set_t cpuset; + + if (pthread_getaffinity_np(thread, sizeof(cpuset), &cpuset) == 0) { + int L3_index = -1; + + for (unsigned i = 0; i < CPU_SETSIZE; i++) { + if (CPU_ISSET(i, &cpuset)) { + int x = i / cores_per_L3; + + if (L3_index != x) { + if (L3_index == -1) + L3_index = x; + else + return -1; /* multiple L3s are set */ + } + } + } + return L3_index; + } +#endif + return -1; +} + /* * Thread statistics. */ @@ -77,7 +159,7 @@ static inline void u_thread_setname( const char *name ) static inline int64_t u_thread_get_time_nano(thrd_t thread) { -#if defined(__linux__) && defined(HAVE_PTHREAD) +#if defined(HAVE_PTHREAD) && !defined(__APPLE__) && !defined(__HAIKU__) struct timespec ts; clockid_t cid; @@ -92,12 +174,83 @@ u_thread_get_time_nano(thrd_t thread) static inline bool u_thread_is_self(thrd_t thread) { #if defined(HAVE_PTHREAD) -# if defined(__GNU_LIBRARY__) && defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \ - (__GLIBC__ >= 3 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 12)) return pthread_equal(pthread_self(), thread); -# endif #endif return false; } +/* + * util_barrier + */ + +#if defined(HAVE_PTHREAD) && !defined(__APPLE__) + +typedef pthread_barrier_t util_barrier; + +static inline void util_barrier_init(util_barrier *barrier, unsigned count) +{ + pthread_barrier_init(barrier, NULL, count); +} + +static inline void util_barrier_destroy(util_barrier *barrier) +{ + pthread_barrier_destroy(barrier); +} + +static inline void util_barrier_wait(util_barrier *barrier) +{ + pthread_barrier_wait(barrier); +} + + +#else /* If the OS doesn't have its own, implement barriers using a mutex and a condvar */ + +typedef struct { + unsigned count; + unsigned waiters; + uint64_t sequence; + mtx_t mutex; + cnd_t condvar; +} util_barrier; + +static inline void util_barrier_init(util_barrier *barrier, unsigned count) +{ + barrier->count = count; + barrier->waiters = 0; + barrier->sequence = 0; + (void) mtx_init(&barrier->mutex, mtx_plain); + cnd_init(&barrier->condvar); +} + +static inline void util_barrier_destroy(util_barrier *barrier) +{ + assert(barrier->waiters == 0); + mtx_destroy(&barrier->mutex); + cnd_destroy(&barrier->condvar); +} + +static inline void util_barrier_wait(util_barrier *barrier) +{ + mtx_lock(&barrier->mutex); + + assert(barrier->waiters < barrier->count); + barrier->waiters++; + + if (barrier->waiters < barrier->count) { + uint64_t sequence = barrier->sequence; + + do { + cnd_wait(&barrier->condvar, &barrier->mutex); + } while (sequence == barrier->sequence); + } else { + barrier->waiters = 0; + barrier->sequence++; + cnd_broadcast(&barrier->condvar); + } + + mtx_unlock(&barrier->mutex); +} + +#endif + #endif /* U_THREAD_H_ */