gallium/util: start splitting u_debug into generic and gallium specific components
diff --git a/src/gallium/auxiliary/util/u_helpers.c b/src/gallium/auxiliary/util/u_helpers.c
index 7d45b2f06db5461ec1eb7cec3b4d23e04da7e382..4c70c004178b36bbc8973ae0fad882b1b6611186 100644
--- a/src/gallium/auxiliary/util/u_helpers.c
+++ b/src/gallium/auxiliary/util/u_helpers.c
@@ -30,6 +30,7 @@
 #include "util/u_inlines.h"
 #include "util/u_upload_mgr.h"
 #include "util/u_thread.h"
+#include "util/os_time.h"
 #include <inttypes.h>
 
 /**
@@ -120,6 +121,43 @@ util_upload_index_buffer(struct pipe_context *pipe,
    return *out_buffer != NULL;
 }
 
+#ifdef HAVE_PTHREAD_SETAFFINITY
+
+static unsigned L3_cache_number;
+static once_flag thread_pinning_once_flag = ONCE_FLAG_INIT;
+
+static void
+util_set_full_cpu_affinity(void)
+{
+   cpu_set_t cpuset;
+
+   CPU_ZERO(&cpuset);
+   for (unsigned i = 0; i < CPU_SETSIZE; i++)
+      CPU_SET(i, &cpuset);
+
+   pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
+}
+
+static void
+util_init_thread_pinning(void)
+{
+   /* Get a semi-random number. */
+   int64_t t = os_time_get_nano();
+   L3_cache_number = (t ^ (t >> 8) ^ (t >> 16));
+
+   /* Reset thread affinity for all child processes to prevent them from
+    * inheriting the current thread's affinity.
+    *
+    * XXX: If the driver is unloaded after this, and the app later calls
+    * fork(), the child process will likely crash before fork() returns,
+    * because the address where util_set_full_cpu_affinity was located
+    * will either be unmapped or point to random other contents.
+    */
+   pthread_atfork(NULL, NULL, util_set_full_cpu_affinity);
+}
+
+#endif
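
For reference, the pinning helpers used by util_context_thread_changed below live in util/u_thread.h (included above) and are not part of this diff. A minimal sketch of what util_get_L3_for_pinned_thread amounts to, assuming thrd_t wraps pthread_t and that cores sharing an L3 are numbered consecutively (which is what util_cpu_caps.cores_per_L3 implies), is: read the thread's affinity mask and return the L3 group index only if the mask stays within a single group.

/* Sketch only; the real helper lives in util/u_thread.h and is guarded by
 * HAVE_PTHREAD_SETAFFINITY like the code above.
 */
static int
sketch_get_L3_for_pinned_thread(thrd_t thread, unsigned cores_per_L3)
{
   cpu_set_t cpuset;
   int L3_index = -1;

   if (pthread_getaffinity_np(thread, sizeof(cpuset), &cpuset) != 0)
      return -1;

   for (unsigned i = 0; i < CPU_SETSIZE; i++) {
      if (!CPU_ISSET(i, &cpuset))
         continue;

      int group = i / cores_per_L3;
      if (L3_index == -1)
         L3_index = group;   /* first allowed core seen */
      else if (L3_index != group)
         return -1;          /* affinity spans more than one L3 */
   }
   return L3_index;
}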
+
 /**
  * Called by MakeCurrent. Used to notify the driver that the application
  * thread may have been changed.
@@ -134,18 +172,24 @@ util_upload_index_buffer(struct pipe_context *pipe,
 void
 util_context_thread_changed(struct pipe_context *ctx, thrd_t *upper_thread)
 {
+#ifdef HAVE_PTHREAD_SETAFFINITY
+   /* If pinning has no effect, don't do anything. */
+   if (util_cpu_caps.nr_cpus == util_cpu_caps.cores_per_L3)
+      return;
+
    thrd_t current = thrd_current();
    int cache = util_get_L3_for_pinned_thread(current,
                                              util_cpu_caps.cores_per_L3);
 
+   call_once(&thread_pinning_once_flag, util_init_thread_pinning);
+
    /* If the main thread is not pinned, choose the L3 cache. */
    if (cache == -1) {
-      unsigned num_caches = util_cpu_caps.nr_cpus /
-                            util_cpu_caps.cores_per_L3;
-      static unsigned last_cache;
+      unsigned num_L3_caches = util_cpu_caps.nr_cpus /
+                               util_cpu_caps.cores_per_L3;
 
       /* Choose a different L3 cache for each subsequent MakeCurrent. */
-      cache = p_atomic_inc_return(&last_cache) % num_caches;
+      cache = p_atomic_inc_return(&L3_cache_number) % num_L3_caches;
       util_pin_thread_to_L3(current, cache, util_cpu_caps.cores_per_L3);
    }
 
@@ -158,6 +202,7 @@ util_context_thread_changed(struct pipe_context *ctx, thrd_t *upper_thread)
    /* Do the same for the upper level thread if there is any (e.g. glthread) */
    if (upper_thread)
       util_pin_thread_to_L3(*upper_thread, cache, util_cpu_caps.cores_per_L3);
+#endif
 }
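
Its counterpart util_pin_thread_to_L3, also from util/u_thread.h, restricts the affinity mask to the cores sharing the chosen cache. Roughly, under the same assumptions as the sketch above:

/* Sketch only; the real helper lives in util/u_thread.h. */
static void
sketch_pin_thread_to_L3(thrd_t thread, unsigned L3_index, unsigned cores_per_L3)
{
   cpu_set_t cpuset;

   CPU_ZERO(&cpuset);
   /* Allow only the cores that share the selected L3 cache. */
   for (unsigned i = 0; i < cores_per_L3; i++)
      CPU_SET(L3_index * cores_per_L3 + i, &cpuset);
   pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
}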
 
 /* This is a helper for hardware bring-up. Don't remove. */
@@ -221,3 +266,123 @@ util_wait_for_idle(struct pipe_context *ctx)
    ctx->flush(ctx, &fence, 0);
    ctx->screen->fence_finish(ctx->screen, NULL, fence, PIPE_TIMEOUT_INFINITE);
 }
+
+void
+util_throttle_init(struct util_throttle *t, uint64_t max_mem_usage)
+{
+   t->max_mem_usage = max_mem_usage;
+}
+
+void
+util_throttle_deinit(struct pipe_screen *screen, struct util_throttle *t)
+{
+   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
+      screen->fence_reference(screen, &t->ring[i].fence, NULL);
+}
+
+static uint64_t
+util_get_throttle_total_memory_usage(struct util_throttle *t)
+{
+   uint64_t total_usage = 0;
+
+   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
+      total_usage += t->ring[i].mem_usage;
+   return total_usage;
+}
+
+static void util_dump_throttle_ring(struct util_throttle *t)
+{
+   printf("Throttle:\n");
+   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++) {
+      printf("  ring[%u]: fence = %s, mem_usage = %"PRIu64"%s%s\n",
+             i, t->ring[i].fence ? "yes" : " no",
+             t->ring[i].mem_usage,
+             t->flush_index == i ? " [flush]" : "",
+             t->wait_index == i ? " [wait]" : "");
+   }
+}
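
util_dump_throttle_ring is intentionally left unreferenced (note the (void) cast below). While debugging throttling behaviour, one could call it by hand, for instance behind an environment variable; GALLIUM_THROTTLE_DUMP is a made-up name here, debug_get_bool_option is the existing u_debug helper.

   /* Hypothetical debugging aid, not part of this patch. */
   if (debug_get_bool_option("GALLIUM_THROTTLE_DUMP", FALSE))
      util_dump_throttle_ring(t);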
+
+/**
+ * Notify util_throttle that the next operation allocates memory.
+ * util_throttle tracks memory usage and waits for fences until its tracked
+ * memory usage decreases.
+ *
+ * Example:
+ *   util_throttle_memory_usage(..., w*h*d*Bpp);
+ *   TexSubImage(..., w, h, d, ...);
+ *
+ * This means that TexSubImage can't allocate more memory than the maximum
+ * limit set during initialization.
+ */
+void
+util_throttle_memory_usage(struct pipe_context *pipe,
+                           struct util_throttle *t, uint64_t memory_size)
+{
+   (void)util_dump_throttle_ring; /* silence warning */
+
+   if (!t->max_mem_usage)
+      return;
+
+   struct pipe_screen *screen = pipe->screen;
+   struct pipe_fence_handle **fence = NULL;
+   unsigned ring_size = ARRAY_SIZE(t->ring);
+   uint64_t total = util_get_throttle_total_memory_usage(t);
+
+   /* If there is not enough memory, walk the list of fences and find
+    * the latest one that we need to wait for.
+    */
+   while (t->wait_index != t->flush_index &&
+          total && total + memory_size > t->max_mem_usage) {
+      assert(t->ring[t->wait_index].fence);
+
+      /* Release an older fence if we need to wait for a newer one. */
+      if (fence)
+         screen->fence_reference(screen, fence, NULL);
+
+      fence = &t->ring[t->wait_index].fence;
+      t->ring[t->wait_index].mem_usage = 0;
+      t->wait_index = (t->wait_index + 1) % ring_size;
+
+      total = util_get_throttle_total_memory_usage(t);
+   }
+
+   /* Wait for the fence to decrease memory usage. */
+   if (fence) {
+      screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
+      screen->fence_reference(screen, fence, NULL);
+   }
+
+   /* Flush and get a fence if we've exhausted memory usage for the current
+    * slot.
+    */
+   if (t->ring[t->flush_index].mem_usage &&
+       t->ring[t->flush_index].mem_usage + memory_size >
+       t->max_mem_usage / (ring_size / 2)) {
+      struct pipe_fence_handle **fence =
+         &t->ring[t->flush_index].fence;
+
+      /* Expect that the current flush slot doesn't have a fence yet. */
+      assert(!*fence);
+
+      pipe->flush(pipe, fence, PIPE_FLUSH_ASYNC);
+      t->flush_index = (t->flush_index + 1) % ring_size;
+
+      /* Vacate the next slot if it's occupied. This should be rare. */
+      if (t->flush_index == t->wait_index) {
+         struct pipe_fence_handle **fence =
+            &t->ring[t->wait_index].fence;
+
+         t->ring[t->wait_index].mem_usage = 0;
+         t->wait_index = (t->wait_index + 1) % ring_size;
+
+         assert(*fence);
+         screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
+         screen->fence_reference(screen, fence, NULL);
+      }
+
+      assert(!t->ring[t->flush_index].mem_usage);
+      assert(!t->ring[t->flush_index].fence);
+   }
+
+   t->ring[t->flush_index].mem_usage += memory_size;
+}
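
Taken together, a driver would typically embed struct util_throttle in its context and drive it as in the sketch below. The mydrv_* names and the 128 MB budget are made up for illustration; the sketch assumes the declarations for these helpers live in util/u_helpers.h.

/* Usage sketch with hypothetical mydrv_* names. */
#include "pipe/p_context.h"
#include "util/u_helpers.h"

struct mydrv_context {
   struct pipe_context b;
   struct util_throttle throttle;
};

static void
mydrv_init_throttle(struct mydrv_context *ctx)
{
   /* At context creation: allow roughly 128 MB of unflushed upload copies. */
   util_throttle_init(&ctx->throttle, 128 * 1024 * 1024);
}

static void
mydrv_account_upload(struct mydrv_context *ctx, unsigned width,
                     unsigned height, unsigned depth, unsigned bytes_per_pixel)
{
   /* Call this right before allocating the staging copy for an upload. */
   util_throttle_memory_usage(&ctx->b, &ctx->throttle,
                              (uint64_t)width * height * depth * bytes_per_pixel);
}

static void
mydrv_destroy_throttle(struct mydrv_context *ctx)
{
   /* At context destruction: drop any fences still held by the ring. */
   util_throttle_deinit(ctx->b.screen, &ctx->throttle);
}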