util: add a resource wrapper to get resource samples
[mesa.git] / src / gallium / auxiliary / util / u_helpers.c
index 8095242efd958ed06339800708c964a3513c61db..31b7d533952d158ebef68ce6c28c478d4d384d0c 100644
@@ -30,6 +30,7 @@
 #include "util/u_inlines.h"
 #include "util/u_upload_mgr.h"
 #include "util/u_thread.h"
+#include "util/os_time.h"
 #include <inttypes.h>
 
 /**
@@ -51,6 +52,8 @@ void util_set_vertex_buffers_mask(struct pipe_vertex_buffer *dst,
 
    dst += start_slot;
 
+   *enabled_buffers &= ~u_bit_consecutive(start_slot, count);
+
    if (src) {
       for (i = 0; i < count; i++) {
          if (src[i].buffer.resource)
@@ -65,15 +68,12 @@ void util_set_vertex_buffers_mask(struct pipe_vertex_buffer *dst,
       /* Copy over the other members of pipe_vertex_buffer. */
       memcpy(dst, src, count * sizeof(struct pipe_vertex_buffer));
 
-      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
       *enabled_buffers |= bitmask << start_slot;
    }
    else {
       /* Unreference the buffers. */
       for (i = 0; i < count; i++)
          pipe_vertex_buffer_unreference(&dst[i]);
-
-      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
    }
 }
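
For reference, u_bit_consecutive() from util/u_math.h computes the same slot
mask as the removed open-coded ((1ull << count) - 1) << start_slot
expressions, so the clear now happens once up front instead of in both
branches. A minimal standalone sketch of the equivalence, with the helper
re-declared locally for illustration:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Local stand-in for mesa's u_bit_consecutive(); the real helper lives in
 * util/u_math.h and special-cases count == 32, where the open-coded
 * (1u << count) shift would be undefined. */
static unsigned
bit_consecutive(unsigned start, unsigned count)
{
   assert(start + count <= 32);
   if (count == 32)
      return ~0u;
   return ((1u << count) - 1) << start;
}

int main(void)
{
   /* Clear slots [2, 5) of an enabled-buffers mask, as the hunk above now
    * does once before the bind/unbind branches. */
   uint32_t enabled_buffers = 0xff;
   enabled_buffers &= ~bit_consecutive(2, 3);
   printf("0x%02x\n", (unsigned)enabled_buffers); /* 0xe3: bits 2..4 cleared */
   return 0;
}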
 
@@ -100,6 +100,43 @@ void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
    *dst_count = util_last_bit(enabled_buffers);
 }
 
+/**
+ * Copy an array of pipe_shader_buffer structures while properly referencing
+ * the pipe_shader_buffer::buffer member and updating *enabled_buffers.
+ *
+ * \sa util_set_vertex_buffers_mask
+ */
+void util_set_shader_buffers_mask(struct pipe_shader_buffer *dst,
+                                  uint32_t *enabled_buffers,
+                                  const struct pipe_shader_buffer *src,
+                                  unsigned start_slot, unsigned count)
+{
+   unsigned i;
+
+   dst += start_slot;
+
+   if (src) {
+      for (i = 0; i < count; i++) {
+         pipe_resource_reference(&dst[i].buffer, src[i].buffer);
+
+         if (src[i].buffer)
+            *enabled_buffers |= (1ull << (start_slot + i));
+         else
+            *enabled_buffers &= ~(1ull << (start_slot + i));
+      }
+
+      /* Copy over the other members of pipe_shader_buffer. */
+      memcpy(dst, src, count * sizeof(struct pipe_shader_buffer));
+   }
+   else {
+      /* Unreference the buffers. */
+      for (i = 0; i < count; i++)
+         pipe_resource_reference(&dst[i].buffer, NULL);
+
+      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
+   }
+}
+
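
A hedged usage sketch: a driver's set_shader_buffers hook can delegate slot
tracking to the new helper. The foo_context structure and callback below are
hypothetical; only util_set_shader_buffers_mask(), the pipe types, and
PIPE_SHADER_TYPES come from gallium:

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_helpers.h"

/* Hypothetical driver context; only the fields used here are shown. */
struct foo_context {
   struct pipe_context base;
   struct pipe_shader_buffer ssbo[PIPE_SHADER_TYPES][32];
   uint32_t ssbo_mask[PIPE_SHADER_TYPES];
};

static void
foo_set_shader_buffers(struct pipe_context *pipe,
                       enum pipe_shader_type shader,
                       unsigned start_slot, unsigned count,
                       const struct pipe_shader_buffer *buffers)
{
   struct foo_context *ctx = (struct foo_context *)pipe;

   /* References each bound buffer and keeps ssbo_mask in sync; passing
    * buffers == NULL unbinds and unreferences the whole slot range. */
   util_set_shader_buffers_mask(ctx->ssbo[shader], &ctx->ssbo_mask[shader],
                                buffers, start_slot, count);
}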
 /**
  * Given a user index buffer, save the structure to "saved", and upload it.
  */
@@ -107,12 +144,12 @@ bool
 util_upload_index_buffer(struct pipe_context *pipe,
                          const struct pipe_draw_info *info,
                          struct pipe_resource **out_buffer,
-                         unsigned *out_offset)
+                         unsigned *out_offset, unsigned alignment)
 {
    unsigned start_offset = info->start * info->index_size;
 
    u_upload_data(pipe->stream_uploader, start_offset,
-                 info->count * info->index_size, 4,
+                 info->count * info->index_size, alignment,
                  (char*)info->index.user + start_offset,
                  out_offset, out_buffer);
    u_upload_unmap(pipe->stream_uploader);
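
A hedged example of the new alignment parameter, sketched as a fragment of a
hypothetical driver draw_vbo(pipe, info); the 32-byte requirement is invented
for illustration:

struct pipe_resource *indexbuf = NULL;
unsigned index_offset = 0;

if (info->index_size && info->has_user_indices) {
   /* Hardware that needs, say, 32-byte-aligned index fetches can request
    * that here instead of inheriting the previously hardcoded 4. */
   if (!util_upload_index_buffer(pipe, info, &indexbuf, &index_offset, 32))
      return;
}

/* ... emit the draw using indexbuf + index_offset, then drop the ref. */
pipe_resource_reference(&indexbuf, NULL);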
@@ -132,25 +169,21 @@ util_upload_index_buffer(struct pipe_context *pipe,
  *                      pinned.
  */
 void
-util_context_thread_changed(struct pipe_context *ctx, thrd_t *upper_thread)
+util_pin_driver_threads_to_random_L3(struct pipe_context *ctx,
+                                     thrd_t *upper_thread)
 {
-#ifdef HAVE_PTHREAD
-   thrd_t current = thrd_current();
-   int cache = util_get_L3_for_pinned_thread(current,
-                                             util_cpu_caps.cores_per_L3);
-
-   /* If the main thread is not pinned, choose the L3 cache. */
-   if (cache == -1) {
-      unsigned num_caches = util_cpu_caps.nr_cpus /
+   /* If all cores share one L3 cache, pinning has no effect; do nothing. */
+   if (util_cpu_caps.nr_cpus == util_cpu_caps.cores_per_L3)
+      return;
+
+   unsigned num_L3_caches = util_cpu_caps.nr_cpus /
                             util_cpu_caps.cores_per_L3;
-      static unsigned last_cache;
 
-      /* Choose a different L3 cache for each subsequent MakeCurrent. */
-      cache = p_atomic_inc_return(&last_cache) % num_caches;
-      util_pin_thread_to_L3(current, cache, util_cpu_caps.cores_per_L3);
-   }
+   /* Get a semi-random number. */
+   int64_t t = os_time_get_nano();
+   unsigned cache = (t ^ (t >> 8) ^ (t >> 16)) % num_L3_caches;
 
-   /* Tell the driver to pin its threads to the same L3 cache. */
+   /* Tell the driver to pin its threads to the selected L3 cache. */
    if (ctx->set_context_param) {
       ctx->set_context_param(ctx, PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE,
                              cache);
@@ -159,6 +192,15 @@ util_context_thread_changed(struct pipe_context *ctx, thrd_t *upper_thread)
    /* Do the same for the upper level thread if there is any (e.g. glthread) */
    if (upper_thread)
       util_pin_thread_to_L3(*upper_thread, cache, util_cpu_caps.cores_per_L3);
+
+   /* Optionally pin the application thread to the same L3 cache to get
+    * maximum performance with glthread on AMD Zen. (This function is only
+    * called when glthread is enabled.) This is used to estimate and remove
+    * the overhead of Infinity Fabric between L3 caches.
+    */
+#if defined(HAVE_PTHREAD)
+   if (debug_get_bool_option("pin_app_thread", false))
+      util_pin_thread_to_L3(pthread_self(), cache, util_cpu_caps.cores_per_L3);
 #endif
 }
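
The cache selection above is a cheap time-based hash rather than a real RNG:
XOR-folding the low bytes of the nanosecond clock lets several byte positions
influence the result, so contexts created close together still tend to land
on different L3 caches. A standalone sketch of the same computation, with
os_time_get_nano() replaced by standard clock_gettime() since the mesa helper
is tree-internal, and the CPU topology hardcoded where mesa reads
util_cpu_caps:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
   /* Example topology: 16 CPUs with 4 cores per L3, as on some AMD Zen
    * parts. */
   unsigned num_cpus = 16, cores_per_L3 = 4;
   unsigned num_L3_caches = num_cpus / cores_per_L3;

   struct timespec ts;
   clock_gettime(CLOCK_MONOTONIC, &ts);
   int64_t t = (int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;

   /* Same folding as util_pin_driver_threads_to_random_L3(). */
   unsigned cache = (unsigned)((t ^ (t >> 8) ^ (t >> 16)) % num_L3_caches);
   printf("pinning driver threads to L3 cache %u of %u\n",
          cache, num_L3_caches);
   return 0;
}

The pin_app_thread option is read through debug_get_bool_option(), which
checks the environment, so running an application with pin_app_thread=true
set exercises the new app-thread pinning path.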
 
@@ -200,7 +242,7 @@ util_end_pipestat_query(struct pipe_context *ctx, struct pipe_query *q,
            "    hs_invocations = %"PRIu64"\n"
            "    ds_invocations = %"PRIu64"\n"
            "    cs_invocations = %"PRIu64"\n",
-           p_atomic_inc_return(&counter),
+           (unsigned)p_atomic_inc_return(&counter),
            stats.ia_vertices,
            stats.ia_primitives,
            stats.vs_invocations,