#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "util/u_thread.h"
+#include "util/os_time.h"
#include <inttypes.h>
/**
dst += start_slot;
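+ /* u_bit_consecutive(start, n) builds a mask of n consecutive bits
+  * starting at bit "start", so this clears the enable bits for slots
+  * [start_slot, start_slot + count) before they are re-set below. */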
+ *enabled_buffers &= ~u_bit_consecutive(start_slot, count);
+
if (src) {
for (i = 0; i < count; i++) {
if (src[i].buffer.resource)
/* Copy over the other members of pipe_vertex_buffer. */
memcpy(dst, src, count * sizeof(struct pipe_vertex_buffer));
- *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
*enabled_buffers |= bitmask << start_slot;
}
else {
/* Unreference the buffers. */
for (i = 0; i < count; i++)
pipe_vertex_buffer_unreference(&dst[i]);
-
- *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
}
}
*dst_count = util_last_bit(enabled_buffers);
}
+/**
+ * This function is used to copy an array of pipe_shader_buffer structures,
+ * while properly referencing the pipe_shader_buffer::buffer member.
+ *
+ * \sa util_set_vertex_buffers_mask
+ */
+void util_set_shader_buffers_mask(struct pipe_shader_buffer *dst,
+ uint32_t *enabled_buffers,
+ const struct pipe_shader_buffer *src,
+ unsigned start_slot, unsigned count)
+{
+ unsigned i;
+
+ dst += start_slot;
+
+ if (src) {
+ for (i = 0; i < count; i++) {
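+ /* Refcounted assignment: take a reference on src[i].buffer and release
+  * the reference previously held by dst[i].buffer (either may be NULL). */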
+ pipe_resource_reference(&dst[i].buffer, src[i].buffer);
+
+ if (src[i].buffer)
+ *enabled_buffers |= (1ull << (start_slot + i));
+ else
+ *enabled_buffers &= ~(1ull << (start_slot + i));
+ }
+
+ /* Copy over the other members of pipe_shader_buffer. */
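+ /* (This rewrites dst[i].buffer with the same pointers referenced in the
+  *  loop above, so reference counts remain correct.) */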
+ memcpy(dst, src, count * sizeof(struct pipe_shader_buffer));
+ }
+ else {
+ /* Unreference the buffers. */
+ for (i = 0; i < count; i++)
+ pipe_resource_reference(&dst[i].buffer, NULL);
+
+ *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
+ }
+}
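+
+/* Example of intended usage (hypothetical caller, for illustration only;
+ * "ssbos" and "ssbo_mask" are made-up driver fields):
+ *
+ *    util_set_shader_buffers_mask(st->ssbos, &st->ssbo_mask,
+ *                                 bufs, start_slot, count);
+ *
+ * Afterwards, bit i of ssbo_mask is set iff slot i holds a buffer.
+ */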
+
/**
* Given a user index buffer, save the structure to "saved", and upload it.
*/
util_upload_index_buffer(struct pipe_context *pipe,
const struct pipe_draw_info *info,
struct pipe_resource **out_buffer,
- unsigned *out_offset)
+ unsigned *out_offset, unsigned alignment)
{
unsigned start_offset = info->start * info->index_size;
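+
+ /* "alignment" specifies the required alignment of the returned upload
+  * offset; it replaces the previously hardcoded value of 4. */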
u_upload_data(pipe->stream_uploader, start_offset,
- info->count * info->index_size, 4,
+ info->count * info->index_size, alignment,
(char*)info->index.user + start_offset,
out_offset, out_buffer);
u_upload_unmap(pipe->stream_uploader);
* pinned.
*/
void
-util_context_thread_changed(struct pipe_context *ctx, thrd_t *upper_thread)
+util_pin_driver_threads_to_random_L3(struct pipe_context *ctx,
+ thrd_t *upper_thread)
{
-#ifdef HAVE_PTHREAD
- thrd_t current = thrd_current();
- int cache = util_get_L3_for_pinned_thread(current,
-                                           util_cpu_caps.cores_per_L3);
-
- /* If the main thread is not pinned, choose the L3 cache. */
- if (cache == -1) {
- unsigned num_caches = util_cpu_caps.nr_cpus /
-                       util_cpu_caps.cores_per_L3;
+ /* If pinning has no effect, don't do anything. */
+ if (util_cpu_caps.nr_cpus == util_cpu_caps.cores_per_L3)
+    return;
+
+ unsigned num_L3_caches = util_cpu_caps.nr_cpus /
+                          util_cpu_caps.cores_per_L3;
- static unsigned last_cache;
- /* Choose a different L3 cache for each subsequent MakeCurrent. */
- cache = p_atomic_inc_return(&last_cache) % num_caches;
- util_pin_thread_to_L3(current, cache, util_cpu_caps.cores_per_L3);
- }
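+ /* A timer-based pick replaces the static counter removed above, avoiding
+  * shared state between contexts; perfect uniformity is not required. */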
+ /* Get a semi-random number. */
+ int64_t t = os_time_get_nano();
+ unsigned cache = (t ^ (t >> 8) ^ (t >> 16)) % num_L3_caches;
- /* Tell the driver to pin its threads to the same L3 cache. */
+ /* Tell the driver to pin its threads to the selected L3 cache. */
if (ctx->set_context_param) {
ctx->set_context_param(ctx, PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE,
cache);
/* Do the same for the upper level thread if there is any (e.g. glthread) */
if (upper_thread)
util_pin_thread_to_L3(*upper_thread, cache, util_cpu_caps.cores_per_L3);
+
+ /* Optionally pin the application thread to the same L3 to get maximum
+  * performance with glthread on AMD Zen (this function is only called
+  * when glthread is in use). Pinning the app thread this way is used to
+  * estimate and remove the overhead of Infinity Fabric between L3 caches.
+  */
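+ /* debug_get_bool_option() reads the environment variable of the same
+  * name, so this pinning is opt-in via pin_app_thread=true. */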
+#if defined(HAVE_PTHREAD)
+ if (debug_get_bool_option("pin_app_thread", false))
+ util_pin_thread_to_L3(pthread_self(), cache, util_cpu_caps.cores_per_L3);
#endif
}
" hs_invocations = %"PRIu64"\n"
" ds_invocations = %"PRIu64"\n"
" cs_invocations = %"PRIu64"\n",
- p_atomic_inc_return(&counter),
+ (unsigned)p_atomic_inc_return(&counter),
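+ /* p_atomic_inc_return() may have a platform-dependent return type; the
+  * cast keeps the argument in sync with the integer conversion used in
+  * the format string. */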
stats.ia_vertices,
stats.ia_primitives,
stats.vs_invocations,