mesa: avoid warning on Windows
[mesa.git] src/gallium/auxiliary/util/u_helpers.c
/**************************************************************************
 *
 * Copyright 2012 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS AND/OR THEIR SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/u_cpu_detect.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "util/u_thread.h"
#include "util/os_time.h"
#include <inttypes.h>

/**
 * This function is used to copy an array of pipe_vertex_buffer structures,
 * while properly referencing the pipe_vertex_buffer::buffer member.
 *
 * enabled_buffers is updated such that the bits corresponding to the indices
 * of disabled buffers are set to 0 and the enabled ones are set to 1.
 *
 * \sa util_copy_framebuffer_state
 */
void util_set_vertex_buffers_mask(struct pipe_vertex_buffer *dst,
                                  uint32_t *enabled_buffers,
                                  const struct pipe_vertex_buffer *src,
                                  unsigned start_slot, unsigned count)
{
   unsigned i;
   uint32_t bitmask = 0;

   dst += start_slot;

   if (src) {
      for (i = 0; i < count; i++) {
         if (src[i].buffer.resource)
            bitmask |= 1 << i;

         pipe_vertex_buffer_unreference(&dst[i]);

         if (!src[i].is_user_buffer)
            pipe_resource_reference(&dst[i].buffer.resource, src[i].buffer.resource);
      }

      /* Copy over the other members of pipe_vertex_buffer. */
      memcpy(dst, src, count * sizeof(struct pipe_vertex_buffer));

      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
      *enabled_buffers |= bitmask << start_slot;
   }
   else {
      /* Unreference the buffers. */
      for (i = 0; i < count; i++)
         pipe_vertex_buffer_unreference(&dst[i]);

      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
   }
}
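
/* Usage sketch (illustrative, not built): a hypothetical driver's
 * pipe_context::set_vertex_buffers hook can forward directly to the helper
 * above; "struct my_context", its fields, and MY_DIRTY_VERTEX_BUFFERS are
 * illustrative only. The same pattern applies to
 * util_set_vertex_buffers_count and util_set_shader_buffers_mask.
 */
#if 0
static void
my_set_vertex_buffers(struct pipe_context *pctx, unsigned start_slot,
                      unsigned count,
                      const struct pipe_vertex_buffer *buffers)
{
   struct my_context *ctx = (struct my_context *)pctx;

   /* Copy/reference the incoming buffers and keep the enabled-slot mask
    * in sync with what is actually bound.
    */
   util_set_vertex_buffers_mask(ctx->vertex_buffers,
                                &ctx->enabled_vb_mask,
                                buffers, start_slot, count);
   ctx->dirty |= MY_DIRTY_VERTEX_BUFFERS;
}
#endif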

/**
 * Same as util_set_vertex_buffers_mask, but it updates *dst_count with the
 * number of bound buffers instead of keeping a bitmask.
 */
void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
                                   unsigned *dst_count,
                                   const struct pipe_vertex_buffer *src,
                                   unsigned start_slot, unsigned count)
{
   unsigned i;
   uint32_t enabled_buffers = 0;

   for (i = 0; i < *dst_count; i++) {
      if (dst[i].buffer.resource)
         enabled_buffers |= (1ull << i);
   }

   util_set_vertex_buffers_mask(dst, &enabled_buffers, src, start_slot,
                                count);

   *dst_count = util_last_bit(enabled_buffers);
}

/**
 * This function is used to copy an array of pipe_shader_buffer structures,
 * while properly referencing the pipe_shader_buffer::buffer member.
 *
 * \sa util_set_vertex_buffers_mask
 */
void util_set_shader_buffers_mask(struct pipe_shader_buffer *dst,
                                  uint32_t *enabled_buffers,
                                  const struct pipe_shader_buffer *src,
                                  unsigned start_slot, unsigned count)
{
   unsigned i;

   dst += start_slot;

   if (src) {
      for (i = 0; i < count; i++) {
         pipe_resource_reference(&dst[i].buffer, src[i].buffer);

         if (src[i].buffer)
            *enabled_buffers |= (1ull << (start_slot + i));
         else
            *enabled_buffers &= ~(1ull << (start_slot + i));
      }

      /* Copy over the other members of pipe_shader_buffer. */
      memcpy(dst, src, count * sizeof(struct pipe_shader_buffer));
   }
   else {
      /* Unreference the buffers. */
      for (i = 0; i < count; i++)
         pipe_resource_reference(&dst[i].buffer, NULL);

      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
   }
}

/**
 * Given a draw that uses a user index buffer, upload the index data and
 * return the uploaded buffer and the offset of the first index within it.
 */
bool
util_upload_index_buffer(struct pipe_context *pipe,
                         const struct pipe_draw_info *info,
                         struct pipe_resource **out_buffer,
                         unsigned *out_offset)
{
   unsigned start_offset = info->start * info->index_size;

   u_upload_data(pipe->stream_uploader, start_offset,
                 info->count * info->index_size, 4,
                 (char*)info->index.user + start_offset,
                 out_offset, out_buffer);
   u_upload_unmap(pipe->stream_uploader);
   *out_offset -= start_offset;
   return *out_buffer != NULL;
}
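
/* Usage sketch (illustrative, not built): a hypothetical draw_vbo
 * implementation that cannot read indices from user memory can upload them
 * first and redirect the draw to the uploaded copy; "my_draw_indexed" and
 * the patched pipe_draw_info are illustrative only.
 */
#if 0
static void
my_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
{
   struct pipe_draw_info new_info = *info;
   struct pipe_resource *indexbuf = NULL;
   unsigned offset;

   if (info->index_size && info->has_user_indices) {
      if (!util_upload_index_buffer(pctx, info, &indexbuf, &offset))
         return; /* out of memory */

      /* Point the draw at the uploaded indices instead of user memory. */
      new_info.has_user_indices = false;
      new_info.index.resource = indexbuf;
      new_info.start = offset / info->index_size;
   }

   my_draw_indexed(pctx, &new_info);
   pipe_resource_reference(&indexbuf, NULL);
}
#endif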

/**
 * Called by MakeCurrent. Used to notify the driver that the application
 * thread may have changed.
 *
 * The function pins the current thread and driver threads to a group of
 * CPU cores that share the same L3 cache. This is needed for good multi-
 * threading performance on AMD Zen CPUs.
 *
 * \param upper_thread  thread in the state tracker that also needs to be
 *                      pinned.
 */
void
util_pin_driver_threads_to_random_L3(struct pipe_context *ctx,
                                     thrd_t *upper_thread)
{
   /* If pinning has no effect, don't do anything. */
   if (util_cpu_caps.nr_cpus == util_cpu_caps.cores_per_L3)
      return;

   unsigned num_L3_caches = util_cpu_caps.nr_cpus /
                            util_cpu_caps.cores_per_L3;

   /* Get a semi-random number. */
   int64_t t = os_time_get_nano();
   unsigned cache = (t ^ (t >> 8) ^ (t >> 16)) % num_L3_caches;

   /* Tell the driver to pin its threads to the selected L3 cache. */
   if (ctx->set_context_param) {
      ctx->set_context_param(ctx, PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE,
                             cache);
   }

   /* Do the same for the upper level thread if there is any (e.g. glthread) */
   if (upper_thread)
      util_pin_thread_to_L3(*upper_thread, cache, util_cpu_caps.cores_per_L3);
}
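
/* Usage sketch (illustrative, not built): a state tracker would typically
 * call this from its MakeCurrent path, passing its glthread worker if one
 * exists; "struct my_state_tracker" and its fields are illustrative only.
 */
#if 0
static void
my_make_current(struct my_state_tracker *st)
{
   /* Pin only threads we own; pass NULL when glthread is disabled. */
   thrd_t *glthread = st->glthread_enabled ? &st->glthread_worker : NULL;

   util_pin_driver_threads_to_random_L3(st->pipe, glthread);
}
#endif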

/* This is a helper for hardware bring-up. Don't remove. */
struct pipe_query *
util_begin_pipestat_query(struct pipe_context *ctx)
{
   struct pipe_query *q =
      ctx->create_query(ctx, PIPE_QUERY_PIPELINE_STATISTICS, 0);
   if (!q)
      return NULL;

   ctx->begin_query(ctx, q);
   return q;
}

/* This is a helper for hardware bring-up. Don't remove. */
void
util_end_pipestat_query(struct pipe_context *ctx, struct pipe_query *q,
                        FILE *f)
{
   static unsigned counter;
   struct pipe_query_data_pipeline_statistics stats;

   ctx->end_query(ctx, q);
   ctx->get_query_result(ctx, q, true, (void*)&stats);
   ctx->destroy_query(ctx, q);

   fprintf(f,
           "Draw call %u:\n"
           "    ia_vertices    = %"PRIu64"\n"
           "    ia_primitives  = %"PRIu64"\n"
           "    vs_invocations = %"PRIu64"\n"
           "    gs_invocations = %"PRIu64"\n"
           "    gs_primitives  = %"PRIu64"\n"
           "    c_invocations  = %"PRIu64"\n"
           "    c_primitives   = %"PRIu64"\n"
           "    ps_invocations = %"PRIu64"\n"
           "    hs_invocations = %"PRIu64"\n"
           "    ds_invocations = %"PRIu64"\n"
           "    cs_invocations = %"PRIu64"\n",
           (unsigned)p_atomic_inc_return(&counter),
           stats.ia_vertices,
           stats.ia_primitives,
           stats.vs_invocations,
           stats.gs_invocations,
           stats.gs_primitives,
           stats.c_invocations,
           stats.c_primitives,
           stats.ps_invocations,
           stats.hs_invocations,
           stats.ds_invocations,
           stats.cs_invocations);
}
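
/* Usage sketch (illustrative, not built): during hardware bring-up, a draw
 * call can be bracketed by the two helpers above to dump per-draw pipeline
 * statistics; "my_debug_draw" and "dump_file" are illustrative only.
 */
#if 0
static void
my_debug_draw(struct pipe_context *ctx, const struct pipe_draw_info *info,
              FILE *dump_file)
{
   struct pipe_query *q = util_begin_pipestat_query(ctx);

   ctx->draw_vbo(ctx, info);

   /* Waits for the result and prints one "Draw call N:" block. */
   if (q)
      util_end_pipestat_query(ctx, q, dump_file);
}
#endif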

/* This is a helper for hardware bring-up. Don't remove. */
void
util_wait_for_idle(struct pipe_context *ctx)
{
   struct pipe_fence_handle *fence = NULL;

   ctx->flush(ctx, &fence, 0);
   ctx->screen->fence_finish(ctx->screen, NULL, fence, PIPE_TIMEOUT_INFINITE);
}

void
util_throttle_init(struct util_throttle *t, uint64_t max_mem_usage)
{
   t->max_mem_usage = max_mem_usage;
}

void
util_throttle_deinit(struct pipe_screen *screen, struct util_throttle *t)
{
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      screen->fence_reference(screen, &t->ring[i].fence, NULL);
}

static uint64_t
util_get_throttle_total_memory_usage(struct util_throttle *t)
{
   uint64_t total_usage = 0;

   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      total_usage += t->ring[i].mem_usage;
   return total_usage;
}

static void util_dump_throttle_ring(struct util_throttle *t)
{
   printf("Throttle:\n");
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++) {
      printf("  ring[%u]: fence = %s, mem_usage = %"PRIu64"%s%s\n",
             i, t->ring[i].fence ? "yes" : " no",
             t->ring[i].mem_usage,
             t->flush_index == i ? " [flush]" : "",
             t->wait_index == i ? " [wait]" : "");
   }
}

/**
 * Notify util_throttle that the next operation allocates memory.
 * util_throttle tracks memory usage and waits for fences until its tracked
 * memory usage decreases.
 *
 * Example:
 *   util_throttle_memory_usage(..., w*h*d*Bpp);
 *   TexSubImage(..., w, h, d, ...);
 *
 * This means that TexSubImage can't allocate more memory than the maximum
 * limit set during initialization.
 */
void
util_throttle_memory_usage(struct pipe_context *pipe,
                           struct util_throttle *t, uint64_t memory_size)
{
   (void)util_dump_throttle_ring; /* silence warning */

   if (!t->max_mem_usage)
      return;

   struct pipe_screen *screen = pipe->screen;
   struct pipe_fence_handle **fence = NULL;
   unsigned ring_size = ARRAY_SIZE(t->ring);
   uint64_t total = util_get_throttle_total_memory_usage(t);

   /* If there is not enough memory, walk the list of fences and find
    * the latest one that we need to wait for.
    */
   while (t->wait_index != t->flush_index &&
          total && total + memory_size > t->max_mem_usage) {
      assert(t->ring[t->wait_index].fence);

      /* Release an older fence if we need to wait for a newer one. */
      if (fence)
         screen->fence_reference(screen, fence, NULL);

      fence = &t->ring[t->wait_index].fence;
      t->ring[t->wait_index].mem_usage = 0;
      t->wait_index = (t->wait_index + 1) % ring_size;

      total = util_get_throttle_total_memory_usage(t);
   }

   /* Wait for the fence to decrease memory usage. */
   if (fence) {
      screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
      screen->fence_reference(screen, fence, NULL);
   }

   /* Flush and get a fence if we've exhausted memory usage for the current
    * slot.
    */
   if (t->ring[t->flush_index].mem_usage &&
       t->ring[t->flush_index].mem_usage + memory_size >
       t->max_mem_usage / (ring_size / 2)) {
      struct pipe_fence_handle **fence =
         &t->ring[t->flush_index].fence;

      /* Expect that the current flush slot doesn't have a fence yet. */
      assert(!*fence);

      pipe->flush(pipe, fence, PIPE_FLUSH_ASYNC);
      t->flush_index = (t->flush_index + 1) % ring_size;

      /* Vacate the next slot if it's occupied. This should be rare. */
      if (t->flush_index == t->wait_index) {
         struct pipe_fence_handle **fence =
            &t->ring[t->wait_index].fence;

         t->ring[t->wait_index].mem_usage = 0;
         t->wait_index = (t->wait_index + 1) % ring_size;

         assert(*fence);
         screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
         screen->fence_reference(screen, fence, NULL);
      }

      assert(!t->ring[t->flush_index].mem_usage);
      assert(!t->ring[t->flush_index].fence);
   }

   t->ring[t->flush_index].mem_usage += memory_size;
}
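
/* Usage sketch (illustrative, not built): a driver that streams texture
 * uploads can bound the amount of in-flight upload memory by calling the
 * throttle before each upload; the 256 MB limit and the "my_*" names are
 * illustrative only.
 */
#if 0
struct my_context {
   struct pipe_context *pipe;
   struct util_throttle throttle;
};

static void
my_context_init(struct my_context *ctx)
{
   /* Limit in-flight upload memory to 256 MB (illustrative value). */
   util_throttle_init(&ctx->throttle, 256 * 1024 * 1024);
}

static void
my_texture_upload(struct my_context *ctx, uint64_t upload_size)
{
   /* Block (by waiting on older fences) if this upload would push the
    * tracked memory usage above the limit set at init time.
    */
   util_throttle_memory_usage(ctx->pipe, &ctx->throttle, upload_size);

   /* ... perform the actual upload here ... */
}

static void
my_context_destroy(struct my_context *ctx)
{
   util_throttle_deinit(ctx->pipe->screen, &ctx->throttle);
}
#endif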