diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index b05cffd754df99a313429987745b8a94cad73003..daed6c6950b21d270b9cca882cb947243339bbd7 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -26,7 +26,7 @@
 
 #include "util/u_threaded_context.h"
 #include "util/u_cpu_detect.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
 #include "util/u_upload_mgr.h"
@@ -64,7 +64,7 @@ typedef void (*tc_execute)(struct pipe_context *pipe, union tc_payload *payload)
 static const tc_execute execute_func[TC_NUM_CALLS];
 
 static void
-tc_batch_check(struct tc_batch *batch)
+tc_batch_check(UNUSED struct tc_batch *batch)
 {
    tc_assert(batch->sentinel == TC_SENTINEL);
    tc_assert(batch->num_total_call_slots <= TC_CALLS_PER_BATCH);
@@ -80,7 +80,7 @@ tc_debug_check(struct threaded_context *tc)
 }
 
 static void
-tc_batch_execute(void *job, int thread_index)
+tc_batch_execute(void *job, UNUSED int thread_index)
 {
    struct tc_batch *batch = job;
    struct pipe_context *pipe = batch->pipe;
@@ -116,7 +116,7 @@ tc_batch_flush(struct threaded_context *tc)
    }
 
    util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
-                      NULL);
+                      NULL, 0);
    tc->last = tc->next;
    tc->next = (tc->next + 1) % TC_MAX_BATCHES;
 }
@@ -180,7 +180,7 @@ tc_is_sync(struct threaded_context *tc)
 }
 
 static void
-_tc_sync(struct threaded_context *tc, const char *info, const char *func)
+_tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char *func)
 {
    struct tc_batch *last = &tc->batch_slots[tc->last];
    struct tc_batch *next = &tc->batch_slots[tc->next];
@@ -211,8 +211,9 @@ _tc_sync(struct threaded_context *tc, const char *info, const char *func)
    if (synced) {
       p_atomic_inc(&tc->num_syncs);
 
-      if (tc_strcmp(func, "tc_destroy") != 0)
+      if (tc_strcmp(func, "tc_destroy") != 0) {
          tc_printf("sync %s %s\n", func, info);
+      }
    }
 
    tc_debug_check(tc);
@@ -230,13 +231,23 @@ _tc_sync(struct threaded_context *tc, const char *info, const char *func)
  */
 void
 threaded_context_flush(struct pipe_context *_pipe,
-                       struct tc_unflushed_batch_token *token)
+                       struct tc_unflushed_batch_token *token,
+                       bool prefer_async)
 {
    struct threaded_context *tc = threaded_context(_pipe);
 
    /* This is called from the state-tracker / application thread. */
-   if (token->tc && token->tc == tc)
-      tc_sync(token->tc);
+   if (token->tc && token->tc == tc) {
+      struct tc_batch *last = &tc->batch_slots[tc->last];
+
+      /* Prefer to do the flush in the driver thread if it is already
+       * running. That should be better for cache locality.
+       */
+      if (prefer_async || !util_queue_fence_is_signalled(&last->fence))
+         tc_batch_flush(tc);
+      else
+         tc_sync(token->tc);
+   }
 }
 
 static void
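
For reference, the prefer_async decision above is driven by the caller: a
driver's fence_finish is the expected call site, passing prefer_async = true
when it cannot afford to block in the application thread. A minimal sketch of
such a caller (drv_fence, its tc_token field, and drv_fence_wait are made-up
names):

   /* Hypothetical fence wait built on threaded_context_flush(): with
    * timeout == 0 the caller only polls, so prefer the asynchronous flush
    * in the driver thread over syncing here. */
   static bool
   drv_fence_finish(struct pipe_context *ctx, struct drv_fence *fence,
                    uint64_t timeout)
   {
      if (fence->tc_token)
         threaded_context_flush(ctx, fence->tc_token, timeout == 0);

      /* ...then wait on the driver's own fence, up to the timeout... */
      return drv_fence_wait(fence, timeout);
   }
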
@@ -298,7 +309,7 @@ threaded_context_unwrap_sync(struct pipe_context *pipe)
       *p = deref(param); \
    }
 
-TC_FUNC1(set_active_query_state, flags, , boolean, , *)
+TC_FUNC1(set_active_query_state, flags, , bool, , *)
 
 TC_FUNC1(set_blend_color, blend_color, const, struct pipe_blend_color, *, )
 TC_FUNC1(set_stencil_ref, stencil_ref, const, struct pipe_stencil_ref, *, )
@@ -341,7 +352,7 @@ tc_call_destroy_query(struct pipe_context *pipe, union tc_payload *payload)
    struct threaded_query *tq = threaded_query(payload->query);
 
    if (tq->head_unflushed.next)
-      LIST_DEL(&tq->head_unflushed);
+      list_del(&tq->head_unflushed);
 
    pipe->destroy_query(pipe, payload->query);
 }
@@ -360,7 +371,7 @@ tc_call_begin_query(struct pipe_context *pipe, union tc_payload *payload)
    pipe->begin_query(pipe, payload->query);
 }
 
-static boolean
+static bool
 tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
 {
    struct threaded_context *tc = threaded_context(_pipe);
@@ -382,7 +393,7 @@ tc_call_end_query(struct pipe_context *pipe, union tc_payload *payload)
    struct threaded_query *tq = threaded_query(p->query);
 
    if (!tq->head_unflushed.next)
-      LIST_ADD(&tq->head_unflushed, &p->tc->unflushed_queries);
+      list_add(&tq->head_unflushed, &p->tc->unflushed_queries);
 
    pipe->end_query(pipe, p->query);
 }
@@ -403,9 +414,9 @@ tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
    return true; /* we don't care about the return value for this call */
 }
 
-static boolean
+static bool
 tc_get_query_result(struct pipe_context *_pipe,
-                    struct pipe_query *query, boolean wait,
+                    struct pipe_query *query, bool wait,
                     union pipe_query_result *result)
 {
    struct threaded_context *tc = threaded_context(_pipe);
@@ -421,7 +432,7 @@ tc_get_query_result(struct pipe_context *_pipe,
       tq->flushed = true;
       if (tq->head_unflushed.next) {
          /* This is safe because it can only happen after we sync'd. */
-         LIST_DEL(&tq->head_unflushed);
+         list_del(&tq->head_unflushed);
       }
    }
    return success;
@@ -429,7 +440,7 @@ tc_get_query_result(struct pipe_context *_pipe,
 
 struct tc_query_result_resource {
    struct pipe_query *query;
-   boolean wait;
+   bool wait;
    enum pipe_query_value_type result_type;
    int index;
    struct pipe_resource *resource;
@@ -449,7 +460,7 @@ tc_call_get_query_result_resource(struct pipe_context *pipe,
 
 static void
 tc_get_query_result_resource(struct pipe_context *_pipe,
-                             struct pipe_query *query, boolean wait,
+                             struct pipe_query *query, bool wait,
                              enum pipe_query_value_type result_type, int index,
                              struct pipe_resource *resource, unsigned offset)
 {
@@ -481,7 +492,7 @@ tc_call_render_condition(struct pipe_context *pipe, union tc_payload *payload)
 
 static void
 tc_render_condition(struct pipe_context *_pipe,
-                    struct pipe_query *query, boolean condition,
+                    struct pipe_query *query, bool condition,
                     enum pipe_render_cond_flag mode)
 {
    struct threaded_context *tc = threaded_context(_pipe);
@@ -665,6 +676,7 @@ tc_set_constant_buffer(struct pipe_context *_pipe,
    if (cb && cb->user_buffer) {
       u_upload_data(tc->base.const_uploader, 0, cb->buffer_size, 64,
                     cb->user_buffer, &offset, &buffer);
+      u_upload_unmap(tc->base.const_uploader);
    }
 
    struct tc_constant_buffer *p =
@@ -759,7 +771,7 @@ tc_call_set_window_rectangles(struct pipe_context *pipe,
 }
 
 static void
-tc_set_window_rectangles(struct pipe_context *_pipe, boolean include,
+tc_set_window_rectangles(struct pipe_context *_pipe, bool include,
                          unsigned count,
                          const struct pipe_scissor_state *rects)
 {
@@ -867,7 +879,8 @@ tc_set_shader_images(struct pipe_context *_pipe,
             struct threaded_resource *tres =
                threaded_resource(images[i].resource);
 
-            util_range_add(&tres->valid_buffer_range, images[i].u.buf.offset,
+            util_range_add(&tres->b, &tres->valid_buffer_range,
+                           images[i].u.buf.offset,
                            images[i].u.buf.offset + images[i].u.buf.size);
          }
       }
@@ -878,6 +891,7 @@ tc_set_shader_images(struct pipe_context *_pipe,
 struct tc_shader_buffers {
    ubyte shader, start, count;
    bool unbind;
+   unsigned writable_bitmask;
    struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
 };
 
@@ -888,11 +902,12 @@ tc_call_set_shader_buffers(struct pipe_context *pipe, union tc_payload *payload)
    unsigned count = p->count;
 
    if (p->unbind) {
-      pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL);
+      pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL, 0);
       return;
    }
 
-   pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot);
+   pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot,
+                            p->writable_bitmask);
 
    for (unsigned i = 0; i < count; i++)
       pipe_resource_reference(&p->slot[i].buffer, NULL);
@@ -902,7 +917,8 @@ static void
 tc_set_shader_buffers(struct pipe_context *_pipe,
                       enum pipe_shader_type shader,
                       unsigned start, unsigned count,
-                      const struct pipe_shader_buffer *buffers)
+                      const struct pipe_shader_buffer *buffers,
+                      unsigned writable_bitmask)
 {
    if (!count)
       return;
@@ -916,6 +932,7 @@ tc_set_shader_buffers(struct pipe_context *_pipe,
    p->start = start;
    p->count = count;
    p->unbind = buffers == NULL;
+   p->writable_bitmask = writable_bitmask;
 
    if (buffers) {
       for (unsigned i = 0; i < count; i++) {
@@ -929,7 +946,8 @@ tc_set_shader_buffers(struct pipe_context *_pipe,
          if (src->buffer) {
             struct threaded_resource *tres = threaded_resource(src->buffer);
 
-            util_range_add(&tres->valid_buffer_range, src->buffer_offset,
+            util_range_add(&tres->b, &tres->valid_buffer_range,
+                           src->buffer_offset,
                            src->buffer_offset + src->buffer_size);
          }
       }
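
The new writable_bitmask argument is indexed relative to start: bit i marks
buffers[i] as shader-writable, letting drivers skip write-hazard tracking for
read-only bindings. A hedged usage sketch (the buffer names are assumptions):

   /* Bind two SSBOs to a compute shader; only slot 1 (bit 1) may be
    * written by shaders, so the driver can treat slot 0 as read-only. */
   struct pipe_shader_buffer bufs[2] = {
      { .buffer = ssbo_in,  .buffer_offset = 0, .buffer_size = 256 },
      { .buffer = ssbo_out, .buffer_offset = 0, .buffer_size = 256 },
   };
   pipe->set_shader_buffers(pipe, PIPE_SHADER_COMPUTE, 0, 2, bufs, 1u << 1);
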
@@ -1119,7 +1137,7 @@ tc_create_stream_output_target(struct pipe_context *_pipe,
    struct pipe_stream_output_target *view;
 
    tc_sync(threaded_context(_pipe));
-   util_range_add(&tres->valid_buffer_range, buffer_offset,
+   util_range_add(&tres->b, &tres->valid_buffer_range, buffer_offset,
                   buffer_offset + buffer_size);
 
    view = pipe->create_stream_output_target(pipe, res, buffer_offset,
@@ -1513,7 +1531,8 @@ tc_buffer_do_flush_region(struct threaded_context *tc,
    if (ttrans->staging) {
       struct pipe_box src_box;
 
-      u_box_1d(ttrans->offset + box->x % tc->map_buffer_alignment,
+      u_box_1d(ttrans->offset + ttrans->b.box.x % tc->map_buffer_alignment +
+               (box->x - ttrans->b.box.x),
                box->width, &src_box);
 
       /* Copy the staging buffer into the original one. */
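
The reworked source offset supports flushing a sub-range of the mapped box. A
worked example with assumed numbers:

   /* Assume map_buffer_alignment = 64 and a map whose box starts at
    * ttrans->b.box.x = 100, so the staged data begins at
    * ttrans->offset + (100 % 64) = offset + 36 in the staging buffer.
    * Flushing a sub-range starting at box->x = 170 must then read from
    * offset + 36 + (170 - 100) = offset + 106.  The old expression,
    * offset + (box->x % 64) = offset + 42, is only guaranteed correct
    * when the flush starts at the mapped offset itself. */
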
@@ -1521,7 +1540,8 @@ tc_buffer_do_flush_region(struct threaded_context *tc,
                               ttrans->staging, 0, &src_box);
    }
 
-   util_range_add(tres->base_valid_buffer_range, box->x, box->x + box->width);
+   util_range_add(&tres->b, tres->base_valid_buffer_range,
+                  box->x, box->x + box->width);
 }
 
 static void
@@ -1613,8 +1633,11 @@ tc_buffer_subdata(struct pipe_context *_pipe,
    if (!size)
       return;
 
-   usage |= PIPE_TRANSFER_WRITE |
-            PIPE_TRANSFER_DISCARD_RANGE;
+   usage |= PIPE_TRANSFER_WRITE;
+
+   /* PIPE_TRANSFER_MAP_DIRECTLY suppresses implicit DISCARD_RANGE. */
+   if (!(usage & PIPE_TRANSFER_MAP_DIRECTLY))
+      usage |= PIPE_TRANSFER_DISCARD_RANGE;
 
    usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size);
 
@@ -1638,7 +1661,7 @@ tc_buffer_subdata(struct pipe_context *_pipe,
       return;
    }
 
-   util_range_add(&tres->valid_buffer_range, offset, offset + size);
+   util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
 
    /* The upload is small. Enqueue it. */
    struct tc_buffer_subdata *p =
@@ -1824,13 +1847,21 @@ tc_set_log_context(struct pipe_context *_pipe, struct u_log_context *log)
 
 static void
 tc_create_fence_fd(struct pipe_context *_pipe,
-                   struct pipe_fence_handle **fence, int fd)
+                   struct pipe_fence_handle **fence, int fd,
+                   enum pipe_fd_type type)
 {
    struct threaded_context *tc = threaded_context(_pipe);
    struct pipe_context *pipe = tc->pipe;
 
    tc_sync(tc);
-   pipe->create_fence_fd(pipe, fence, fd);
+   pipe->create_fence_fd(pipe, fence, fd, type);
+}
+
+static void
+tc_call_fence_server_sync(struct pipe_context *pipe, union tc_payload *payload)
+{
+   pipe->fence_server_sync(pipe, payload->fence);
+   pipe->screen->fence_reference(pipe->screen, &payload->fence, NULL);
 }
 
 static void
@@ -1838,28 +1869,86 @@ tc_fence_server_sync(struct pipe_context *_pipe,
                      struct pipe_fence_handle *fence)
 {
    struct threaded_context *tc = threaded_context(_pipe);
-   struct pipe_context *pipe = tc->pipe;
+   struct pipe_screen *screen = tc->pipe->screen;
+   union tc_payload *payload = tc_add_small_call(tc, TC_CALL_fence_server_sync);
 
-   tc_sync(tc);
-   pipe->fence_server_sync(pipe, fence);
+   payload->fence = NULL;
+   screen->fence_reference(screen, &payload->fence, fence);
+}
+
+static void
+tc_call_fence_server_signal(struct pipe_context *pipe, union tc_payload *payload)
+{
+   pipe->fence_server_signal(pipe, payload->fence);
+   pipe->screen->fence_reference(pipe->screen, &payload->fence, NULL);
+}
+
+static void
+tc_fence_server_signal(struct pipe_context *_pipe,
+                       struct pipe_fence_handle *fence)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_screen *screen = tc->pipe->screen;
+   union tc_payload *payload = tc_add_small_call(tc, TC_CALL_fence_server_signal);
+
+   payload->fence = NULL;
+   screen->fence_reference(screen, &payload->fence, fence);
 }
 
 static struct pipe_video_codec *
-tc_create_video_codec(struct pipe_context *_pipe,
-                      const struct pipe_video_codec *templ)
+tc_create_video_codec(UNUSED struct pipe_context *_pipe,
+                      UNUSED const struct pipe_video_codec *templ)
 {
    unreachable("Threaded context should not be enabled for video APIs");
    return NULL;
 }
 
 static struct pipe_video_buffer *
-tc_create_video_buffer(struct pipe_context *_pipe,
-                       const struct pipe_video_buffer *templ)
+tc_create_video_buffer(UNUSED struct pipe_context *_pipe,
+                       UNUSED const struct pipe_video_buffer *templ)
 {
    unreachable("Threaded context should not be enabled for video APIs");
    return NULL;
 }
 
+struct tc_context_param {
+   enum pipe_context_param param;
+   unsigned value;
+};
+
+static void
+tc_call_set_context_param(struct pipe_context *pipe,
+                          union tc_payload *payload)
+{
+   struct tc_context_param *p = (struct tc_context_param*)payload;
+
+   if (pipe->set_context_param)
+      pipe->set_context_param(pipe, p->param, p->value);
+}
+
+static void
+tc_set_context_param(struct pipe_context *_pipe,
+                     enum pipe_context_param param,
+                     unsigned value)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+
+   if (tc->pipe->set_context_param) {
+      struct tc_context_param *payload =
+         tc_add_struct_typed_call(tc, TC_CALL_set_context_param,
+                                  tc_context_param);
+
+      payload->param = param;
+      payload->value = value;
+   }
+
+   if (param == PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE) {
+      /* Pin the gallium thread as requested. */
+      util_pin_thread_to_L3(tc->queue.threads[0], value,
+                            util_cpu_caps.cores_per_L3);
+   }
+}
+
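
fence_server_sync, fence_server_signal and set_context_param above all use
the same asynchronous-call pattern: the application thread stores a payload
(taking a reference if the payload is a refcounted object) and returns, and
the driver thread later executes the matching tc_call_* function. A minimal
sketch of the pattern for a hypothetical hook "frob" (TC_CALL_frob and
pipe->frob are not real):

   /* Driver-thread side: consume the payload, then drop the reference
    * that the application thread took. */
   static void
   tc_call_frob(struct pipe_context *pipe, union tc_payload *payload)
   {
      pipe->frob(pipe, payload->resource);
      pipe_resource_reference(&payload->resource, NULL);
   }

   /* Application-thread side: reference the object so it outlives the
    * queue, then return without syncing. */
   static void
   tc_frob(struct pipe_context *_pipe, struct pipe_resource *res)
   {
      struct threaded_context *tc = threaded_context(_pipe);
      union tc_payload *payload = tc_add_small_call(tc, TC_CALL_frob);

      payload->resource = NULL;
      pipe_resource_reference(&payload->resource, res);
   }

Note that tc_set_context_param additionally handles
PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE in the application thread itself,
since the gallium thread being pinned belongs to tc, not to the driver.
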
 
 /********************************************************************
  * draw, launch, clear, blit, copy, flush
@@ -1876,7 +1965,7 @@ tc_flush_queries(struct threaded_context *tc)
 {
    struct threaded_query *tq, *tmp;
    LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) {
-      LIST_DEL(&tq->head_unflushed);
+      list_del(&tq->head_unflushed);
 
       /* Memory release semantics: due to a possible race with
        * tc_get_query_result, we must ensure that the linked list changes
@@ -1922,7 +2011,6 @@ tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
 
    if (async && tc->create_fence) {
       if (fence) {
-         struct tc_unflushed_batch_token *token = NULL;
          struct tc_batch *next = &tc->batch_slots[tc->next];
 
          if (!next->token) {
@@ -1934,7 +2022,7 @@ tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
             next->token->tc = tc;
          }
 
-         screen->fence_reference(screen, fence, tc->create_fence(pipe, token));
+         screen->fence_reference(screen, fence, tc->create_fence(pipe, next->token));
          if (!*fence)
             goto out_of_memory;
       }
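
On the driver side, create_fence now receives the batch's real token so that a
later fence wait can flush the batch that will signal the fence. A sketch
under assumed names (drv_fence, its tc_token field, and drv_fence_alloc are
hypothetical):

   /* Hypothetical driver callback: attach the unflushed-batch token to
    * the new fence; threaded_context_flush() uses it later to flush the
    * right batch from the fence-wait path. */
   static struct pipe_fence_handle *
   drv_create_fence(struct pipe_context *pipe,
                    struct tc_unflushed_batch_token *tc_token)
   {
      struct drv_fence *fence = drv_fence_alloc(pipe);

      if (fence && tc_token)
         tc_unflushed_batch_token_reference(&fence->tc_token, tc_token);
      return (struct pipe_fence_handle *)fence;
   }
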
@@ -2100,7 +2188,8 @@ tc_resource_copy_region(struct pipe_context *_pipe,
    p->src_box = *src_box;
 
    if (dst->target == PIPE_BUFFER)
-      util_range_add(&tdst->valid_buffer_range, dstx, dstx + src_box->width);
+      util_range_add(&tdst->b, &tdst->valid_buffer_range,
+                     dstx, dstx + src_box->width);
 }
 
 static void
@@ -2138,7 +2227,7 @@ static void
 tc_call_generate_mipmap(struct pipe_context *pipe, union tc_payload *payload)
 {
    struct tc_generate_mipmap *p = (struct tc_generate_mipmap *)payload;
-   bool MAYBE_UNUSED result = pipe->generate_mipmap(pipe, p->res, p->format,
-                                                    p->base_level,
-                                                    p->last_level,
-                                                    p->first_layer,
+   ASSERTED bool result = pipe->generate_mipmap(pipe, p->res, p->format,
+                                                p->base_level,
+                                                p->last_level,
+                                                p->first_layer,
@@ -2147,7 +2236,7 @@ tc_call_generate_mipmap(struct pipe_context *pipe, union tc_payload *payload)
    pipe_resource_reference(&p->res, NULL);
 }
 
-static boolean
+static bool
 tc_generate_mipmap(struct pipe_context *_pipe,
                    struct pipe_resource *res,
                    enum pipe_format format,
@@ -2167,7 +2256,8 @@ tc_generate_mipmap(struct pipe_context *_pipe,
       bind = PIPE_BIND_RENDER_TARGET;
 
    if (!screen->is_format_supported(screen, format, res->target,
-                                    res->nr_samples, bind))
+                                    res->nr_samples, res->nr_storage_samples,
+                                    bind))
       return false;
 
    struct tc_generate_mipmap *p =
@@ -2315,7 +2405,7 @@ tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
    memcpy(p->clear_value, clear_value, clear_value_size);
    p->clear_value_size = clear_value_size;
 
-   util_range_add(&tres->valid_buffer_range, offset, offset + size);
+   util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
 }
 
 struct tc_clear_texture {
@@ -2391,7 +2481,7 @@ struct tc_callback_payload {
 };
 
 static void
-tc_call_callback(struct pipe_context *pipe, union tc_payload *payload)
+tc_call_callback(UNUSED struct pipe_context *pipe, union tc_payload *payload)
 {
    struct tc_callback_payload *p = (struct tc_callback_payload *)payload;
 
@@ -2528,7 +2618,7 @@ threaded_context_create(struct pipe_context *pipe,
     * from the queue before being executed, so keep one tc_batch slot for that
     * execution. Also, keep one unused slot for an unflushed batch.
     */
-   if (!util_queue_init(&tc->queue, "gallium_drv", TC_MAX_BATCHES - 2, 1, 0))
+   if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0))
       goto fail;
 
    for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
@@ -2537,10 +2627,12 @@ threaded_context_create(struct pipe_context *pipe,
       util_queue_fence_init(&tc->batch_slots[i].fence);
    }
 
-   LIST_INITHEAD(&tc->unflushed_queries);
+   list_inithead(&tc->unflushed_queries);
 
    slab_create_child(&tc->pool_transfers, parent_transfer_pool);
 
+   tc->base.set_context_param = tc_set_context_param; /* always set this */
+
 #define CTX_INIT(_member) \
    tc->base._member = tc->pipe->_member ? tc_##_member : NULL
 
@@ -2643,6 +2735,7 @@ threaded_context_create(struct pipe_context *pipe,
    CTX_INIT(set_debug_callback);
    CTX_INIT(create_fence_fd);
    CTX_INIT(fence_server_sync);
+   CTX_INIT(fence_server_signal);
    CTX_INIT(get_timestamp);
    CTX_INIT(create_texture_handle);
    CTX_INIT(delete_texture_handle);