iris: Replace buffer backing storage and rebind to update addresses.
authorKenneth Graunke <kenneth@whitecape.org>
Tue, 12 Mar 2019 21:51:22 +0000 (14:51 -0700)
committerKenneth Graunke <kenneth@whitecape.org>
Tue, 23 Apr 2019 07:24:08 +0000 (00:24 -0700)
This implements PIPE_CAP_INVALIDATE_BUFFER and invalidate_resource(),
as well as the PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE flag.  When either
of these happen, we swap out the backing storage of the buffer for a
new idle BO, allowing us to write to it immediately without stalling
or queueing a blit.

On my Skylake GT4e at 1920x1080, this improves performance in games:

   -----------------------------------------------
   | DiRT Rally        | +25% (avg) | +17% (max) |
   | Bioshock Infinite | +22% (avg) | +11% (max) |
   | Shadow of Mordor  | +27% (avg) | +83% (max) |
   -----------------------------------------------

src/gallium/drivers/iris/iris_context.h
src/gallium/drivers/iris/iris_resource.c
src/gallium/drivers/iris/iris_screen.c
src/gallium/drivers/iris/iris_state.c

index 1b69b256947c6258deaf57d5be30c3e46d364041..ab70fc58718cbd235987bf17ef19a30ac3214541 100644 (file)
@@ -355,6 +355,9 @@ struct iris_vtable {
    void (*upload_compute_state)(struct iris_context *ice,
                                 struct iris_batch *batch,
                                 const struct pipe_grid_info *grid);
+   void (*rebind_buffer)(struct iris_context *ice,
+                         struct iris_resource *res,
+                         uint64_t old_address);
    void (*load_register_reg32)(struct iris_batch *batch, uint32_t dst,
                                uint32_t src);
    void (*load_register_reg64)(struct iris_batch *batch, uint32_t dst,
index 0011439949e2aac8170a722869e36ded81c38ed6..293f71aa1f934b14e869d734fc1e12ff9291957c 100644 (file)
@@ -38,6 +38,7 @@
 #include "util/u_cpu_detect.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
+#include "util/u_threaded_context.h"
 #include "util/u_transfer.h"
 #include "util/u_transfer_helper.h"
 #include "util/u_upload_mgr.h"
@@ -877,6 +878,37 @@ iris_resource_get_handle(struct pipe_screen *pscreen,
    return false;
 }
 
+static void
+iris_invalidate_resource(struct pipe_context *ctx,
+                         struct pipe_resource *resource)
+{
+   struct iris_screen *screen = (void *) ctx->screen;
+   struct iris_context *ice = (void *) ctx;
+   struct iris_resource *res = (void *) resource;
+
+   if (resource->target != PIPE_BUFFER)
+      return;
+
+   /* We can't reallocate memory we didn't allocate in the first place. */
+   if (res->bo->userptr)
+      return;
+
+   // XXX: We should support this.
+   if (res->bind_history & PIPE_BIND_STREAM_OUTPUT)
+      return;
+
+   struct iris_bo *old_bo = res->bo;
+   struct iris_bo *new_bo =
+      iris_bo_alloc(screen->bufmgr, res->bo->name, resource->width0,
+                    iris_memzone_for_address(old_bo->gtt_offset));
+   if (!new_bo)
+      return;
+
+   res->bo = new_bo;
+   ice->vtbl.rebind_buffer(ice, res, old_bo->gtt_offset);
+   iris_bo_unreference(old_bo);
+}
+
 static void
 iris_flush_staging_region(struct pipe_transfer *xfer,
                           const struct pipe_box *flush_box)
@@ -1280,11 +1312,15 @@ iris_transfer_map(struct pipe_context *ctx,
    struct iris_resource *res = (struct iris_resource *)resource;
    struct isl_surf *surf = &res->surf;
 
-    /* If we can discard the whole resource, we can also discard the
-     * subrange being accessed.
-     */
-    if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
-       usage |= PIPE_TRANSFER_DISCARD_RANGE;
+   if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
+      /* Replace the backing storage with a fresh buffer for non-async maps */
+      if (!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
+                     TC_TRANSFER_MAP_NO_INVALIDATE)))
+         iris_invalidate_resource(ctx, resource);
+
+      /* If we can discard the whole resource, we can discard the range. */
+      usage |= PIPE_TRANSFER_DISCARD_RANGE;
+   }
 
    bool map_would_stall = false;
 
@@ -1536,6 +1572,7 @@ void
 iris_init_resource_functions(struct pipe_context *ctx)
 {
    ctx->flush_resource = iris_flush_resource;
+   ctx->invalidate_resource = iris_invalidate_resource;
    ctx->transfer_map = u_transfer_helper_transfer_map;
    ctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;
    ctx->transfer_unmap = u_transfer_helper_transfer_unmap;
index 38cdbc1507dac9daf257d0cf061c0d85a1e6a698..1ede9c4335f2c8ae75adc7b4d2f902bd0075a2db 100644 (file)
@@ -178,6 +178,7 @@ iris_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_DRAW_PARAMETERS:
    case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
    case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES:
+   case PIPE_CAP_INVALIDATE_BUFFER:
       return true;
    case PIPE_CAP_TGSI_FS_FBFETCH:
    case PIPE_CAP_POST_DEPTH_COVERAGE:
index 073da79a28a769c875eecad1b466d38e0383c0cd..d6a8ba4fb47385cca7c96a6850292ba636762eb3 100644 (file)
@@ -5460,6 +5460,130 @@ iris_destroy_state(struct iris_context *ice)
 
 /* ------------------------------------------------------------------- */
 
+static void
+iris_rebind_buffer(struct iris_context *ice,
+                   struct iris_resource *res,
+                   uint64_t old_address)
+{
+   struct pipe_context *ctx = &ice->ctx;
+   struct iris_screen *screen = (void *) ctx->screen;
+   struct iris_genx_state *genx = ice->state.genx;
+
+   assert(res->base.target == PIPE_BUFFER);
+
+   /* Buffers can't be framebuffer attachments, nor display related,
+    * and we don't have upstream Clover support.
+    */
+   assert(!(res->bind_history & (PIPE_BIND_DEPTH_STENCIL |
+                                 PIPE_BIND_RENDER_TARGET |
+                                 PIPE_BIND_BLENDABLE |
+                                 PIPE_BIND_DISPLAY_TARGET |
+                                 PIPE_BIND_CURSOR |
+                                 PIPE_BIND_COMPUTE_RESOURCE |
+                                 PIPE_BIND_GLOBAL)));
+
+   if (res->bind_history & PIPE_BIND_VERTEX_BUFFER) {
+      uint64_t bound_vbs = ice->state.bound_vertex_buffers;
+      while (bound_vbs) {
+         const int i = u_bit_scan64(&bound_vbs);
+         struct iris_vertex_buffer_state *state = &genx->vertex_buffers[i];
+
+         /* Update the CPU struct */
+         STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_start) == 32);
+         STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) == 64);
+         uint64_t *addr = (uint64_t *) &state->state[1];
+
+         if (*addr == old_address) {
+            *addr = res->bo->gtt_offset;
+            ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS;
+         }
+      }
+   }
+
+   /* No need to handle these:
+    * - PIPE_BIND_INDEX_BUFFER (emitted for every indexed draw)
+    * - PIPE_BIND_COMMAND_ARGS_BUFFER (emitted for every indirect draw)
+    * - PIPE_BIND_QUERY_BUFFER (no persistent state references)
+    */
+
+   if (res->bind_history & PIPE_BIND_STREAM_OUTPUT) {
+      /* XXX: be careful about resetting vs appending... */
+      assert(false);
+   }
+
+   for (int s = MESA_SHADER_VERTEX; s < MESA_SHADER_STAGES; s++) {
+      struct iris_shader_state *shs = &ice->state.shaders[s];
+      enum pipe_shader_type p_stage = stage_to_pipe(s);
+
+      if (res->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
+         /* Skip constant buffer 0, it's for regular uniforms, not UBOs */
+         uint32_t bound_cbufs = shs->bound_cbufs & ~1u;
+         while (bound_cbufs) {
+            const int i = u_bit_scan(&bound_cbufs);
+            struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
+            struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];
+
+            if (res->bo == iris_resource_bo(cbuf->buffer)) {
+               upload_ubo_ssbo_surf_state(ice, cbuf, surf_state, false);
+               ice->state.dirty |= IRIS_DIRTY_CONSTANTS_VS << s;
+            }
+         }
+      }
+
+      if (res->bind_history & PIPE_BIND_SHADER_BUFFER) {
+         uint32_t bound_ssbos = shs->bound_ssbos;
+         while (bound_ssbos) {
+            const int i = u_bit_scan(&bound_ssbos);
+            struct pipe_shader_buffer *ssbo = &shs->ssbo[i];
+
+            if (res->bo == iris_resource_bo(ssbo->buffer)) {
+               struct pipe_shader_buffer buf = {
+                  .buffer = &res->base,
+                  .buffer_offset = ssbo->buffer_offset,
+                  .buffer_size = ssbo->buffer_size,
+               };
+               iris_set_shader_buffers(ctx, p_stage, i, 1, &buf,
+                                       (shs->writable_ssbos >> i) & 1);
+            }
+         }
+      }
+
+      if (res->bind_history & PIPE_BIND_SAMPLER_VIEW) {
+         uint32_t bound_sampler_views = shs->bound_sampler_views;
+         while (bound_sampler_views) {
+            const int i = u_bit_scan(&bound_sampler_views);
+            struct iris_sampler_view *isv = shs->textures[i];
+
+            if (res->bo == iris_resource_bo(isv->base.texture)) {
+               void *map = alloc_surface_states(ice->state.surface_uploader,
+                                                &isv->surface_state,
+                                                isv->res->aux.sampler_usages);
+               assert(map);
+               fill_buffer_surface_state(&screen->isl_dev, isv->res->bo, map,
+                                         isv->view.format, isv->view.swizzle,
+                                         isv->base.u.buf.offset,
+                                         isv->base.u.buf.size);
+               ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS << s;
+            }
+         }
+      }
+
+      if (res->bind_history & PIPE_BIND_SHADER_IMAGE) {
+         uint32_t bound_image_views = shs->bound_image_views;
+         while (bound_image_views) {
+            const int i = u_bit_scan(&bound_image_views);
+            struct iris_image_view *iv = &shs->image[i];
+
+            if (res->bo == iris_resource_bo(iv->base.resource)) {
+               iris_set_shader_images(ctx, p_stage, i, 1, &iv->base);
+            }
+         }
+      }
+   }
+}
+
+/* ------------------------------------------------------------------- */
+
 static void
 iris_load_register_reg32(struct iris_batch *batch, uint32_t dst,
                          uint32_t src)
@@ -6075,6 +6199,7 @@ genX(init_state)(struct iris_context *ice)
    ice->vtbl.update_surface_base_address = iris_update_surface_base_address;
    ice->vtbl.upload_compute_state = iris_upload_compute_state;
    ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control;
+   ice->vtbl.rebind_buffer = iris_rebind_buffer;
    ice->vtbl.load_register_reg32 = iris_load_register_reg32;
    ice->vtbl.load_register_reg64 = iris_load_register_reg64;
    ice->vtbl.load_register_imm32 = iris_load_register_imm32;