X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Firis%2Firis_resource.c;h=3d4bfd6fd9f78c58d582f7171fc5494fc766525f;hb=cdcf38b98a7bfdfda5836d6a4cebb6cca074e5ff;hp=c16fc8eb0158519863ba85d56e3fa6c007c00085;hpb=7339660e80331ed773fc6dbb8295384d4ef6954e;p=mesa.git

diff --git a/src/gallium/drivers/iris/iris_resource.c b/src/gallium/drivers/iris/iris_resource.c
index c16fc8eb015..3d4bfd6fd9f 100644
--- a/src/gallium/drivers/iris/iris_resource.c
+++ b/src/gallium/drivers/iris/iris_resource.c
@@ -38,6 +38,7 @@
 #include "util/u_cpu_detect.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
+#include "util/u_threaded_context.h"
 #include "util/u_transfer.h"
 #include "util/u_transfer_helper.h"
 #include "util/u_upload_mgr.h"
@@ -46,7 +47,7 @@
 #include "iris_context.h"
 #include "iris_resource.h"
 #include "iris_screen.h"
-#include "intel/common/gen_debug.h"
+#include "intel/dev/gen_debug.h"
 #include "isl/isl.h"
 #include "drm-uapi/drm_fourcc.h"
 #include "drm-uapi/i915_drm.h"
@@ -203,7 +204,11 @@ iris_resource_get_separate_stencil(struct pipe_resource *p_res)
    /* For packed depth-stencil, we treat depth as the primary resource
     * and store S8 as the "second plane" resource.
     */
-   return p_res->next;
+   if (p_res->next && p_res->next->format == PIPE_FORMAT_S8_UINT)
+      return p_res->next;
+
+   return NULL;
+
 }
 
 static void
@@ -256,6 +261,9 @@ iris_resource_destroy(struct pipe_screen *screen,
 {
    struct iris_resource *res = (struct iris_resource *)resource;
 
+   if (resource->target == PIPE_BUFFER)
+      util_range_destroy(&res->valid_buffer_range);
+
    iris_resource_disable_aux(res);
 
    iris_bo_unreference(res->bo);
@@ -277,6 +285,9 @@ iris_alloc_resource(struct pipe_screen *pscreen,
    res->aux.possible_usages = 1 << ISL_AUX_USAGE_NONE;
    res->aux.sampler_usages = 1 << ISL_AUX_USAGE_NONE;
 
+   if (templ->target == PIPE_BUFFER)
+      util_range_init(&res->valid_buffer_range);
+
    return res;
 }
 
@@ -344,6 +355,7 @@ iris_resource_alloc_aux(struct iris_screen *screen, struct iris_resource *res)
    switch (res->aux.usage) {
    case ISL_AUX_USAGE_NONE:
       res->aux.surf.size_B = 0;
+      ok = true;
       break;
    case ISL_AUX_USAGE_HIZ:
       initial_state = ISL_AUX_STATE_AUX_INVALID;
@@ -385,13 +397,14 @@ iris_resource_alloc_aux(struct iris_screen *screen, struct iris_resource *res)
       break;
    }
 
+   /* Bail out if we failed to get a valid aux_surf. */
+   if (!ok)
+      return false;
+
    /* No work is needed for a zero-sized auxiliary buffer. */
    if (res->aux.surf.size_B == 0)
       return true;
 
-   /* Assert that ISL gave us a valid aux surf */
-   assert(ok);
-
    /* Create the aux_state for the auxiliary buffer. */
    res->aux.state = create_aux_state_map(res, initial_state);
    if (!res->aux.state)
@@ -466,10 +479,6 @@ supports_mcs(const struct isl_surf *surf)
    if (surf->samples <= 1)
       return false;
 
-   /* See isl_surf_get_mcs_surf for details. */
-   if (surf->samples == 16 && surf->logical_level0_px.width > 8192)
-      return false;
-
    /* Depth and stencil buffers use the IMS (interleaved) layout. */
    if (isl_surf_usage_is_depth_or_stencil(surf->usage))
       return false;
@@ -481,21 +490,10 @@ static bool
 supports_ccs(const struct gen_device_info *devinfo,
              const struct isl_surf *surf)
 {
-   /* Gen9+ only supports CCS for Y-tiled buffers. */
-   if (surf->tiling != ISL_TILING_Y0)
-      return false;
-
    /* CCS only supports singlesampled resources. */
    if (surf->samples > 1)
       return false;
 
-   /* The PRM doesn't say this explicitly, but fast-clears don't appear to
-    * work for 3D textures until Gen9 where the layout of 3D textures changes
-    * to match 2D array textures.
-    */
-   if (devinfo->gen < 9 && surf->dim != ISL_SURF_DIM_2D)
-      return false;
-
    /* Note: still need to check the format! */
 
    return true;
@@ -677,7 +675,7 @@ iris_resource_create_with_modifiers(struct pipe_screen *pscreen,
       goto fail;
 
    if (!iris_resource_alloc_aux(screen, res))
-      goto fail;
+      iris_resource_disable_aux(res);
 
    return &res->base;
 
@@ -734,6 +732,8 @@ iris_resource_from_user_memory(struct pipe_screen *pscreen,
       return NULL;
    }
 
+   util_range_add(&res->valid_buffer_range, 0, templ->width0);
+
    return &res->base;
 }
 
@@ -750,12 +750,6 @@ iris_resource_from_handle(struct pipe_screen *pscreen,
    if (!res)
       return NULL;
 
-   if (whandle->offset != 0) {
-      dbg_printf("Attempt to import unsupported winsys offset %u\n",
-                 whandle->offset);
-      goto fail;
-   }
-
    switch (whandle->type) {
    case WINSYS_HANDLE_TYPE_FD:
       res->bo = iris_bo_import_dmabuf(bufmgr, whandle->handle);
@@ -770,6 +764,8 @@ iris_resource_from_handle(struct pipe_screen *pscreen,
    if (!res->bo)
       return NULL;
 
+   res->offset = whandle->offset;
+
    uint64_t modifier = whandle->modifier;
    if (modifier == DRM_FORMAT_MOD_INVALID) {
       modifier = tiling_to_modifier(res->bo->tiling_mode);
@@ -815,6 +811,21 @@ fail:
    return NULL;
 }
 
+static void
+iris_flush_resource(struct pipe_context *ctx, struct pipe_resource *resource)
+{
+   struct iris_context *ice = (struct iris_context *)ctx;
+   struct iris_batch *render_batch = &ice->batches[IRIS_BATCH_RENDER];
+   struct iris_resource *res = (void *) resource;
+   const struct isl_drm_modifier_info *mod = res->mod_info;
+
+   iris_resource_prepare_access(ice, render_batch, res,
+                                0, INTEL_REMAINING_LEVELS,
+                                0, INTEL_REMAINING_LAYERS,
+                                mod ? mod->aux_usage : ISL_AUX_USAGE_NONE,
+                                mod ? mod->supports_clear_color : false);
+}
+
 static boolean
 iris_resource_get_handle(struct pipe_screen *pscreen,
                          struct pipe_context *ctx,
@@ -824,6 +835,14 @@ iris_resource_get_handle(struct pipe_screen *pscreen,
 {
    struct iris_resource *res = (struct iris_resource *)resource;
 
+   /* Disable aux usage if the explicit flush flag is not set and this is
+    * the first time we are dealing with this resource (refcount is 1).
+    */
+   if ((!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && res->aux.usage != 0)) {
+      if (p_atomic_read(&resource->reference.count) == 1)
+         iris_resource_disable_aux(res);
+   }
+
    /* If this is a buffer, stride should be 0 - no need to special case */
    whandle->stride = res->surf.row_pitch_B;
    whandle->modifier =
@@ -854,25 +873,99 @@ iris_resource_get_handle(struct pipe_screen *pscreen,
    return false;
 }
 
+static bool
+resource_is_busy(struct iris_context *ice,
+                 struct iris_resource *res)
+{
+   bool busy = iris_bo_busy(res->bo);
+
+   for (int i = 0; i < IRIS_BATCH_COUNT; i++)
+      busy |= iris_batch_references(&ice->batches[i], res->bo);
+
+   return busy;
+}
+
 static void
-iris_unmap_copy_region(struct iris_transfer *map)
+iris_invalidate_resource(struct pipe_context *ctx,
+                         struct pipe_resource *resource)
 {
-   struct pipe_transfer *xfer = &map->base;
-   struct pipe_box *dst_box = &xfer->box;
-   struct pipe_box src_box = (struct pipe_box) {
-      .x = xfer->resource->target == PIPE_BUFFER ?
-           xfer->box.x % IRIS_MAP_BUFFER_ALIGNMENT : 0,
-      .width = dst_box->width,
-      .height = dst_box->height,
-      .depth = dst_box->depth,
-   };
+   struct iris_screen *screen = (void *) ctx->screen;
+   struct iris_context *ice = (void *) ctx;
+   struct iris_resource *res = (void *) resource;
 
-   if (xfer->usage & PIPE_TRANSFER_WRITE) {
-      iris_copy_region(map->blorp, map->batch, xfer->resource, xfer->level,
-                       dst_box->x, dst_box->y, dst_box->z, map->staging, 0,
-                       &src_box);
+   if (resource->target != PIPE_BUFFER)
+      return;
+
+   if (!resource_is_busy(ice, res)) {
+      /* The resource is idle, so just mark that it contains no data and
+       * keep using the same underlying buffer object.
+       */
+      util_range_set_empty(&res->valid_buffer_range);
+      return;
    }
 
+   /* Otherwise, try and replace the backing storage with a new BO. */
+
+   /* We can't reallocate memory we didn't allocate in the first place. */
+   if (res->bo->userptr)
+      return;
+
+   // XXX: We should support replacing stream output buffers too.
+   if (res->bind_history & PIPE_BIND_STREAM_OUTPUT)
+      return;
+
+   struct iris_bo *old_bo = res->bo;
+   struct iris_bo *new_bo =
+      iris_bo_alloc(screen->bufmgr, res->bo->name, resource->width0,
+                    iris_memzone_for_address(old_bo->gtt_offset));
+   if (!new_bo)
+      return;
+
+   /* Swap out the backing storage */
+   res->bo = new_bo;
+
+   /* Rebind the buffer, replacing any state referring to the old BO's
+    * address, and marking state dirty so it's reemitted.
+    */
+   ice->vtbl.rebind_buffer(ice, res, old_bo->gtt_offset);
+
+   util_range_set_empty(&res->valid_buffer_range);
+
+   iris_bo_unreference(old_bo);
+}
+
+static void
+iris_flush_staging_region(struct pipe_transfer *xfer,
+                          const struct pipe_box *flush_box)
+{
+   if (!(xfer->usage & PIPE_TRANSFER_WRITE))
+      return;
+
+   struct iris_transfer *map = (void *) xfer;
+
+   struct pipe_box src_box = *flush_box;
+
+   /* Account for extra alignment padding in staging buffer */
+   if (xfer->resource->target == PIPE_BUFFER)
+      src_box.x += xfer->box.x % IRIS_MAP_BUFFER_ALIGNMENT;
+
+   struct pipe_box dst_box = (struct pipe_box) {
+      .x = xfer->box.x + flush_box->x,
+      .y = xfer->box.y + flush_box->y,
+      .z = xfer->box.z + flush_box->z,
+      .width = flush_box->width,
+      .height = flush_box->height,
+      .depth = flush_box->depth,
+   };
+
+   iris_copy_region(map->blorp, map->batch, xfer->resource, xfer->level,
+                    dst_box.x, dst_box.y, dst_box.z, map->staging, 0,
+                    &src_box);
+}
+
+static void
+iris_unmap_copy_region(struct iris_transfer *map)
+{
    iris_resource_destroy(map->staging->screen, map->staging);
 
    map->ptr = NULL;
@@ -945,13 +1038,14 @@ iris_map_copy_region(struct iris_transfer *map)
    if (iris_batch_references(map->batch, staging_bo))
       iris_batch_flush(map->batch);
 
-   map->ptr = iris_bo_map(map->dbg, staging_bo, xfer->usage) + extra;
+   map->ptr =
+      iris_bo_map(map->dbg, staging_bo, xfer->usage & MAP_FLAGS) + extra;
 
    map->unmap = iris_unmap_copy_region;
 }
 
 static void
-get_image_offset_el(struct isl_surf *surf, unsigned level, unsigned z,
+get_image_offset_el(const struct isl_surf *surf, unsigned level, unsigned z,
                     unsigned *out_x0_el, unsigned *out_y0_el)
 {
    if (surf->dim == ISL_SURF_DIM_3D) {
@@ -1029,7 +1123,7 @@ iris_unmap_s8(struct iris_transfer *map)
    if (xfer->usage & PIPE_TRANSFER_WRITE) {
       uint8_t *untiled_s8_map = map->ptr;
       uint8_t *tiled_s8_map =
-         iris_bo_map(map->dbg, res->bo, xfer->usage | MAP_RAW);
+         iris_bo_map(map->dbg, res->bo, (xfer->usage | MAP_RAW) & MAP_FLAGS);
 
       for (int s = 0; s < box->depth; s++) {
          unsigned x0_el, y0_el;
@@ -1079,7 +1173,7 @@ iris_map_s8(struct iris_transfer *map)
    if (!(xfer->usage & PIPE_TRANSFER_DISCARD_RANGE)) {
       uint8_t *untiled_s8_map = map->ptr;
       uint8_t *tiled_s8_map =
-         iris_bo_map(map->dbg, res->bo, xfer->usage | MAP_RAW);
+         iris_bo_map(map->dbg, res->bo, (xfer->usage | MAP_RAW) & MAP_FLAGS);
 
       for (int s = 0; s < box->depth; s++) {
          unsigned x0_el, y0_el;
@@ -1105,7 +1199,7 @@ iris_map_s8(struct iris_transfer *map)
  * xs are in units of bytes and ys are in units of strides.
  */
 static inline void
-tile_extents(struct isl_surf *surf,
+tile_extents(const struct isl_surf *surf,
              const struct pipe_box *box,
              unsigned level, int z,
              unsigned *x1_B, unsigned *x2_B,
@@ -1137,7 +1231,8 @@ iris_unmap_tiled_memcpy(struct iris_transfer *map)
    const bool has_swizzling = false;
 
    if (xfer->usage & PIPE_TRANSFER_WRITE) {
-      char *dst = iris_bo_map(map->dbg, res->bo, xfer->usage | MAP_RAW);
+      char *dst =
+         iris_bo_map(map->dbg, res->bo, (xfer->usage | MAP_RAW) & MAP_FLAGS);
 
       for (int s = 0; s < box->depth; s++) {
          unsigned x1, x2, y1, y2;
@@ -1181,7 +1276,8 @@ iris_map_tiled_memcpy(struct iris_transfer *map)
 
    // XXX: PIPE_TRANSFER_READ?
    if (!(xfer->usage & PIPE_TRANSFER_DISCARD_RANGE)) {
-      char *src = iris_bo_map(map->dbg, res->bo, xfer->usage | MAP_RAW);
+      char *src =
+         iris_bo_map(map->dbg, res->bo, (xfer->usage | MAP_RAW) & MAP_FLAGS);
 
       for (int s = 0; s < box->depth; s++) {
          unsigned x1, x2, y1, y2;
@@ -1206,7 +1302,7 @@ iris_map_direct(struct iris_transfer *map)
    struct pipe_box *box = &xfer->box;
    struct iris_resource *res = (struct iris_resource *) xfer->resource;
 
-   void *ptr = iris_bo_map(map->dbg, res->bo, xfer->usage);
+   void *ptr = iris_bo_map(map->dbg, res->bo, xfer->usage & MAP_FLAGS);
 
    if (res->base.target == PIPE_BUFFER) {
       xfer->stride = 0;
@@ -1229,6 +1325,21 @@ iris_map_direct(struct iris_transfer *map)
    }
 }
 
+static bool
+can_promote_to_async(const struct iris_resource *res,
+                     const struct pipe_box *box,
+                     enum pipe_transfer_usage usage)
+{
+   /* If we're writing to a section of the buffer that hasn't even been
+    * initialized with useful data, then we can safely promote this write
+    * to be unsynchronized.  This helps the common pattern of appending data.
+    */
+   return res->base.target == PIPE_BUFFER && (usage & PIPE_TRANSFER_WRITE) &&
+          !(usage & TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED) &&
+          !util_ranges_intersect(&res->valid_buffer_range, box->x,
+                                 box->x + box->width);
+}
+
 static void *
 iris_transfer_map(struct pipe_context *ctx,
                   struct pipe_resource *resource,
@@ -1241,11 +1352,15 @@ iris_transfer_map(struct pipe_context *ctx,
    struct iris_resource *res = (struct iris_resource *)resource;
    struct isl_surf *surf = &res->surf;
 
-    /* If we can discard the whole resource, we can also discard the
-     * subrange being accessed.
-     */
-    if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
-       usage |= PIPE_TRANSFER_DISCARD_RANGE;
+   if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
+      /* Replace the backing storage with a fresh buffer for non-async maps */
+      if (!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
+                     TC_TRANSFER_MAP_NO_INVALIDATE)))
+         iris_invalidate_resource(ctx, resource);
+
+      /* If we can discard the whole resource, we can discard the range. */
+      usage |= PIPE_TRANSFER_DISCARD_RANGE;
+   }
 
    bool map_would_stall = false;
 
@@ -1255,11 +1370,13 @@ iris_transfer_map(struct pipe_context *ctx,
                                usage & PIPE_TRANSFER_WRITE);
    }
 
-   if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
-      map_would_stall = iris_bo_busy(res->bo);
+   if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
+       can_promote_to_async(res, box, usage)) {
+      usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+   }
 
-      for (int i = 0; i < IRIS_BATCH_COUNT; i++)
-         map_would_stall |= iris_batch_references(&ice->batches[i], res->bo);
+   if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+      map_would_stall = resource_is_busy(ice, res);
 
       if (map_would_stall && (usage & PIPE_TRANSFER_DONTBLOCK) &&
                              (usage & PIPE_TRANSFER_MAP_DIRECTLY))
@@ -1285,12 +1402,8 @@ iris_transfer_map(struct pipe_context *ctx,
    xfer->box = *box;
    *ptransfer = xfer;
 
-   xfer->usage &= (PIPE_TRANSFER_READ |
-                   PIPE_TRANSFER_WRITE |
-                   PIPE_TRANSFER_UNSYNCHRONIZED |
-                   PIPE_TRANSFER_PERSISTENT |
-                   PIPE_TRANSFER_COHERENT |
-                   PIPE_TRANSFER_DISCARD_RANGE);
+   if (usage & PIPE_TRANSFER_WRITE)
+      util_range_add(&res->valid_buffer_range, box->x, box->x + box->width);
 
    /* Avoid using GPU copies for persistent/coherent buffers, as the idea
     * there is to access them simultaneously on the CPU & GPU.  This also
@@ -1317,7 +1430,7 @@ iris_transfer_map(struct pipe_context *ctx,
       no_gpu = true;
    }
 
-   if (map_would_stall && !no_gpu) {
+   if ((map_would_stall || res->aux.usage == ISL_AUX_USAGE_CCS_E) && !no_gpu) {
       /* If we need a synchronous mapping and the resource is busy,
        * we copy to/from a linear temporary buffer using the GPU.
        */
@@ -1353,6 +1466,10 @@ iris_transfer_flush_region(struct pipe_context *ctx,
 {
    struct iris_context *ice = (struct iris_context *)ctx;
    struct iris_resource *res = (struct iris_resource *) xfer->resource;
+   struct iris_transfer *map = (void *) xfer;
+
+   if (map->staging)
+      iris_flush_staging_region(xfer, box);
 
    for (int i = 0; i < IRIS_BATCH_COUNT; i++) {
       if (ice->batches[i].contains_draw ||
@@ -1361,6 +1478,11 @@ iris_transfer_flush_region(struct pipe_context *ctx,
          iris_flush_and_dirty_for_history(ice, &ice->batches[i], res);
       }
    }
+
+   /* Make sure we flag constants dirty even if there's no need to emit
+    * any PIPE_CONTROLs to a batch.
+    */
+   iris_dirty_for_history(ice, res);
 }
 
 static void
@@ -1368,59 +1490,34 @@ iris_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer *xfer)
 {
    struct iris_context *ice = (struct iris_context *)ctx;
    struct iris_transfer *map = (void *) xfer;
-   struct iris_resource *res = (struct iris_resource *) xfer->resource;
+
+   if (!(xfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
+      struct pipe_box flush_box = {
+         .x = 0, .y = 0, .z = 0,
+         .width  = xfer->box.width,
+         .height = xfer->box.height,
+         .depth  = xfer->box.depth,
+      };
+      iris_transfer_flush_region(ctx, xfer, &flush_box);
+   }
 
    if (map->unmap)
       map->unmap(map);
 
-   for (int i = 0; i < IRIS_BATCH_COUNT; i++) {
-      if (ice->batches[i].contains_draw ||
-          ice->batches[i].cache.render->entries) {
-         iris_batch_maybe_flush(&ice->batches[i], 24);
-         iris_flush_and_dirty_for_history(ice, &ice->batches[i], res);
-      }
-   }
-
    pipe_resource_reference(&xfer->resource, NULL);
    slab_free(&ice->transfer_pool, map);
 }
 
-static void
-iris_flush_resource(struct pipe_context *ctx, struct pipe_resource *resource)
-{
-   struct iris_context *ice = (struct iris_context *)ctx;
-   struct iris_batch *render_batch = &ice->batches[IRIS_BATCH_RENDER];
-   struct iris_resource *res = (void *) resource;
-   const struct isl_drm_modifier_info *mod = res->mod_info;
-
-   iris_resource_prepare_access(ice, render_batch, res,
-                                0, INTEL_REMAINING_LEVELS,
-                                0, INTEL_REMAINING_LAYERS,
-                                mod ? mod->aux_usage : ISL_AUX_USAGE_NONE,
-                                mod ? mod->supports_clear_color : false);
-}
-
+/**
+ * Mark state dirty that needs to be re-emitted when a resource is written.
+ */
 void
-iris_flush_and_dirty_for_history(struct iris_context *ice,
-                                 struct iris_batch *batch,
-                                 struct iris_resource *res)
+iris_dirty_for_history(struct iris_context *ice,
+                       struct iris_resource *res)
 {
-   if (res->base.target != PIPE_BUFFER)
-      return;
-
-   unsigned flush = PIPE_CONTROL_CS_STALL;
-
-   /* We've likely used the rendering engine (i.e. BLORP) to write to this
-    * surface.  Flush the render cache so the data actually lands.
-    */
-   if (batch->name != IRIS_BATCH_COMPUTE)
-      flush |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
-
    uint64_t dirty = 0ull;
 
    if (res->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
-      flush |= PIPE_CONTROL_CONST_CACHE_INVALIDATE |
-               PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
       dirty |= IRIS_DIRTY_CONSTANTS_VS |
                IRIS_DIRTY_CONSTANTS_TCS |
                IRIS_DIRTY_CONSTANTS_TES |
@@ -1430,6 +1527,23 @@ iris_flush_and_dirty_for_history(struct iris_context *ice,
                IRIS_ALL_DIRTY_BINDINGS;
    }
 
+   ice->state.dirty |= dirty;
+}
+
+/**
+ * Produce a set of PIPE_CONTROL bits which ensure data written to a
+ * resource becomes visible, and any stale read cache data is invalidated.
+ */
+uint32_t
+iris_flush_bits_for_history(struct iris_resource *res)
+{
+   uint32_t flush = PIPE_CONTROL_CS_STALL;
+
+   if (res->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
+      flush |= PIPE_CONTROL_CONST_CACHE_INVALIDATE |
+               PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+   }
+
    if (res->bind_history & PIPE_BIND_SAMPLER_VIEW)
       flush |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
 
@@ -1439,9 +1553,26 @@ iris_flush_and_dirty_for_history(struct iris_context *ice,
    if (res->bind_history & (PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE))
       flush |= PIPE_CONTROL_DATA_CACHE_FLUSH;
 
-   iris_emit_pipe_control_flush(batch, flush);
+   return flush;
+}
 
-   ice->state.dirty |= dirty;
+void
+iris_flush_and_dirty_for_history(struct iris_context *ice,
+                                 struct iris_batch *batch,
+                                 struct iris_resource *res)
+{
+   if (res->base.target != PIPE_BUFFER)
+      return;
+
+   uint32_t flush = iris_flush_bits_for_history(res);
+
+   /* We've likely used the rendering engine (i.e. BLORP) to write to this
+    * surface.  Flush the render cache so the data actually lands.
+    */
+   if (batch->name != IRIS_BATCH_COMPUTE)
+      flush |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
+
+   iris_emit_pipe_control_flush(batch, flush);
 }
 
 bool
@@ -1508,6 +1639,7 @@ void
 iris_init_resource_functions(struct pipe_context *ctx)
 {
    ctx->flush_resource = iris_flush_resource;
+   ctx->invalidate_resource = iris_invalidate_resource;
    ctx->transfer_map = u_transfer_helper_transfer_map;
    ctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;
    ctx->transfer_unmap = u_transfer_helper_transfer_unmap;