iris: Avoid unnecessary resolves on transfer maps
authorKenneth Graunke <kenneth@whitecape.org>
Fri, 26 Apr 2019 17:44:18 +0000 (10:44 -0700)
committerKenneth Graunke <kenneth@whitecape.org>
Fri, 23 Aug 2019 01:31:17 +0000 (18:31 -0700)
We were always resolving the buffer as if we were accessing it via
CPU maps, which don't understand any auxiliary surfaces.  But we often
copy to a temporary using BLORP, which understands compression just
fine.  So we can avoid the resolve, and accelerate the copy as well.

Fixes: 9d1334d2a0f ("iris: Use copy_region and staging resources to avoid transfer stalls")
Reviewed-by: Rafael Antognolli <rafael.antognolli@intel.com>
src/gallium/drivers/iris/iris_resource.c
src/gallium/drivers/iris/iris_resource.h

index cc19e601a75a34a3103876528822bc2da5395480..a8d0291862ebdc5eb80eba7125f1181ee004654c 100644 (file)
@@ -1707,21 +1707,29 @@ iris_transfer_map(struct pipe_context *ctx,
       usage |= PIPE_TRANSFER_DISCARD_RANGE;
    }
 
-   bool map_would_stall = false;
-
-   if (resource->target != PIPE_BUFFER) {
-      iris_resource_access_raw(ice, &ice->batches[IRIS_BATCH_RENDER], res,
-                               level, box->z, box->depth,
-                               usage & PIPE_TRANSFER_WRITE);
-   }
-
    if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
        can_promote_to_async(res, box, usage)) {
       usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
    }
 
+   bool need_resolve = false;
+   bool need_color_resolve = false;
+
+   if (resource->target != PIPE_BUFFER) {
+      bool need_hiz_resolve = iris_resource_level_has_hiz(res, level);
+
+      need_color_resolve =
+         (res->aux.usage == ISL_AUX_USAGE_CCS_D ||
+          res->aux.usage == ISL_AUX_USAGE_CCS_E) &&
+         iris_has_color_unresolved(res, level, 1, box->z, box->depth);
+
+      need_resolve = need_color_resolve || need_hiz_resolve;
+   }
+
+   bool map_would_stall = false;
+
    if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
-      map_would_stall = resource_is_busy(ice, res);
+      map_would_stall = need_resolve || resource_is_busy(ice, res);
 
       if (map_would_stall && (usage & PIPE_TRANSFER_DONTBLOCK) &&
                              (usage & PIPE_TRANSFER_MAP_DIRECTLY))
@@ -1769,25 +1777,29 @@ iris_transfer_map(struct pipe_context *ctx,
     * temporary and map that, to avoid the resolve.  (It might be better to
     * a tiled temporary and use the tiled_memcpy paths...)
     */
-   if (!(usage & PIPE_TRANSFER_DISCARD_RANGE) &&
-       res->aux.usage != ISL_AUX_USAGE_CCS_E &&
-       res->aux.usage != ISL_AUX_USAGE_CCS_D) {
+   if (!(usage & PIPE_TRANSFER_DISCARD_RANGE) && !need_color_resolve)
       no_gpu = true;
-   }
 
    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
    if (fmtl->txc == ISL_TXC_ASTC)
       no_gpu = true;
 
    if ((map_would_stall || res->aux.usage == ISL_AUX_USAGE_CCS_E) && !no_gpu) {
-      /* If we need a synchronous mapping and the resource is busy,
-       * we copy to/from a linear temporary buffer using the GPU.
+      /* If we need a synchronous mapping and the resource is busy, or needs
+       * resolving, we copy to/from a linear temporary buffer using the GPU.
        */
       map->batch = &ice->batches[IRIS_BATCH_RENDER];
       map->blorp = &ice->blorp;
       iris_map_copy_region(map);
    } else {
-      /* Otherwise we're free to map on the CPU.  Flush if needed. */
+      /* Otherwise we're free to map on the CPU. */
+
+      if (need_resolve) {
+         iris_resource_access_raw(ice, &ice->batches[IRIS_BATCH_RENDER], res,
+                                  level, box->z, box->depth,
+                                  usage & PIPE_TRANSFER_WRITE);
+      }
+
       if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
          for (int i = 0; i < IRIS_BATCH_COUNT; i++) {
             if (iris_batch_references(&ice->batches[i], res->bo))
index 1fdabb4073351de9ee09554c563ef650767def4c..304339eb470f673c0eaeb9d012b8b013ab8f43d1 100644 (file)
@@ -436,6 +436,9 @@ void iris_resource_check_level_layer(const struct iris_resource *res,
 
 bool iris_resource_level_has_hiz(const struct iris_resource *res,
                                  uint32_t level);
+bool iris_has_color_unresolved(const struct iris_resource *res,
+                               unsigned start_level, unsigned num_levels,
+                               unsigned start_layer, unsigned num_layers);
 
 enum isl_aux_usage iris_resource_render_aux_usage(struct iris_context *ice,
                                                   struct iris_resource *res,