anv: Add support for fast clears on gen9
author Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 18 Nov 2016 06:55:30 +0000 (22:55 -0800)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Tue, 22 Nov 2016 22:24:29 +0000 (14:24 -0800)
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
src/intel/vulkan/anv_blorp.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/genX_cmd_buffer.c

index 4b639e4a139f1edc2c3db07a400914df31c39d33..159e4a01d010aec417beba90a25e877e9ff4ad9c 100644 (file)
@@ -1193,16 +1193,35 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer)
       struct blorp_surf surf;
       get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
                                    att_state->aux_usage, &surf);
+      surf.clear_color = vk_to_isl_color(att_state->clear_value.color);
 
       const VkRect2D render_area = cmd_buffer->state.render_area;
 
-      blorp_clear(&batch, &surf, iview->isl.format, iview->isl.swizzle,
-                  iview->isl.base_level,
-                  iview->isl.base_array_layer, fb->layers,
-                  render_area.offset.x, render_area.offset.y,
-                  render_area.offset.x + render_area.extent.width,
-                  render_area.offset.y + render_area.extent.height,
-                  vk_to_isl_color(att_state->clear_value.color), NULL);
+      if (att_state->fast_clear) {
+         blorp_fast_clear(&batch, &surf, iview->isl.format,
+                          iview->isl.base_level,
+                          iview->isl.base_array_layer, fb->layers,
+                          render_area.offset.x, render_area.offset.y,
+                          render_area.offset.x + render_area.extent.width,
+                          render_area.offset.y + render_area.extent.height);
+
+         /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
+          *
+          *    "After Render target fast clear, pipe-control with color cache
+          *    write-flush must be issued before sending any DRAW commands on
+          *    that render target."
+          */
+         cmd_buffer->state.pending_pipe_bits |=
+            ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
+      } else {
+         blorp_clear(&batch, &surf, iview->isl.format, iview->isl.swizzle,
+                     iview->isl.base_level,
+                     iview->isl.base_array_layer, fb->layers,
+                     render_area.offset.x, render_area.offset.y,
+                     render_area.offset.x + render_area.extent.width,
+                     render_area.offset.y + render_area.extent.height,
+                     surf.clear_color, NULL);
+      }
 
       att_state->pending_clear_aspects = 0;
    }
@@ -1313,10 +1332,12 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
    struct anv_attachment_state *att_state =
       &cmd_buffer->state.attachments[att];
 
-   assert(att_state->aux_usage != ISL_AUX_USAGE_CCS_D);
-   if (att_state->aux_usage != ISL_AUX_USAGE_CCS_E)
+   if (att_state->aux_usage == ISL_AUX_USAGE_NONE)
       return; /* Nothing to resolve */
 
+   assert(att_state->aux_usage == ISL_AUX_USAGE_CCS_E ||
+          att_state->aux_usage == ISL_AUX_USAGE_CCS_D);
+
    struct anv_render_pass *pass = cmd_buffer->state.pass;
    struct anv_subpass *subpass = cmd_buffer->state.subpass;
    unsigned subpass_idx = subpass - pass->subpasses;
@@ -1327,14 +1348,17 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
     * of a particular attachment.  That way we only resolve once but it's
     * still hot in the cache.
     */
+   bool found_draw = false;
+   enum anv_subpass_usage usage = 0;
    for (uint32_t s = subpass_idx + 1; s < pass->subpass_count; s++) {
-      enum anv_subpass_usage usage = pass->attachments[att].subpass_usage[s];
+      usage |= pass->attachments[att].subpass_usage[s];
 
       if (usage & (ANV_SUBPASS_USAGE_DRAW | ANV_SUBPASS_USAGE_RESOLVE_DST)) {
          /* We found another subpass that draws to this attachment.  We'll
           * wait to resolve until then.
           */
-         return;
+         found_draw = true;
+         break;
       }
    }
 
@@ -1342,12 +1366,60 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
    const struct anv_image *image = iview->image;
    assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
 
-   if (image->aux_usage == ISL_AUX_USAGE_CCS_E)
+   enum blorp_fast_clear_op resolve_op = BLORP_FAST_CLEAR_OP_NONE;
+   if (!found_draw) {
+      /* This is the last subpass that writes to this attachment so we need to
+       * resolve here.  Ideally, we would like to only resolve if the storeOp
+       * is set to VK_ATTACHMENT_STORE_OP_STORE.  However, we need to ensure
+       * that the CCS bits are set to "resolved" because there may be copy or
+       * blit operations (which may ignore CCS) between now and the next time
+       * we render and we need to ensure that anything they write will be
+       * respected in the next render.  Unfortunately, the hardware does not
+       * provide us with any sort of "invalidate" pass that sets the CCS to
+       * "resolved" without writing to the render target.
+       */
+      if (iview->image->aux_usage != ISL_AUX_USAGE_CCS_E) {
+         /* The image destination surface doesn't support compression outside
+          * the render pass.  We need a full resolve.
+          */
+         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
+      } else if (att_state->fast_clear) {
+         /* We don't know what to do with clear colors outside the render
+          * pass.  We need a partial resolve.
+          */
+         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
+      } else {
+         /* The image "natively" supports all the compression we care about
+          * and we don't need to resolve at all.  If this is the case, we also
+          * don't need to resolve for any of the input attachment cases below.
+          */
+      }
+   } else if (usage & ANV_SUBPASS_USAGE_INPUT) {
+      /* Input attachments are clear-color aware so, at least on Sky Lake, we
+       * can frequently sample from them with no resolves at all.
+       */
+      if (att_state->aux_usage != att_state->input_aux_usage) {
+         assert(att_state->input_aux_usage == ISL_AUX_USAGE_NONE);
+         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
+      } else if (!att_state->clear_color_is_zero_one) {
+         /* Sky Lake PRM, Vol. 2d, RENDER_SURFACE_STATE::Red Clear Color:
+          *
+          *    "If Number of Multisamples is MULTISAMPLECOUNT_1 AND if this RT
+          *    is fast cleared with non-0/1 clear value, this RT must be
+          *    partially resolved (refer to Partial Resolve operation) before
+          *    binding this surface to Sampler."
+          */
+         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
+      }
+   }
+
+   if (resolve_op == BLORP_FAST_CLEAR_OP_NONE)
       return;
 
    struct blorp_surf surf;
    get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
                                 att_state->aux_usage, &surf);
+   surf.clear_color = vk_to_isl_color(att_state->clear_value.color);
 
    /* From the Sky Lake PRM Vol. 7, "Render Target Resolve":
     *
@@ -1368,12 +1440,14 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
       blorp_ccs_resolve(batch, &surf,
                         iview->isl.base_level,
                         iview->isl.base_array_layer + layer,
-                        iview->isl.format,
-                        BLORP_FAST_CLEAR_OP_RESOLVE_FULL);
+                        iview->isl.format, resolve_op);
    }
 
    cmd_buffer->state.pending_pipe_bits |=
       ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
+
+   /* Once we've done any sort of resolve, we're no longer fast-cleared */
+   att_state->fast_clear = false;
 }
 
 void
index 7931d4bb790f67e2db44636ac10efdd52acac3f4..2fc543daacc8c2c9409bad6ce535b4195c6477b7 100644 (file)
@@ -1100,11 +1100,14 @@ void anv_dynamic_state_copy(struct anv_dynamic_state *dest,
  */
 struct anv_attachment_state {
    enum isl_aux_usage                           aux_usage;
+   enum isl_aux_usage                           input_aux_usage;
    struct anv_state                             color_rt_state;
    struct anv_state                             input_att_state;
 
    VkImageAspectFlags                           pending_clear_aspects;
+   bool                                         fast_clear;
    VkClearValue                                 clear_value;
+   bool                                         clear_color_is_zero_one;
 };
 
 /** State required while building cmd buffer */
index 7aaa8a1cffe0257cd6cf5faa24cde4f9bdcf5f27..4649dae1a11b33377f28a9cd1b9bca9c4e6f8b17 100644 (file)
@@ -191,23 +191,87 @@ add_image_view_relocs(struct anv_cmd_buffer *cmd_buffer,
    }
 }
 
-static enum isl_aux_usage
-fb_attachment_get_aux_usage(struct anv_device *device,
-                            struct anv_framebuffer *fb,
-                            uint32_t attachment)
+static bool
+color_is_zero_one(VkClearColorValue value, enum isl_format format)
 {
-   struct anv_image_view *iview = fb->attachments[attachment];
+   if (isl_format_has_int_channel(format)) {
+      for (unsigned i = 0; i < 4; i++) {
+         if (value.int32[i] != 0 && value.int32[i] != 1)
+            return false;
+      }
+   } else {
+      for (unsigned i = 0; i < 4; i++) {
+         if (value.float32[i] != 0.0f && value.float32[i] != 1.0f)
+            return false;
+      }
+   }
 
-   if (iview->image->aux_surface.isl.size == 0)
-      return ISL_AUX_USAGE_NONE; /* No aux surface */
+   return true;
+}
+
+static void
+color_attachment_compute_aux_usage(struct anv_device *device,
+                                   struct anv_attachment_state *att_state,
+                                   struct anv_image_view *iview,
+                                   VkRect2D render_area,
+                                   union isl_color_value *fast_clear_color)
+{
+   if (iview->image->aux_surface.isl.size == 0) {
+      att_state->aux_usage = ISL_AUX_USAGE_NONE;
+      att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
+      att_state->fast_clear = false;
+      return;
+   }
 
    assert(iview->image->aux_surface.isl.usage & ISL_SURF_USAGE_CCS_BIT);
 
-   if (isl_format_supports_lossless_compression(&device->info,
-                                                iview->isl.format))
-      return ISL_AUX_USAGE_CCS_E;
+   att_state->clear_color_is_zero_one =
+      color_is_zero_one(att_state->clear_value.color, iview->isl.format);
+
+   if (att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
+      /* Start off assuming fast clears are possible */
+      att_state->fast_clear = true;
 
-   return ISL_AUX_USAGE_NONE;
+      /* Potentially, we could do partial fast-clears but doing so has crazy
+       * alignment restrictions.  It's easier to just restrict to full size
+       * fast clears for now.
+       */
+      if (render_area.offset.x != 0 ||
+          render_area.offset.y != 0 ||
+          render_area.extent.width != iview->extent.width ||
+          render_area.extent.height != iview->extent.height)
+         att_state->fast_clear = false;
+
+      if (att_state->fast_clear) {
+         memcpy(fast_clear_color->u32, att_state->clear_value.color.uint32,
+                sizeof(fast_clear_color->u32));
+      }
+   } else {
+      att_state->fast_clear = false;
+   }
+
+   if (isl_format_supports_lossless_compression(&device->info,
+                                                iview->isl.format)) {
+      att_state->aux_usage = ISL_AUX_USAGE_CCS_E;
+      att_state->input_aux_usage = ISL_AUX_USAGE_CCS_E;
+   } else if (att_state->fast_clear) {
+      att_state->aux_usage = ISL_AUX_USAGE_CCS_D;
+      /* From the Sky Lake PRM, RENDER_SURFACE_STATE::AuxiliarySurfaceMode:
+       *
+       *    "If Number of Multisamples is MULTISAMPLECOUNT_1, AUX_CCS_D
+       *    setting is only allowed if Surface Format supported for Fast
+       *    Clear. In addition, if the surface is bound to the sampling
+       *    engine, Surface Format must be supported for Render Target
+       *    Compression for surfaces bound to the sampling engine."
+       *
+       * In other words, we can't sample from a fast-cleared image if it
+       * doesn't also support color compression.
+       */
+      att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
+   } else {
+      att_state->aux_usage = ISL_AUX_USAGE_NONE;
+      att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
+   }
 }
 
 static bool
@@ -350,9 +414,12 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
          struct anv_image_view *iview = framebuffer->attachments[i];
          assert(iview->vk_format == att->format);
 
+         union isl_color_value clear_color = { .u32 = { 0, } };
          if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
-            state->attachments[i].aux_usage =
-               fb_attachment_get_aux_usage(cmd_buffer->device, framebuffer, i);
+            color_attachment_compute_aux_usage(cmd_buffer->device,
+                                               &state->attachments[i],
+                                               iview, begin->renderArea,
+                                               &clear_color);
 
             struct isl_view view = iview->isl;
             view.usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT;
@@ -362,6 +429,7 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
                                 .view = &view,
                                 .aux_surf = &iview->image->aux_surface.isl,
                                 .aux_usage = state->attachments[i].aux_usage,
+                                .clear_color = clear_color,
                                 .mocs = cmd_buffer->device->default_mocs);
 
             add_image_view_relocs(cmd_buffer, iview,
@@ -369,6 +437,7 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
                                   state->attachments[i].color_rt_state);
          } else {
             state->attachments[i].aux_usage = ISL_AUX_USAGE_NONE;
+            state->attachments[i].input_aux_usage = ISL_AUX_USAGE_NONE;
          }
 
          if (need_input_attachment_state(&pass->attachments[i])) {
@@ -386,11 +455,12 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
                                 .surf = surf,
                                 .view = &view,
                                 .aux_surf = &iview->image->aux_surface.isl,
-                                .aux_usage = state->attachments[i].aux_usage,
+                                .aux_usage = state->attachments[i].input_aux_usage,
+                                .clear_color = clear_color,
                                 .mocs = cmd_buffer->device->default_mocs);
 
             add_image_view_relocs(cmd_buffer, iview,
-                                  state->attachments[i].aux_usage,
+                                  state->attachments[i].input_aux_usage,
                                   state->attachments[i].input_att_state);
          }
       }