anv: Rework depth_stencil_attachment_compute_aux_usage
[mesa.git] / src / intel / vulkan / genX_cmd_buffer.c
index 7be8ba819e6114e4016e96c8c8a7a1a360348ec5..959e3a582f446fdd1b778d05a4f7c75aa0206a4d 100644 (file)
@@ -290,11 +290,29 @@ add_surface_state_relocs(struct anv_cmd_buffer *cmd_buffer,
    }
 }
 
+/* True if @color packs to different bits in the surface format than its
+ * inverse-swizzled value does in the view format.
+ */
+static bool
+isl_color_value_requires_conversion(union isl_color_value color,
+                                    const struct isl_surf *surf,
+                                    const struct isl_view *view)
+{
+   uint32_t packed_surf[4] = { 0 }, packed_view[4] = { 0 };
+   if (surf->format == view->format && isl_swizzle_is_identity(view->swizzle))
+      return false;
+
+   isl_color_value_pack(&color, surf->format, packed_surf);
+   union isl_color_value unswizzled =
+      isl_color_value_swizzle_inv(color, view->swizzle);
+   isl_color_value_pack(&unswizzled, view->format, packed_view);
+   return memcmp(packed_surf, packed_view, sizeof(packed_surf)) != 0;
+}
+
 static void
 color_attachment_compute_aux_usage(struct anv_device * device,
                                    struct anv_cmd_state * cmd_state,
-                                   uint32_t att, VkRect2D render_area,
-                                   union isl_color_value *fast_clear_color)
+                                   uint32_t att, VkRect2D render_area)
 {
    struct anv_attachment_state *att_state = &cmd_state->attachments[att];
    struct anv_image_view *iview = cmd_state->attachments[att].image_view;
@@ -365,7 +383,7 @@ color_attachment_compute_aux_usage(struct anv_device * device,
    union isl_color_value clear_color = {};
    anv_clear_color_from_att_state(&clear_color, att_state, iview);
 
-   att_state->clear_color_is_zero_one =
+   const bool clear_color_is_zero_one =
       isl_color_value_is_zero_one(clear_color, iview->planes[0].isl.format);
    att_state->clear_color_is_zero =
       isl_color_value_is_zero(clear_color, iview->planes[0].isl.format);
@@ -402,9 +420,23 @@ color_attachment_compute_aux_usage(struct anv_device * device,
          att_state->fast_clear = false;
 
       /* On Broadwell and earlier, we can only handle 0/1 clear colors */
-      if (GEN_GEN <= 8 && !att_state->clear_color_is_zero_one)
+      if (GEN_GEN <= 8 && !clear_color_is_zero_one)
          att_state->fast_clear = false;
 
+      /* If the clear color is one that would require non-trivial format
+       * conversion on resolve, we don't bother with the fast clear.  This
+       * shouldn't be common as most clear colors are 0/1 and the most common
+       * format re-interpretation is for sRGB.
+       */
+      if (isl_color_value_requires_conversion(clear_color,
+                                              &iview->image->planes[0].surface.isl,
+                                              &iview->planes[0].isl)) {
+         anv_perf_warn(device, iview,
+                       "Cannot fast-clear to colors which would require "
+                       "format conversion on resolve");
+         att_state->fast_clear = false;
+      }
+
       /* We only allow fast clears to the first slice of an image (level 0,
        * layer 0) and only for the entire slice.  This guarantees us that, at
        * any given time, there is only one clear color on any given image at
@@ -425,60 +457,39 @@ color_attachment_compute_aux_usage(struct anv_device * device,
                        "Rendering to a multi-layer framebuffer with "
                        "LOAD_OP_CLEAR.  Only fast-clearing the first slice");
       }
-
-      if (att_state->fast_clear)
-         *fast_clear_color = clear_color;
    } else {
       att_state->fast_clear = false;
    }
 }
 
-static void
-depth_stencil_attachment_compute_aux_usage(struct anv_device *device,
-                                           struct anv_cmd_state *cmd_state,
-                                           uint32_t att, VkRect2D render_area)
+static bool
+anv_can_hiz_clear_ds_view(struct anv_device *device,
+                          struct anv_image_view *iview,
+                          VkImageLayout layout,
+                          VkImageAspectFlags clear_aspects,
+                          float depth_clear_value,
+                          VkRect2D render_area)
 {
-   struct anv_render_pass_attachment *pass_att =
-      &cmd_state->pass->attachments[att];
-   struct anv_attachment_state *att_state = &cmd_state->attachments[att];
-   struct anv_image_view *iview = cmd_state->attachments[att].image_view;
-
-   /* These will be initialized after the first subpass transition. */
-   att_state->aux_usage = ISL_AUX_USAGE_NONE;
-   att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
-
-   /* This is unused for depth/stencil but valgrind complains if it
-    * isn't initialized
-    */
-   att_state->clear_color_is_zero_one = false;
-
-   if (GEN_GEN == 7) {
-      /* We don't do any HiZ or depth fast-clears on gen7 yet */
-      att_state->fast_clear = false;
-      return;
-   }
-
-   if (!(att_state->pending_clear_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
-      /* If we're just clearing stencil, we can always HiZ clear */
-      att_state->fast_clear = true;
-      return;
-   }
+   /* We don't do any HiZ or depth fast-clears on gen7 yet */
+   if (GEN_GEN == 7)
+      return false;
 
-   /* Default to false for now */
-   att_state->fast_clear = false;
+   /* If we're just clearing stencil, we can always HiZ clear */
+   if (!(clear_aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
+      return true;
 
    /* We must have depth in order to have HiZ */
    if (!(iview->image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
-      return;
+      return false;
 
-   const enum isl_aux_usage first_subpass_aux_usage =
+   const enum isl_aux_usage clear_aux_usage =
       anv_layout_to_aux_usage(&device->info, iview->image,
                               VK_IMAGE_ASPECT_DEPTH_BIT,
                               VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
-                              pass_att->first_subpass_layout);
+                              layout);
    if (!blorp_can_hiz_clear_depth(&device->info,
                                   &iview->image->planes[0].surface.isl,
-                                  first_subpass_aux_usage,
+                                  clear_aux_usage,
                                   iview->planes[0].isl.base_level,
                                   iview->planes[0].isl.base_array_layer,
                                   render_area.offset.x,
@@ -487,36 +498,120 @@ depth_stencil_attachment_compute_aux_usage(struct anv_device *device,
                                   render_area.extent.width,
                                   render_area.offset.y +
                                   render_area.extent.height))
-      return;
+      return false;
 
-   if (att_state->clear_value.depthStencil.depth != ANV_HZ_FC_VAL)
-      return;
+   if (depth_clear_value != ANV_HZ_FC_VAL)
+      return false;
 
-   if (GEN_GEN == 8 && anv_can_sample_with_hiz(&device->info, iview->image)) {
-      /* Only gen9+ supports returning ANV_HZ_FC_VAL when sampling a
-       * fast-cleared portion of a HiZ buffer. Testing has revealed that Gen8
-       * only supports returning 0.0f. Gens prior to gen8 do not support this
-       * feature at all.
-       */
-      return;
-   }
+   /* Only gen9+ supports returning ANV_HZ_FC_VAL when sampling a fast-cleared
+    * portion of a HiZ buffer. Testing has revealed that Gen8 only supports
+    * returning 0.0f. Gens prior to gen8 do not support this feature at all.
+    */
+   if (GEN_GEN == 8 && anv_can_sample_with_hiz(&device->info, iview->image))
+      return false;
 
    /* If we got here, then we can fast clear */
-   att_state->fast_clear = true;
+   return true;
 }
 
-static bool
-need_input_attachment_state(const struct anv_render_pass_attachment *att)
+#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
+
+#if GEN_GEN == 12
+static void
+anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
+                      const struct anv_image *image,
+                      VkImageAspectFlagBits aspect,
+                      uint32_t base_level, uint32_t level_count,
+                      uint32_t base_layer, uint32_t layer_count)
 {
-   if (!(att->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
-      return false;
+   uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
+
+   uint64_t base_address =
+      anv_address_physical(image->planes[plane].address);
+
+   const struct isl_surf *isl_surf = &image->planes[plane].surface.isl;
+   uint64_t format_bits = gen_aux_map_format_bits_for_isl_surf(isl_surf);
 
-   /* We only allocate input attachment states for color surfaces. Compression
-    * is not yet enabled for depth textures and stencil doesn't allow
-    * compression so we can just use the texture surface state from the view.
+   /* We're about to live-update the AUX-TT.  We really don't want anyone else
+    * trying to read it while we're doing this.  We could probably get away
+    * with not having this stall in some cases if we were really careful but
+    * it's better to play it safe.  Full stall the GPU.
     */
-   return vk_format_is_color(att->format);
+   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_END_OF_PIPE_SYNC_BIT;
+   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
+   struct gen_mi_builder b;
+   gen_mi_builder_init(&b, &cmd_buffer->batch);
+
+   for (uint32_t a = 0; a < layer_count; a++) {
+      const uint32_t layer = base_layer + a;
+
+      uint64_t start_offset_B = UINT64_MAX, end_offset_B = 0;
+      for (uint32_t l = 0; l < level_count; l++) {
+         const uint32_t level = base_level + l;
+
+         uint32_t logical_array_layer, logical_z_offset_px;
+         if (image->type == VK_IMAGE_TYPE_3D) {
+            logical_array_layer = 0;
+
+            /* If the given miplevel does not have this layer, then any higher
+             * miplevels won't either because miplevels only get smaller the
+             * higher the LOD.
+             */
+            assert(layer < image->extent.depth);
+            if (layer >= anv_minify(image->extent.depth, level))
+               break;
+            logical_z_offset_px = layer;
+         } else {
+            assert(layer < image->array_size);
+            logical_array_layer = layer;
+            logical_z_offset_px = 0;
+         }
+
+         uint32_t slice_start_offset_B, slice_end_offset_B;
+         isl_surf_get_image_range_B_tile(isl_surf, level,
+                                         logical_array_layer,
+                                         logical_z_offset_px,
+                                         &slice_start_offset_B,
+                                         &slice_end_offset_B);
+
+         start_offset_B = MIN2(start_offset_B, slice_start_offset_B);
+         end_offset_B = MAX2(end_offset_B, slice_end_offset_B);
+      }
+
+      /* Aux operates 64K at a time */
+      start_offset_B = align_down_u64(start_offset_B, 64 * 1024);
+      end_offset_B = align_u64(end_offset_B, 64 * 1024);
+
+      for (uint64_t offset = start_offset_B;
+           offset < end_offset_B; offset += 64 * 1024) {
+         uint64_t address = base_address + offset;
+
+         uint64_t aux_entry_addr64, *aux_entry_map;
+         aux_entry_map = gen_aux_map_get_entry(cmd_buffer->device->aux_map_ctx,
+                                               address, &aux_entry_addr64);
+
+         assert(cmd_buffer->device->physical->use_softpin);
+         struct anv_address aux_entry_address = {
+            .bo = NULL,
+            .offset = aux_entry_addr64,
+         };
+
+         const uint64_t old_aux_entry = READ_ONCE(*aux_entry_map);
+         uint64_t new_aux_entry =
+            (old_aux_entry & GEN_AUX_MAP_ADDRESS_MASK) | format_bits;
+
+         if (isl_aux_usage_has_ccs(image->planes[plane].aux_usage))
+            new_aux_entry |= GEN_AUX_MAP_ENTRY_VALID_BIT;
+
+         gen_mi_store(&b, gen_mi_mem64(aux_entry_address),
+                          gen_mi_imm(new_aux_entry));
+      }
+   }
+
+   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_AUX_TABLE_INVALIDATE_BIT;
 }
+#endif /* GEN_GEN == 12 */
 
 /* Transitions a HiZ-enabled depth buffer from one layout to another. Unless
  * the initial layout is undefined, the HiZ buffer and depth buffer will
@@ -525,6 +620,7 @@ need_input_attachment_state(const struct anv_render_pass_attachment *att)
 static void
 transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
                         const struct anv_image *image,
+                        uint32_t base_layer, uint32_t layer_count,
                         VkImageLayout initial_layout,
                         VkImageLayout final_layout)
 {
@@ -533,6 +629,16 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
    if (image->planes[depth_plane].aux_usage == ISL_AUX_USAGE_NONE)
       return;
 
+#if GEN_GEN == 12
+   if ((initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
+        initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) &&
+       cmd_buffer->device->physical->has_implicit_ccs &&
+       cmd_buffer->device->info.has_aux_map) {
+      anv_image_init_aux_tt(cmd_buffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
+                            0, 1, 0, 1);
+   }
+#endif
+
    const enum isl_aux_state initial_state =
       anv_layout_to_aux_state(&cmd_buffer->device->info, image,
                               VK_IMAGE_ASPECT_DEPTH_BIT,
@@ -560,11 +666,11 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
    if (final_needs_depth && !initial_depth_valid) {
       assert(initial_hiz_valid);
       anv_image_hiz_op(cmd_buffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
-                       0, 0, 1, ISL_AUX_OP_FULL_RESOLVE);
+                       0, base_layer, layer_count, ISL_AUX_OP_FULL_RESOLVE);
    } else if (final_needs_hiz && !initial_hiz_valid) {
       assert(initial_depth_valid);
       anv_image_hiz_op(cmd_buffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
-                       0, 0, 1, ISL_AUX_OP_AMBIGUATE);
+                       0, base_layer, layer_count, ISL_AUX_OP_AMBIGUATE);
    }
 }
 
@@ -1000,105 +1106,6 @@ genX(copy_fast_clear_dwords)(struct anv_cmd_buffer *cmd_buffer,
    }
 }
 
-#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
-
-#if GEN_GEN == 12
-static void
-anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
-                      const struct anv_image *image,
-                      VkImageAspectFlagBits aspect,
-                      uint32_t base_level, uint32_t level_count,
-                      uint32_t base_layer, uint32_t layer_count)
-{
-   uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
-
-   uint64_t base_address =
-      anv_address_physical(image->planes[plane].address);
-
-   const struct isl_surf *isl_surf = &image->planes[plane].surface.isl;
-   uint64_t format_bits = gen_aux_map_format_bits_for_isl_surf(isl_surf);
-
-   /* We're about to live-update the AUX-TT.  We really don't want anyone else
-    * trying to read it while we're doing this.  We could probably get away
-    * with not having this stall in some cases if we were really careful but
-    * it's better to play it safe.  Full stall the GPU.
-    */
-   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_END_OF_PIPE_SYNC_BIT;
-   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
-
-   struct gen_mi_builder b;
-   gen_mi_builder_init(&b, &cmd_buffer->batch);
-
-   for (uint32_t a = 0; a < layer_count; a++) {
-      const uint32_t layer = base_layer + a;
-
-      uint64_t start_offset_B = UINT64_MAX, end_offset_B = 0;
-      for (uint32_t l = 0; l < level_count; l++) {
-         const uint32_t level = base_level + l;
-
-         uint32_t logical_array_layer, logical_z_offset_px;
-         if (image->type == VK_IMAGE_TYPE_3D) {
-            logical_array_layer = 0;
-
-            /* If the given miplevel does not have this layer, then any higher
-             * miplevels won't either because miplevels only get smaller the
-             * higher the LOD.
-             */
-            assert(layer < image->extent.depth);
-            if (layer >= anv_minify(image->extent.depth, level))
-               break;
-            logical_z_offset_px = layer;
-         } else {
-            assert(layer < image->array_size);
-            logical_array_layer = layer;
-            logical_z_offset_px = 0;
-         }
-
-         uint32_t slice_start_offset_B, slice_end_offset_B;
-         isl_surf_get_image_range_B_tile(isl_surf, level,
-                                         logical_array_layer,
-                                         logical_z_offset_px,
-                                         &slice_start_offset_B,
-                                         &slice_end_offset_B);
-
-         start_offset_B = MIN2(start_offset_B, slice_start_offset_B);
-         end_offset_B = MAX2(end_offset_B, slice_end_offset_B);
-      }
-
-      /* Aux operates 64K at a time */
-      start_offset_B = align_down_u64(start_offset_B, 64 * 1024);
-      end_offset_B = align_u64(end_offset_B, 64 * 1024);
-
-      for (uint64_t offset = start_offset_B;
-           offset < end_offset_B; offset += 64 * 1024) {
-         uint64_t address = base_address + offset;
-
-         uint64_t aux_entry_addr64, *aux_entry_map;
-         aux_entry_map = gen_aux_map_get_entry(cmd_buffer->device->aux_map_ctx,
-                                               address, &aux_entry_addr64);
-
-         assert(cmd_buffer->device->physical->use_softpin);
-         struct anv_address aux_entry_address = {
-            .bo = NULL,
-            .offset = aux_entry_addr64,
-         };
-
-         const uint64_t old_aux_entry = READ_ONCE(*aux_entry_map);
-         uint64_t new_aux_entry =
-            (old_aux_entry & GEN_AUX_MAP_ADDRESS_MASK) | format_bits;
-
-         if (isl_aux_usage_has_ccs(image->planes[plane].aux_usage))
-            new_aux_entry |= GEN_AUX_MAP_ENTRY_VALID_BIT;
-
-         gen_mi_store(&b, gen_mi_mem64(aux_entry_address),
-                          gen_mi_imm(new_aux_entry));
-      }
-   }
-
-   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_AUX_TABLE_INVALIDATE_BIT;
-}
-#endif /* GEN_GEN == 12 */
-
 /**
  * @brief Transitions a color buffer from one layout to another.
  *
@@ -1360,25 +1367,21 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
       ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT;
 }
 
-/**
- * Setup anv_cmd_state::attachments for vkCmdBeginRenderPass.
- */
 static VkResult
 genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
-                                   struct anv_render_pass *pass,
+                                   const struct anv_render_pass *pass,
+                                   const struct anv_framebuffer *framebuffer,
                                    const VkRenderPassBeginInfo *begin)
 {
-   const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
    struct anv_cmd_state *state = &cmd_buffer->state;
-   struct anv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
 
    vk_free(&cmd_buffer->pool->alloc, state->attachments);
 
    if (pass->attachment_count > 0) {
-      state->attachments = vk_alloc(&cmd_buffer->pool->alloc,
-                                    pass->attachment_count *
-                                         sizeof(state->attachments[0]),
-                                    8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+      state->attachments = vk_zalloc(&cmd_buffer->pool->alloc,
+                                     pass->attachment_count *
+                                          sizeof(state->attachments[0]),
+                                     8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
       if (state->attachments == NULL) {
          /* Propagate VK_ERROR_OUT_OF_HOST_MEMORY to vkEndCommandBuffer */
          return anv_batch_set_error(&cmd_buffer->batch,
@@ -1388,147 +1391,84 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
       state->attachments = NULL;
    }
 
-   /* Reserve one for the NULL state. */
-   unsigned num_states = 1;
-   for (uint32_t i = 0; i < pass->attachment_count; ++i) {
-      if (vk_format_is_color(pass->attachments[i].format))
-         num_states++;
-
-      if (need_input_attachment_state(&pass->attachments[i]))
-         num_states++;
-   }
-
-   const uint32_t ss_stride = align_u32(isl_dev->ss.size, isl_dev->ss.align);
-   state->render_pass_states =
-      anv_state_stream_alloc(&cmd_buffer->surface_state_stream,
-                             num_states * ss_stride, isl_dev->ss.align);
-
-   struct anv_state next_state = state->render_pass_states;
-   next_state.alloc_size = isl_dev->ss.size;
-
-   state->null_surface_state = next_state;
-   next_state.offset += ss_stride;
-   next_state.map += ss_stride;
-
-   const VkRenderPassAttachmentBeginInfoKHR *begin_attachment =
+   const VkRenderPassAttachmentBeginInfoKHR *attach_begin =
       vk_find_struct_const(begin, RENDER_PASS_ATTACHMENT_BEGIN_INFO_KHR);
-
-   if (begin && !begin_attachment)
+   if (begin && !attach_begin)
       assert(pass->attachment_count == framebuffer->attachment_count);
 
    for (uint32_t i = 0; i < pass->attachment_count; ++i) {
-      if (vk_format_is_color(pass->attachments[i].format)) {
-         state->attachments[i].color.state = next_state;
-         next_state.offset += ss_stride;
-         next_state.map += ss_stride;
-      }
-
-      if (need_input_attachment_state(&pass->attachments[i])) {
-         state->attachments[i].input.state = next_state;
-         next_state.offset += ss_stride;
-         next_state.map += ss_stride;
-      }
-
-      if (begin_attachment && begin_attachment->attachmentCount != 0) {
-         assert(begin_attachment->attachmentCount == pass->attachment_count);
-         ANV_FROM_HANDLE(anv_image_view, iview, begin_attachment->pAttachments[i]);
-         cmd_buffer->state.attachments[i].image_view = iview;
+      if (attach_begin && attach_begin->attachmentCount != 0) {
+         assert(attach_begin->attachmentCount == pass->attachment_count);
+         ANV_FROM_HANDLE(anv_image_view, iview, attach_begin->pAttachments[i]);
+         state->attachments[i].image_view = iview;
       } else if (framebuffer && i < framebuffer->attachment_count) {
-         cmd_buffer->state.attachments[i].image_view = framebuffer->attachments[i];
+         state->attachments[i].image_view = framebuffer->attachments[i];
+      } else {
+         state->attachments[i].image_view = NULL;
       }
    }
-   assert(next_state.offset == state->render_pass_states.offset +
-                               state->render_pass_states.alloc_size);
 
    if (begin) {
-      isl_null_fill_state(isl_dev, state->null_surface_state.map,
-                          isl_extent3d(framebuffer->width,
-                                       framebuffer->height,
-                                       framebuffer->layers));
-
       for (uint32_t i = 0; i < pass->attachment_count; ++i) {
-         struct anv_render_pass_attachment *att = &pass->attachments[i];
-         VkImageAspectFlags att_aspects = vk_format_aspects(att->format);
+         const struct anv_render_pass_attachment *pass_att = &pass->attachments[i];
+         struct anv_attachment_state *att_state = &state->attachments[i];
+         VkImageAspectFlags att_aspects = vk_format_aspects(pass_att->format);
          VkImageAspectFlags clear_aspects = 0;
          VkImageAspectFlags load_aspects = 0;
 
          if (att_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
             /* color attachment */
-            if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+            if (pass_att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
                clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
-            } else if (att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
+            } else if (pass_att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
                load_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
             }
          } else {
             /* depthstencil attachment */
             if (att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
-               if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+               if (pass_att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
                   clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
-               } else if (att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
+               } else if (pass_att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
                   load_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
                }
             }
             if (att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
-               if (att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+               if (pass_att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
                   clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
-               } else if (att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
+               } else if (pass_att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
                   load_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
                }
             }
          }
 
-         state->attachments[i].current_layout = att->initial_layout;
-         state->attachments[i].current_stencil_layout = att->stencil_initial_layout;
-         state->attachments[i].pending_clear_aspects = clear_aspects;
-         state->attachments[i].pending_load_aspects = load_aspects;
+         att_state->current_layout = pass_att->initial_layout;
+         att_state->current_stencil_layout = pass_att->stencil_initial_layout;
+         att_state->pending_clear_aspects = clear_aspects;
+         att_state->pending_load_aspects = load_aspects;
          if (clear_aspects)
-            state->attachments[i].clear_value = begin->pClearValues[i];
+            att_state->clear_value = begin->pClearValues[i];
 
-         struct anv_image_view *iview = cmd_buffer->state.attachments[i].image_view;
-         anv_assert(iview->vk_format == att->format);
+         struct anv_image_view *iview = state->attachments[i].image_view;
+         anv_assert(iview->vk_format == pass_att->format);
 
          const uint32_t num_layers = iview->planes[0].isl.array_len;
-         state->attachments[i].pending_clear_views = (1 << num_layers) - 1;
+         att_state->pending_clear_views = (1 << num_layers) - 1;
 
-         union isl_color_value clear_color = { .u32 = { 0, } };
          if (att_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
             anv_assert(iview->n_planes == 1);
             assert(att_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
             color_attachment_compute_aux_usage(cmd_buffer->device,
-                                               state, i, begin->renderArea,
-                                               &clear_color);
-
-            anv_image_fill_surface_state(cmd_buffer->device,
-                                         iview->image,
-                                         VK_IMAGE_ASPECT_COLOR_BIT,
-                                         &iview->planes[0].isl,
-                                         ISL_SURF_USAGE_RENDER_TARGET_BIT,
-                                         state->attachments[i].aux_usage,
-                                         &clear_color,
-                                         0,
-                                         &state->attachments[i].color,
-                                         NULL);
-
-            add_surface_state_relocs(cmd_buffer, state->attachments[i].color);
+                                               state, i, begin->renderArea);
          } else {
-            depth_stencil_attachment_compute_aux_usage(cmd_buffer->device,
-                                                       state, i,
-                                                       begin->renderArea);
-         }
-
-         if (need_input_attachment_state(&pass->attachments[i])) {
-            anv_image_fill_surface_state(cmd_buffer->device,
-                                         iview->image,
-                                         VK_IMAGE_ASPECT_COLOR_BIT,
-                                         &iview->planes[0].isl,
-                                         ISL_SURF_USAGE_TEXTURE_BIT,
-                                         state->attachments[i].input_aux_usage,
-                                         &clear_color,
-                                         0,
-                                         &state->attachments[i].input,
-                                         NULL);
-
-            add_surface_state_relocs(cmd_buffer, state->attachments[i].input);
+            /* These will be initialized after the first subpass transition. */
+            att_state->aux_usage = ISL_AUX_USAGE_NONE;
+            att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
+            att_state->fast_clear =
+               anv_can_hiz_clear_ds_view(cmd_buffer->device, iview,
+                                         pass_att->first_subpass_layout,
+                                         clear_aspects,
+                                         att_state->clear_value.depthStencil.depth,
+                                         begin->renderArea);
          }
       }
    }
@@ -1536,6 +1476,82 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
    return VK_SUCCESS;
 }
 
+/**
+ * Allocate the NULL surface state plus one state per color-format attachment
+ * used as color or input by @subpass.  Returns VK_SUCCESS or a batch error.
+ */
+static VkResult
+genX(cmd_buffer_alloc_att_surf_states)(struct anv_cmd_buffer *cmd_buffer,
+                                       const struct anv_render_pass *pass,
+                                       const struct anv_subpass *subpass)
+{
+   const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
+   struct anv_cmd_state *state = &cmd_buffer->state;
+
+   /* Reserve one for the NULL state. */
+   unsigned num_states = 1;
+   for (uint32_t i = 0; i < subpass->attachment_count; i++) {
+      uint32_t att = subpass->attachments[i].attachment;
+      if (att == VK_ATTACHMENT_UNUSED)
+         continue;
+
+      assert(att < pass->attachment_count);
+      if (!vk_format_is_color(pass->attachments[att].format))
+         continue;
+
+      const VkImageUsageFlagBits att_usage = subpass->attachments[i].usage;
+      assert(util_bitcount(att_usage) == 1);
+
+      if (att_usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT ||
+          att_usage == VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)
+         num_states++;
+   }
+
+   const uint32_t ss_stride = align_u32(isl_dev->ss.size, isl_dev->ss.align);
+   state->attachment_states =
+      anv_state_stream_alloc(&cmd_buffer->surface_state_stream,
+                             num_states * ss_stride, isl_dev->ss.align);
+   if (state->attachment_states.map == NULL) {
+      return anv_batch_set_error(&cmd_buffer->batch,
+                                 VK_ERROR_OUT_OF_DEVICE_MEMORY);
+   }
+
+   struct anv_state next_state = state->attachment_states;
+   next_state.alloc_size = isl_dev->ss.size;
+
+   state->null_surface_state = next_state;
+   next_state.offset += ss_stride;
+   next_state.map += ss_stride;
+
+   for (uint32_t i = 0; i < subpass->attachment_count; i++) {
+      uint32_t att = subpass->attachments[i].attachment;
+      if (att == VK_ATTACHMENT_UNUSED)
+         continue;
+
+      assert(att < pass->attachment_count);
+      if (!vk_format_is_color(pass->attachments[att].format))
+         continue;
+
+      const VkImageUsageFlagBits att_usage = subpass->attachments[i].usage;
+      assert(util_bitcount(att_usage) == 1);
+
+      if (att_usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)
+         state->attachments[att].color.state = next_state;
+      else if (att_usage == VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)
+         state->attachments[att].input.state = next_state;
+      else
+         continue;
+
+      /* Each color/input reference consumes exactly one slot; step past it. */
+      next_state.offset += ss_stride;
+      next_state.map += ss_stride;
+   }
+
+   assert(next_state.offset == state->attachment_states.offset +
+                               state->attachment_states.alloc_size);
+   return VK_SUCCESS;
+}
+
 VkResult
 genX(BeginCommandBuffer)(
     VkCommandBuffer                             commandBuffer,
@@ -1599,17 +1615,28 @@ genX(BeginCommandBuffer)(
    if (cmd_buffer->usage_flags &
        VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
       assert(pBeginInfo->pInheritanceInfo);
-      cmd_buffer->state.pass =
-         anv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
-      cmd_buffer->state.subpass =
-         &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
+      ANV_FROM_HANDLE(anv_render_pass, pass,
+                      pBeginInfo->pInheritanceInfo->renderPass);
+      struct anv_subpass *subpass =
+         &pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
+      ANV_FROM_HANDLE(anv_framebuffer, framebuffer,
+                      pBeginInfo->pInheritanceInfo->framebuffer);
+
+      cmd_buffer->state.pass = pass;
+      cmd_buffer->state.subpass = subpass;
 
       /* This is optional in the inheritance info. */
-      cmd_buffer->state.framebuffer =
-         anv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer);
+      cmd_buffer->state.framebuffer = framebuffer;
 
-      result = genX(cmd_buffer_setup_attachments)(cmd_buffer,
-                                                  cmd_buffer->state.pass, NULL);
+      result = genX(cmd_buffer_setup_attachments)(cmd_buffer, pass,
+                                                  framebuffer, NULL);
+      if (result != VK_SUCCESS)
+         return result;
+
+      result = genX(cmd_buffer_alloc_att_surf_states)(cmd_buffer, pass,
+                                                      subpass);
+      if (result != VK_SUCCESS)
+         return result;
 
       /* Record that HiZ is enabled if we can. */
       if (cmd_buffer->state.framebuffer) {
@@ -1626,7 +1653,7 @@ genX(BeginCommandBuffer)(
                                        VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
                                        layout);
 
-            cmd_buffer->state.hiz_enabled = aux_usage == ISL_AUX_USAGE_HIZ;
+            cmd_buffer->state.hiz_enabled = isl_aux_usage_has_hiz(aux_usage);
          }
       }
 
@@ -1773,8 +1800,8 @@ genX(CmdExecuteCommands)(
           */
          struct anv_bo *ss_bo =
             primary->device->surface_state_pool.block_pool.bo;
-         struct anv_state src_state = primary->state.render_pass_states;
-         struct anv_state dst_state = secondary->state.render_pass_states;
+         struct anv_state src_state = primary->state.attachment_states;
+         struct anv_state dst_state = secondary->state.attachment_states;
          assert(src_state.alloc_size == dst_state.alloc_size);
 
          genX(cmd_buffer_so_memcpy)(primary,
@@ -2348,6 +2375,7 @@ void genX(CmdPipelineBarrier)(
 
       if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
          transition_depth_buffer(cmd_buffer, image,
+                                 base_layer, layer_count,
                                  pImageMemoryBarriers[i].oldLayout,
                                  pImageMemoryBarriers[i].newLayout);
       }
@@ -2534,6 +2562,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
             surface_state = cmd_buffer->state.null_surface_state;
          }
 
+         assert(surface_state.map);
          bt_map[s] = surface_state.offset + state_offset;
          break;
 
@@ -2553,6 +2582,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                        surface_state, format,
                                        constant_data, constant_data_size, 1);
 
+         assert(surface_state.map);
          bt_map[s] = surface_state.offset + state_offset;
          add_surface_reloc(cmd_buffer, surface_state, constant_data);
          break;
@@ -2571,6 +2601,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                        format,
                                        cmd_buffer->state.compute.num_workgroups,
                                        12, 1);
+
+         assert(surface_state.map);
          bt_map[s] = surface_state.offset + state_offset;
          if (need_client_mem_relocs) {
             add_surface_reloc(cmd_buffer, surface_state,
@@ -2712,6 +2744,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
             assert(!"Invalid descriptor type");
             continue;
          }
+         assert(surface_state.map);
          bt_map[s] = surface_state.offset + state_offset;
          break;
       }
@@ -4852,7 +4885,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
       const uint32_t ds =
          cmd_buffer->state.subpass->depth_stencil_attachment->attachment;
       info.hiz_usage = cmd_buffer->state.attachments[ds].aux_usage;
-      if (info.hiz_usage == ISL_AUX_USAGE_HIZ) {
+      if (info.hiz_usage != ISL_AUX_USAGE_NONE) {
+         assert(isl_aux_usage_has_hiz(info.hiz_usage));
          info.hiz_surf = &image->planes[depth_plane].aux_surface.isl;
 
          info.hiz_address =
@@ -4902,7 +4936,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
             (struct anv_address) { cmd_buffer->device->workaround_bo, 0 };
       }
    }
-   cmd_buffer->state.hiz_enabled = info.hiz_usage == ISL_AUX_USAGE_HIZ;
+   cmd_buffer->state.hiz_enabled = isl_aux_usage_has_hiz(info.hiz_usage);
 }
 
 /**
@@ -4946,7 +4980,8 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
                          uint32_t subpass_id)
 {
    struct anv_cmd_state *cmd_state = &cmd_buffer->state;
-   struct anv_subpass *subpass = &cmd_state->pass->subpasses[subpass_id];
+   struct anv_render_pass *pass = cmd_state->pass;
+   struct anv_subpass *subpass = &pass->subpasses[subpass_id];
    cmd_state->subpass = subpass;
 
    cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS;
@@ -4996,7 +5031,7 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
        * color or auxiliary buffer usage isn't supported by the sampler.
        */
       const bool input_needs_resolve =
-            (att_state->fast_clear && !att_state->clear_color_is_zero_one) ||
+            (att_state->fast_clear && !att_state->clear_color_is_zero) ||
             att_state->input_aux_usage != att_state->aux_usage;
 
       VkImageLayout target_layout;
@@ -5035,6 +5070,7 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
 
       if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
          transition_depth_buffer(cmd_buffer, image,
+                                 base_layer, layer_count,
                                  att_state->current_layout, target_layout);
          att_state->aux_usage =
             anv_layout_to_aux_usage(&cmd_buffer->device->info, image,
@@ -5155,12 +5191,10 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
       } else if (att_state->pending_clear_aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
                                                      VK_IMAGE_ASPECT_STENCIL_BIT)) {
          if (att_state->fast_clear && !is_multiview) {
-            /* We currently only support HiZ for single-layer images */
+            /* We currently only support HiZ for single-LOD images */
             if (att_state->pending_clear_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
-               assert(iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ);
+               assert(isl_aux_usage_has_hiz(iview->image->planes[0].aux_usage));
                assert(iview->planes[0].isl.base_level == 0);
-               assert(iview->planes[0].isl.base_array_layer == 0);
-               assert(fb->layers == 1);
             }
 
             anv_image_hiz_clear(cmd_buffer, image,
@@ -5203,67 +5237,6 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
          assert(att_state->pending_clear_aspects == 0);
       }
 
-      if (GEN_GEN < 10 &&
-          (att_state->pending_load_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) &&
-          image->planes[0].aux_usage != ISL_AUX_USAGE_NONE &&
-          iview->planes[0].isl.base_level == 0 &&
-          iview->planes[0].isl.base_array_layer == 0) {
-         if (att_state->aux_usage != ISL_AUX_USAGE_NONE) {
-            genX(copy_fast_clear_dwords)(cmd_buffer, att_state->color.state,
-                                         image, VK_IMAGE_ASPECT_COLOR_BIT,
-                                         false /* copy to ss */);
-         }
-
-         if (need_input_attachment_state(&cmd_state->pass->attachments[a]) &&
-             att_state->input_aux_usage != ISL_AUX_USAGE_NONE) {
-            genX(copy_fast_clear_dwords)(cmd_buffer, att_state->input.state,
-                                         image, VK_IMAGE_ASPECT_COLOR_BIT,
-                                         false /* copy to ss */);
-         }
-      }
-
-      if (subpass->attachments[i].usage ==
-          VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
-         /* We assume that if we're starting a subpass, we're going to do some
-          * rendering so we may end up with compressed data.
-          */
-         genX(cmd_buffer_mark_image_written)(cmd_buffer, iview->image,
-                                             VK_IMAGE_ASPECT_COLOR_BIT,
-                                             att_state->aux_usage,
-                                             iview->planes[0].isl.base_level,
-                                             iview->planes[0].isl.base_array_layer,
-                                             fb->layers);
-      } else if (subpass->attachments[i].usage ==
-                 VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
-         /* We may be writing depth or stencil so we need to mark the surface.
-          * Unfortunately, there's no way to know at this point whether the
-          * depth or stencil tests used will actually write to the surface.
-          *
-          * Even though stencil may be plane 1, it always shares a base_level
-          * with depth.
-          */
-         const struct isl_view *ds_view = &iview->planes[0].isl;
-         if (iview->aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) {
-            genX(cmd_buffer_mark_image_written)(cmd_buffer, image,
-                                                VK_IMAGE_ASPECT_DEPTH_BIT,
-                                                att_state->aux_usage,
-                                                ds_view->base_level,
-                                                ds_view->base_array_layer,
-                                                fb->layers);
-         }
-         if (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
-            /* Even though stencil may be plane 1, it always shares a
-             * base_level with depth.
-             */
-            genX(cmd_buffer_mark_image_written)(cmd_buffer, image,
-                                                VK_IMAGE_ASPECT_STENCIL_BIT,
-                                                ISL_AUX_USAGE_NONE,
-                                                ds_view->base_level,
-                                                ds_view->base_array_layer,
-                                                fb->layers);
-         }
-      }
-
       /* If multiview is enabled, then we are only done clearing when we no
        * longer have pending layers to clear, or when we have processed the
        * last subpass that uses this attachment.
@@ -5277,6 +5250,83 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
       att_state->pending_load_aspects = 0;
    }
 
+   /* We've transitioned all our images, possibly fast-clearing them.  Now we
+    * can fill out the surface states that we will use as render targets and
+    * input attachments during actual subpass rendering.
+    */
+   VkResult result = genX(cmd_buffer_alloc_att_surf_states)(cmd_buffer,
+                                                            pass, subpass);
+   if (result != VK_SUCCESS)
+      return;
+
+   isl_null_fill_state(&cmd_buffer->device->isl_dev,
+                       cmd_state->null_surface_state.map,
+                       isl_extent3d(fb->width, fb->height, fb->layers));
+
+   for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
+      const uint32_t att = subpass->attachments[i].attachment;
+      if (att == VK_ATTACHMENT_UNUSED)
+         continue;
+
+      assert(att < cmd_state->pass->attachment_count);
+      struct anv_render_pass_attachment *pass_att = &pass->attachments[att];
+      struct anv_attachment_state *att_state = &cmd_state->attachments[att];
+      struct anv_image_view *iview = att_state->image_view;
+
+      if (!vk_format_is_color(pass_att->format))
+         continue;
+
+      const VkImageUsageFlagBits att_usage = subpass->attachments[i].usage;
+      assert(util_bitcount(att_usage) == 1);
+
+      struct anv_surface_state *surface_state;
+      isl_surf_usage_flags_t isl_surf_usage;
+      enum isl_aux_usage isl_aux_usage;
+      if (att_usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
+         surface_state = &att_state->color;
+         isl_surf_usage = ISL_SURF_USAGE_RENDER_TARGET_BIT;
+         isl_aux_usage = att_state->aux_usage;
+      } else if (att_usage == VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
+         surface_state = &att_state->input;
+         isl_surf_usage = ISL_SURF_USAGE_TEXTURE_BIT;
+         isl_aux_usage = att_state->input_aux_usage;
+      } else {
+         continue;
+      }
+
+      /* We had better have a surface state when we get here */
+      assert(surface_state->state.map);
+
+      union isl_color_value clear_color = { .u32 = { 0, } };
+      if (pass_att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR &&
+          att_state->fast_clear)
+         anv_clear_color_from_att_state(&clear_color, att_state, iview);
+
+      anv_image_fill_surface_state(cmd_buffer->device,
+                                   iview->image,
+                                   VK_IMAGE_ASPECT_COLOR_BIT,
+                                   &iview->planes[0].isl,
+                                   isl_surf_usage,
+                                   isl_aux_usage,
+                                   &clear_color,
+                                   0,
+                                   surface_state,
+                                   NULL);
+
+      add_surface_state_relocs(cmd_buffer, *surface_state);
+
+      if (GEN_GEN < 10 &&
+          pass_att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD &&
+          iview->image->planes[0].aux_usage != ISL_AUX_USAGE_NONE &&
+          iview->planes[0].isl.base_level == 0 &&
+          iview->planes[0].isl.base_array_layer == 0) {
+         genX(copy_fast_clear_dwords)(cmd_buffer, surface_state->state,
+                                      iview->image,
+                                      VK_IMAGE_ASPECT_COLOR_BIT,
+                                      false /* copy to ss */);
+      }
+   }
+
 #if GEN_GEN >= 11
    /* The PIPE_CONTROL command description says:
     *
@@ -5334,6 +5384,72 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer)
    uint32_t subpass_id = anv_get_subpass_id(&cmd_buffer->state);
    struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
 
+   /* We are done with the previous subpass and all rendering directly to that
+    * subpass is now complete.  Zero out all the surface states so we don't
+    * accidentally use them between now and the next subpass.
+    */
+   for (uint32_t i = 0; i < cmd_state->pass->attachment_count; ++i) {
+      memset(&cmd_state->attachments[i].color, 0,
+             sizeof(cmd_state->attachments[i].color));
+      memset(&cmd_state->attachments[i].input, 0,
+             sizeof(cmd_state->attachments[i].input));
+   }
+   cmd_state->null_surface_state = ANV_STATE_NULL;
+   cmd_state->attachment_states = ANV_STATE_NULL;
+
+   for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
+      const uint32_t a = subpass->attachments[i].attachment;
+      if (a == VK_ATTACHMENT_UNUSED)
+         continue;
+
+      assert(a < cmd_state->pass->attachment_count);
+      struct anv_attachment_state *att_state = &cmd_state->attachments[a];
+      struct anv_image_view *iview = att_state->image_view;
+
+      assert(util_bitcount(subpass->attachments[i].usage) == 1);
+      if (subpass->attachments[i].usage ==
+          VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
+         /* We assume that if we're ending a subpass, we did do some rendering
+          * so we may end up with compressed data.
+          */
+         genX(cmd_buffer_mark_image_written)(cmd_buffer, iview->image,
+                                             VK_IMAGE_ASPECT_COLOR_BIT,
+                                             att_state->aux_usage,
+                                             iview->planes[0].isl.base_level,
+                                             iview->planes[0].isl.base_array_layer,
+                                             fb->layers);
+      } else if (subpass->attachments[i].usage ==
+                 VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
+         /* We may be writing depth or stencil so we need to mark the surface.
+          * Unfortunately, there's no way to know at this point whether the
+          * depth or stencil tests used will actually write to the surface.
+          *
+          * Even though stencil may be plane 1, it always shares a base_level
+          * with depth.
+          */
+         const struct isl_view *ds_view = &iview->planes[0].isl;
+         if (iview->aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) {
+            genX(cmd_buffer_mark_image_written)(cmd_buffer, iview->image,
+                                                VK_IMAGE_ASPECT_DEPTH_BIT,
+                                                att_state->aux_usage,
+                                                ds_view->base_level,
+                                                ds_view->base_array_layer,
+                                                fb->layers);
+         }
+         if (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
+            /* Even though stencil may be plane 1, it always shares a
+             * base_level with depth.
+             */
+            genX(cmd_buffer_mark_image_written)(cmd_buffer, iview->image,
+                                                VK_IMAGE_ASPECT_STENCIL_BIT,
+                                                ISL_AUX_USAGE_NONE,
+                                                ds_view->base_level,
+                                                ds_view->base_array_layer,
+                                                fb->layers);
+         }
+      }
+   }
+
    if (subpass->has_color_resolve) {
       /* We are about to do some MSAA resolves.  We need to flush so that the
        * result of writes to the MSAA color attachments show up in the sampler
@@ -5437,6 +5553,8 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer)
           * able to handle.
           */
          transition_depth_buffer(cmd_buffer, src_iview->image,
+                                 src_iview->planes[0].isl.base_array_layer,
+                                 fb->layers,
                                  src_state->current_layout,
                                  VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
          src_state->aux_usage =
@@ -5462,6 +5580,8 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer)
             dst_initial_layout = VK_IMAGE_LAYOUT_UNDEFINED;
 
          transition_depth_buffer(cmd_buffer, dst_iview->image,
+                                 dst_iview->planes[0].isl.base_array_layer,
+                                 fb->layers,
                                  dst_initial_layout,
                                  VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
          dst_state->aux_usage =
@@ -5572,57 +5692,6 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer)
       struct anv_image_view *iview = cmd_state->attachments[a].image_view;
       const struct anv_image *image = iview->image;
 
-      if ((image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) &&
-          image->vk_format != iview->vk_format) {
-         enum anv_fast_clear_type fast_clear_type =
-            anv_layout_to_fast_clear_type(&cmd_buffer->device->info,
-                                          image, VK_IMAGE_ASPECT_COLOR_BIT,
-                                          att_state->current_layout);
-
-         /* If any clear color was used, flush it down the aux surfaces. If we
-          * don't do it now using the view's format we might use the clear
-          * color incorrectly in the following resolves (for example with an
-          * SRGB view & a UNORM image).
-          */
-         if (fast_clear_type != ANV_FAST_CLEAR_NONE) {
-            anv_perf_warn(cmd_buffer->device, iview,
-                          "Doing a partial resolve to get rid of clear color at the "
-                          "end of a renderpass due to an image/view format mismatch");
-
-            uint32_t base_layer, layer_count;
-            if (image->type == VK_IMAGE_TYPE_3D) {
-               base_layer = 0;
-               layer_count = anv_minify(iview->image->extent.depth,
-                                        iview->planes[0].isl.base_level);
-            } else {
-               base_layer = iview->planes[0].isl.base_array_layer;
-               layer_count = fb->layers;
-            }
-
-            for (uint32_t a = 0; a < layer_count; a++) {
-               uint32_t array_layer = base_layer + a;
-               if (image->samples == 1) {
-                  anv_cmd_predicated_ccs_resolve(cmd_buffer, image,
-                                                 iview->planes[0].isl.format,
-                                                 iview->planes[0].isl.swizzle,
-                                                 VK_IMAGE_ASPECT_COLOR_BIT,
-                                                 iview->planes[0].isl.base_level,
-                                                 array_layer,
-                                                 ISL_AUX_OP_PARTIAL_RESOLVE,
-                                                 ANV_FAST_CLEAR_NONE);
-               } else {
-                  anv_cmd_predicated_mcs_resolve(cmd_buffer, image,
-                                                 iview->planes[0].isl.format,
-                                                 iview->planes[0].isl.swizzle,
-                                                 VK_IMAGE_ASPECT_COLOR_BIT,
-                                                 base_layer,
-                                                 ISL_AUX_OP_PARTIAL_RESOLVE,
-                                                 ANV_FAST_CLEAR_NONE);
-               }
-            }
-         }
-      }
-
       /* Transition the image into the final layout for this render pass */
       VkImageLayout target_layout =
          cmd_state->pass->attachments[a].final_layout;
@@ -5649,6 +5718,7 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer)
 
       if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
          transition_depth_buffer(cmd_buffer, image,
+                                 base_layer, layer_count,
                                  att_state->current_layout, target_layout);
       }
 
@@ -5678,14 +5748,15 @@ void genX(CmdBeginRenderPass)(
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
    ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
+   VkResult result;
 
    cmd_buffer->state.framebuffer = framebuffer;
    cmd_buffer->state.pass = pass;
    cmd_buffer->state.render_area = pRenderPassBegin->renderArea;
-   VkResult result =
-      genX(cmd_buffer_setup_attachments)(cmd_buffer, pass, pRenderPassBegin);
 
-   /* If we failed to setup the attachments we should not try to go further */
+   result = genX(cmd_buffer_setup_attachments)(cmd_buffer, pass,
+                                               framebuffer,
+                                               pRenderPassBegin);
    if (result != VK_SUCCESS) {
       assert(anv_batch_has_error(&cmd_buffer->batch));
       return;