+ /* We only allocate input attachment states for color surfaces. Compression
+ * is not yet enabled for depth textures and stencil doesn't allow
+ * compression so we can just use the texture surface state from the view.
+ */
+ return vk_format_is_color(att->format);
+}
+
+/* Moves a HiZ-enabled depth buffer from one image layout to another,
+ * emitting a resolve when HiZ usage differs between the two layouts. Unless
+ * the initial layout is undefined, the HiZ buffer and the depth buffer
+ * represent the same data once this operation completes.
+ *
+ * NOTE(review): the undefined layout means the user doesn't care about the
+ * current contents, so a data-preserving resolve is not needed for it.
+ * Nothing in this function special-cases that layout; presumably
+ * anv_layout_to_aux_usage() accounts for it -- confirm.
+ */
+static void
+transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
+                        const struct anv_image *image,
+                        VkImageLayout initial_layout,
+                        VkImageLayout final_layout)
+{
+   assert(image);
+
+   /* Images that don't use HiZ never need a resolve. */
+   if (image->aux_usage != ISL_AUX_USAGE_HIZ)
+      return;
+
+   /* A transition between identical layouts is a no-op as well. */
+   if (initial_layout == final_layout)
+      return;
+
+   /* Determine whether HiZ is in use on each side of the transition. */
+   const bool hiz_before =
+      anv_layout_to_aux_usage(&cmd_buffer->device->info, image,
+                              image->aspects, initial_layout) ==
+      ISL_AUX_USAGE_HIZ;
+   const bool hiz_after =
+      anv_layout_to_aux_usage(&cmd_buffer->device->info, image,
+                              image->aspects, final_layout) ==
+      ISL_AUX_USAGE_HIZ;
+
+   /* If the same buffer will be used, no resolves are necessary. */
+   if (hiz_before == hiz_after)
+      return;
+
+   /* HiZ is being turned off: perform a depth resolve.
+    * HiZ is being turned on: perform a HiZ resolve.
+    */
+   anv_gen8_hiz_op_resolve(cmd_buffer, image,
+                           hiz_before ? BLORP_HIZ_OP_DEPTH_RESOLVE
+                                      : BLORP_HIZ_OP_HIZ_RESOLVE);
+}
+
+/* Identifies a field within an image's per-miplevel fast clear state entry.
+ * get_fast_clear_state_offset() maps each value to the field's offset.
+ */
+enum fast_clear_state_field {
+ /* The clear color dword(s); located at the start of the entry. */
+ FAST_CLEAR_STATE_FIELD_CLEAR_COLOR,
+ /* The needs-resolve flag; located isl_dev.ss.clear_value_size past the
+ * start of the entry.
+ */
+ FAST_CLEAR_STATE_FIELD_NEEDS_RESOLVE,
+};
+
+/* Computes the offset of one field of a color image's fast clear state
+ * entry for the given miplevel.
+ *
+ * The entries follow the auxiliary surface: the result is built from the
+ * image offset, the aux surface offset and size, and one entry size per
+ * preceding level. The needs-resolve flag sits clear_value_size past the
+ * clear color, which is at the start of the entry.
+ */
+static inline uint32_t
+get_fast_clear_state_offset(const struct anv_device *device,
+                            const struct anv_image *image,
+                            unsigned level, enum fast_clear_state_field field)
+{
+   assert(device && image);
+   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
+   assert(level < anv_image_aux_levels(image));
+
+   /* Start of this level's entry, which is also where its clear color is. */
+   uint32_t field_offset = image->offset + image->aux_surface.offset +
+                           image->aux_surface.isl.size +
+                           anv_fast_clear_state_entry_size(device) * level;
+
+   /* The resolve flag is stored right after the clear value dword(s). */
+   if (field == FAST_CLEAR_STATE_FIELD_NEEDS_RESOLVE)
+      field_offset += device->isl_dev.ss.clear_value_size;
+
+   /* The field has to lie within the memory owned by the image. */
+   assert(field_offset < image->offset + image->size);
+   return field_offset;
+}
+
+/* Offsets of the two MI_PREDICATE source operands (presumably MMIO register
+ * offsets -- confirm against the hardware docs). The code in this file
+ * programs each operand as two dwords, at the offset and at offset + 4, via
+ * emit_lri()/emit_lrm().
+ */
+#define MI_PREDICATE_SRC0 0x2400
+#define MI_PREDICATE_SRC1 0x2408
+
+/* Manages the state of a color image subresource to ensure resolves are
+ * performed properly: stores needs_resolve into the needs-resolve flag of
+ * the given miplevel's fast clear state entry.
+ */
+static void
+genX(set_image_needs_resolve)(struct anv_cmd_buffer *cmd_buffer,
+                              const struct anv_image *image,
+                              unsigned level, bool needs_resolve)
+{
+   assert(cmd_buffer && image);
+   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
+   assert(level < anv_image_aux_levels(image));
+
+   /* Where this level's needs-resolve flag lives inside the image's BO. */
+   const uint32_t flag_offset =
+      get_fast_clear_state_offset(cmd_buffer->device, image, level,
+                                  FAST_CLEAR_STATE_FIELD_NEEDS_RESOLVE);
+   const struct anv_address flag_address = { image->bo, flag_offset };
+
+   /* The HW docs say that there is no way to guarantee the completion of
+    * the following command. We use it nevertheless because it shows no
+    * issues in testing and is currently being used in the GL driver.
+    */
+   anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
+      sdi.Address = flag_address;
+      sdi.ImmediateData = needs_resolve;
+   }
+}
+
+/* Emits commands that set up the MI predicate from the needs-resolve flag of
+ * the given miplevel of a color image, so that a following predicated
+ * resolve only takes effect when the flag is non-zero.
+ */
+static void
+genX(load_needs_resolve_predicate)(struct anv_cmd_buffer *cmd_buffer,
+ const struct anv_image *image,
+ unsigned level)
+{
+ assert(cmd_buffer && image);
+ assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(level < anv_image_aux_levels(image));
+
+ /* Offset of this level's needs-resolve flag within the image's BO. */
+ const uint32_t resolve_flag_offset =
+ get_fast_clear_state_offset(cmd_buffer->device, image, level,
+ FAST_CLEAR_STATE_FIELD_NEEDS_RESOLVE);
+
+ /* Make the pending predicated resolve a no-op if one is not needed.
+ * predicate = do_resolve = resolve_flag != 0;
+ *
+ * Both dwords of SRC1 are set to zero. The dword of SRC0 at +0 is set to
+ * zero and the dword at +4 is loaded with the resolve flag from memory, so
+ * SRC0 equals SRC1 exactly when the flag is zero.
+ *
+ * NOTE(review): LOAD_LOADINV presumably inverts the SRCS_EQUAL comparison
+ * result, which yields the "flag != 0" predicate stated above -- confirm
+ * against the MI_PREDICATE documentation.
+ */
+ emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 , 0);
+ emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
+ emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 , 0);
+ emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4,
+ image->bo, resolve_flag_offset);
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
+ mip.LoadOperation = LOAD_LOADINV;
+ mip.CombineOperation = COMBINE_SET;
+ mip.CompareOperation = COMPARE_SRCS_EQUAL;
+ }
+}
+
+/* Initializes the fast clear state entry of one miplevel of a color image.
+ *
+ * The needs-resolve flag is always written. The clear value dword(s) are
+ * only written for platform/auxiliary-buffer combinations that restrict
+ * what those dword(s) may contain.
+ */
+static void
+init_fast_clear_state_entry(struct anv_cmd_buffer *cmd_buffer,
+                            const struct anv_image *image,
+                            unsigned level)
+{
+   assert(cmd_buffer && image);
+   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
+   assert(level < anv_image_aux_levels(image));
+
+   /* The resolve flag should be updated to signify that fast-clear/compression
+    * data needs to be removed when leaving the undefined layout. Such data
+    * may need to be removed if it would cause accesses to the color buffer
+    * to return incorrect data. The fast clear data in CCS_D buffers should
+    * be removed because CCS_D isn't enabled all the time.
+    */
+   const bool needs_resolve = image->aux_usage == ISL_AUX_USAGE_NONE;
+   genX(set_image_needs_resolve)(cmd_buffer, image, level, needs_resolve);
+
+   /* The fast clear value dword(s) will be copied into a surface state object.
+    * Ensure that the restrictions of the fields in the dword(s) are followed.
+    *
+    * CCS buffers on SKL+ can have any value set for the clear colors, so
+    * there is nothing to initialize for them.
+    */
+   if (GEN_GEN >= 9 && image->samples == 1)
+      return;
+
+   /* Other combinations of auxiliary buffers and platforms require specific
+    * values in the clear value dword(s). Write them one dword at a time,
+    * starting at the clear color field of this level's entry.
+    */
+   const uint32_t clear_color_offset =
+      get_fast_clear_state_offset(cmd_buffer->device, image, level,
+                                  FAST_CLEAR_STATE_FIELD_CLEAR_COLOR);
+   const unsigned clear_value_size =
+      cmd_buffer->device->isl_dev.ss.clear_value_size;
+
+   for (unsigned dword_offset = 0; dword_offset < clear_value_size;
+        dword_offset += 4) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
+         sdi.Address = (struct anv_address) {
+            image->bo, clear_color_offset + dword_offset
+         };
+
+         if (GEN_GEN >= 9) {
+            /* MCS buffers on SKL+ can only have 1/0 clear colors. */
+            assert(image->aux_usage == ISL_AUX_USAGE_MCS);
+            sdi.ImmediateData = 0;
+         } else if (GEN_VERSIONx10 >= 75) {
+            /* Pre-SKL, the dword containing the clear values also contains
+             * other fields, so we need to initialize those fields to match
+             * the values that would be in a color attachment.
+             */
+            assert(dword_offset == 0);
+            sdi.ImmediateData = ISL_CHANNEL_SELECT_RED   << 25 |
+                                ISL_CHANNEL_SELECT_GREEN << 22 |
+                                ISL_CHANNEL_SELECT_BLUE  << 19 |
+                                ISL_CHANNEL_SELECT_ALPHA << 16;
+         } else if (GEN_VERSIONx10 == 70) {
+            /* On IVB, the dword containing the clear values also contains
+             * other fields that must be zero or can be zero.
+             */
+            assert(dword_offset == 0);
+            sdi.ImmediateData = 0;
+         }
+      }
+   }
+}
+
+/* Copies the fast-clear value dword(s) of one miplevel between a surface
+ * state object and the image's fast clear state buffer.
+ *
+ * If copy_from_surface_state is true, the surface state is the source and
+ * the image's entry is the destination. Otherwise the image's entry is
+ * copied into the surface state and a state cache invalidation is queued.
+ */
+static void
+genX(copy_fast_clear_dwords)(struct anv_cmd_buffer *cmd_buffer,
+                             struct anv_state surface_state,
+                             const struct anv_image *image,
+                             unsigned level,
+                             bool copy_from_surface_state)
+{
+   assert(cmd_buffer && image);
+   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
+   assert(level < anv_image_aux_levels(image));
+
+   /* Surface-state side: the clear value within the surface state pool BO. */
+   struct anv_bo *surf_state_bo =
+      &cmd_buffer->device->surface_state_pool.block_pool.bo;
+   uint32_t surf_clear_offset = surface_state.offset +
+      cmd_buffer->device->isl_dev.ss.clear_value_offset;
+
+   /* Image side: the clear color field of this level's entry. */
+   uint32_t image_clear_offset =
+      get_fast_clear_state_offset(cmd_buffer->device, image, level,
+                                  FAST_CLEAR_STATE_FIELD_CLEAR_COLOR);
+
+   unsigned num_bytes = cmd_buffer->device->isl_dev.ss.clear_value_size;
+
+   if (copy_from_surface_state) {
+      /* Surface state -> image entry; the surface state is left untouched. */
+      genX(cmd_buffer_mi_memcpy)(cmd_buffer, image->bo, image_clear_offset,
+                                 surf_state_bo, surf_clear_offset, num_bytes);
+      return;
+   }
+
+   /* Image entry -> surface state. */
+   genX(cmd_buffer_mi_memcpy)(cmd_buffer, surf_state_bo, surf_clear_offset,
+                              image->bo, image_clear_offset, num_bytes);
+
+   /* Updating a surface state object may require that the state cache be
+    * invalidated. From the SKL PRM, Shared Functions -> State -> State
+    * Caching:
+    *
+    *    Whenever the RENDER_SURFACE_STATE object in memory pointed to by
+    *    the Binding Table Pointer (BTP) and Binding Table Index (BTI) is
+    *    modified [...], the L1 state cache must be invalidated to ensure
+    *    the new surface or sampler state is fetched from system memory.
+    *
+    * In testing, SKL doesn't actually seem to need this, but HSW does.
+    */
+   cmd_buffer->state.pending_pipe_bits |=
+      ANV_PIPE_STATE_CACHE_INVALIDATE_BIT;
+}
+
+/**
+ * @brief Transitions a color buffer from one layout to another.
+ *
+ * See section 6.1.1. Image Layout Transitions of the Vulkan 1.0.50 spec for
+ * more information.
+ *
+ * @param level_count VK_REMAINING_MIP_LEVELS isn't supported.
+ * @param layer_count VK_REMAINING_ARRAY_LAYERS isn't supported. For 3D images,
+ * this represents the maximum layers to transition at each
+ * specified miplevel.
+ */
+static void
+transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
+ const struct anv_image *image,
+ const uint32_t base_level, uint32_t level_count,
+ uint32_t base_layer, uint32_t layer_count,
+ VkImageLayout initial_layout,
+ VkImageLayout final_layout)
+{
+ /* Validate the inputs. */
+ assert(cmd_buffer);
+ assert(image && image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
+ /* These values aren't supported for simplicity's sake. */
+ assert(level_count != VK_REMAINING_MIP_LEVELS &&
+ layer_count != VK_REMAINING_ARRAY_LAYERS);
+ /* Ensure the subresource range is valid. */
+ uint64_t last_level_num = base_level + level_count;
+ const uint32_t max_depth = anv_minify(image->extent.depth, base_level);
+ UNUSED const uint32_t image_layers = MAX2(image->array_size, max_depth);
+ assert((uint64_t)base_layer + layer_count <= image_layers);
+ assert(last_level_num <= image->levels);
+ /* The spec disallows these final layouts. */
+ assert(final_layout != VK_IMAGE_LAYOUT_UNDEFINED &&
+ final_layout != VK_IMAGE_LAYOUT_PREINITIALIZED);
+
+ /* No work is necessary if the layout stays the same or if this subresource
+ * range lacks auxiliary data.
+ */
+ if (initial_layout == final_layout ||
+ base_layer >= anv_image_aux_layers(image, base_level))
+ return;
+
+ /* A transition of a 3D subresource works on all slices at a time. */
+ if (image->type == VK_IMAGE_TYPE_3D) {
+ base_layer = 0;
+ layer_count = anv_minify(image->extent.depth, base_level);
+ }
+
+ /* We're interested in the subresource range subset that has aux data. */
+ level_count = MIN2(level_count, anv_image_aux_levels(image) - base_level);
+ layer_count = MIN2(layer_count,
+ anv_image_aux_layers(image, base_level) - base_layer);
+ last_level_num = base_level + level_count;
+
+ /* Record whether or not the layout is undefined. Pre-initialized images
+ * with auxiliary buffers have a non-linear layout and are thus undefined.
+ */
+ assert(image->tiling == VK_IMAGE_TILING_OPTIMAL);
+ const bool undef_layout = initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
+ initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED;
+
+ /* Do preparatory work before the resolve operation or return early if no
+ * resolve is actually needed.
+ */
+ if (undef_layout) {
+ /* A subresource in the undefined layout may have been aliased and
+ * populated with any arrangement of bits. Therefore, we must initialize
+ * the related aux buffer and clear buffer entry with desirable values.
+ *
+ * Initialize the relevant clear buffer entries.
+ */
+ for (unsigned level = base_level; level < last_level_num; level++)
+ init_fast_clear_state_entry(cmd_buffer, image, level);
+
+ /* Initialize the aux buffers to enable correct rendering. This operation
+ * requires up to two steps: one to rid the aux buffer of data that may
+ * cause GPU hangs, and another to ensure that writes done without aux
+ * will be visible to reads done with aux.
+ *
+ * Having an aux buffer with invalid data is possible for CCS buffers
+ * on SKL+ and for MCS buffers with certain sample counts (2x and 8x). One
+ * easy way to get to a valid state is to fast-clear the specified range.
+ *
+ * Even for MCS buffers that have sample counts that don't require
+ * certain bits to be reserved (4x and 16x), we're unsure if the hardware
+ * will be okay with the sample mappings given by the undefined buffer.
+ * We don't have any data to show that this is a problem, but we want to
+ * avoid causing difficult-to-debug problems.
+ */
+ if ((GEN_GEN >= 9 && image->samples == 1) || image->samples > 1) {
+ if (image->samples == 4 || image->samples == 16) {
+ anv_perf_warn(cmd_buffer->device->instance, image,
+ "Doing a potentially unnecessary fast-clear to "
+ "define an MCS buffer.");
+ }
+
+ anv_image_fast_clear(cmd_buffer, image, base_level, level_count,
+ base_layer, layer_count);
+ }
+ /* At this point, some elements of the CCS buffer may have the fast-clear
+ * bit-arrangement. As the user writes to a subresource, we need to have
+ * the associated CCS elements enter the ambiguated state. This enables
+ * reads (implicit or explicit) to reflect the user-written data instead
+ * of the clear color. The only time such elements will not change their
+ * state as described above, is in a final layout that doesn't have CCS
+ * enabled. In this case, we must force the associated CCS buffers of the
+ * specified range to enter the ambiguated state in advance.
+ */
+ if (image->samples == 1 && image->aux_usage != ISL_AUX_USAGE_CCS_E &&
+ final_layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
+ /* The CCS_D buffer may not be enabled in the final layout. Continue
+ * executing this function to perform a resolve.
+ */
+ anv_perf_warn(cmd_buffer->device->instance, image,
+ "Performing an additional resolve for CCS_D layout "
+ "transition. Consider always leaving it on or "
+ "performing an ambiguation pass.");
+ } else {
+ /* Writes in the final layout will be aware of the auxiliary buffer.
+ * In addition, the clear buffer entries and the auxiliary buffers
+ * have been populated with values that will result in correct
+ * rendering.
+ */
+ return;
+ }
+ } else if (initial_layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
+ /* Resolves are only necessary if the subresource may contain blocks
+ * fast-cleared to values unsupported in other layouts. This only occurs
+ * if the initial layout is COLOR_ATTACHMENT_OPTIMAL.
+ */
+ return;
+ } else if (image->samples > 1) {
+ /* MCS buffers don't need resolving. */
+ return;
+ }
+
+ /* Perform a resolve to synchronize data between the main and aux buffer.
+ * Before we begin, we must satisfy the cache flushing requirement specified
+ * in the Sky Lake PRM Vol. 7, "MCS Buffer for Render Target(s)":
+ *
+ * Any transition from any value in {Clear, Render, Resolve} to a
+ * different value in {Clear, Render, Resolve} requires end of pipe
+ * synchronization.
+ *
+ * We perform a flush of the write cache before and after the clear and
+ * resolve operations to meet this requirement.
+ *
+ * Unlike other drawing, fast clear operations are not properly
+ * synchronized. The first PIPE_CONTROL here likely ensures that the
+ * contents of the previous render or clear hit the render target before we
+ * resolve and the second likely ensures that the resolve is complete before
+ * we do any more rendering or clearing.