From 5e8069a5726ec62ce21d1d708cb6e82ae628de14 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 17 Nov 2016 22:55:30 -0800 Subject: [PATCH] anv: Add support for fast clears on gen9 Reviewed-by: Jordan Justen --- src/intel/vulkan/anv_blorp.c | 102 +++++++++++++++++++++++++---- src/intel/vulkan/anv_private.h | 3 + src/intel/vulkan/genX_cmd_buffer.c | 100 +++++++++++++++++++++++----- 3 files changed, 176 insertions(+), 29 deletions(-) diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 4b639e4a139..159e4a01d01 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -1193,16 +1193,35 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) struct blorp_surf surf; get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT, att_state->aux_usage, &surf); + surf.clear_color = vk_to_isl_color(att_state->clear_value.color); const VkRect2D render_area = cmd_buffer->state.render_area; - blorp_clear(&batch, &surf, iview->isl.format, iview->isl.swizzle, - iview->isl.base_level, - iview->isl.base_array_layer, fb->layers, - render_area.offset.x, render_area.offset.y, - render_area.offset.x + render_area.extent.width, - render_area.offset.y + render_area.extent.height, - vk_to_isl_color(att_state->clear_value.color), NULL); + if (att_state->fast_clear) { + blorp_fast_clear(&batch, &surf, iview->isl.format, + iview->isl.base_level, + iview->isl.base_array_layer, fb->layers, + render_area.offset.x, render_area.offset.y, + render_area.offset.x + render_area.extent.width, + render_area.offset.y + render_area.extent.height); + + /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear": + * + * "After Render target fast clear, pipe-control with color cache + * write-flush must be issued before sending any DRAW commands on + * that render target." + */ + cmd_buffer->state.pending_pipe_bits |= + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT; + } else { + blorp_clear(&batch, &surf, iview->isl.format, iview->isl.swizzle, + iview->isl.base_level, + iview->isl.base_array_layer, fb->layers, + render_area.offset.x, render_area.offset.y, + render_area.offset.x + render_area.extent.width, + render_area.offset.y + render_area.extent.height, + surf.clear_color, NULL); + } att_state->pending_clear_aspects = 0; } @@ -1313,10 +1332,12 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer, struct anv_attachment_state *att_state = &cmd_buffer->state.attachments[att]; - assert(att_state->aux_usage != ISL_AUX_USAGE_CCS_D); - if (att_state->aux_usage != ISL_AUX_USAGE_CCS_E) + if (att_state->aux_usage == ISL_AUX_USAGE_NONE) return; /* Nothing to resolve */ + assert(att_state->aux_usage == ISL_AUX_USAGE_CCS_E || + att_state->aux_usage == ISL_AUX_USAGE_CCS_D); + struct anv_render_pass *pass = cmd_buffer->state.pass; struct anv_subpass *subpass = cmd_buffer->state.subpass; unsigned subpass_idx = subpass - pass->subpasses; @@ -1327,14 +1348,17 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer, * of a particular attachment. That way we only resolve once but it's * still hot in the cache. */ + bool found_draw = false; + enum anv_subpass_usage usage = 0; for (uint32_t s = subpass_idx + 1; s < pass->subpass_count; s++) { - enum anv_subpass_usage usage = pass->attachments[att].subpass_usage[s]; + usage |= pass->attachments[att].subpass_usage[s]; if (usage & (ANV_SUBPASS_USAGE_DRAW | ANV_SUBPASS_USAGE_RESOLVE_DST)) { /* We found another subpass that draws to this attachment. We'll * wait to resolve until then. */ - return; + found_draw = true; + break; } } @@ -1342,12 +1366,60 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image = iview->image; assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT); - if (image->aux_usage == ISL_AUX_USAGE_CCS_E) + enum blorp_fast_clear_op resolve_op = BLORP_FAST_CLEAR_OP_NONE; + if (!found_draw) { + /* This is the last subpass that writes to this attachment so we need to + * resolve here. Ideally, we would like to only resolve if the storeOp + * is set to VK_ATTACHMENT_STORE_OP_STORE. However, we need to ensure + * that the CCS bits are set to "resolved" because there may be copy or + * blit operations (which may ignore CCS) between now and the next time + * we render and we need to ensure that anything they write will be + * respected in the next render. Unfortunately, the hardware does not + * provide us with any sort of "invalidate" pass that sets the CCS to + * "resolved" without writing to the render target. + */ + if (iview->image->aux_usage != ISL_AUX_USAGE_CCS_E) { + /* The image destination surface doesn't support compression outside + * the render pass. We need a full resolve. + */ + resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL; + } else if (att_state->fast_clear) { + /* We don't know what to do with clear colors outside the render + * pass. We need a partial resolve. + */ + resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL; + } else { + /* The image "natively" supports all the compression we care about + * and we don't need to resolve at all. If this is the case, we also + * don't need to resolve for any of the input attachment cases below. + */ + } + } else if (usage & ANV_SUBPASS_USAGE_INPUT) { + /* Input attachments are clear-color aware so, at least on Sky Lake, we + * can frequently sample from them with no resolves at all. + */ + if (att_state->aux_usage != att_state->input_aux_usage) { + assert(att_state->input_aux_usage == ISL_AUX_USAGE_NONE); + resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL; + } else if (!att_state->clear_color_is_zero_one) { + /* Sky Lake PRM, Vol. 2d, RENDER_SURFACE_STATE::Red Clear Color: + * + * "If Number of Multisamples is MULTISAMPLECOUNT_1 AND if this RT + * is fast cleared with non-0/1 clear value, this RT must be + * partially resolved (refer to Partial Resolve operation) before + * binding this surface to Sampler." + */ + resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL; + } + } + + if (resolve_op == BLORP_FAST_CLEAR_OP_NONE) return; struct blorp_surf surf; get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT, att_state->aux_usage, &surf); + surf.clear_color = vk_to_isl_color(att_state->clear_value.color); /* From the Sky Lake PRM Vol. 7, "Render Target Resolve": * @@ -1368,12 +1440,14 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer, blorp_ccs_resolve(batch, &surf, iview->isl.base_level, iview->isl.base_array_layer + layer, - iview->isl.format, - BLORP_FAST_CLEAR_OP_RESOLVE_FULL); + iview->isl.format, resolve_op); } cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT; + + /* Once we've done any sort of resolve, we're no longer fast-cleared */ + att_state->fast_clear = false; } void diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 7931d4bb790..2fc543daacc 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1100,11 +1100,14 @@ void anv_dynamic_state_copy(struct anv_dynamic_state *dest, */ struct anv_attachment_state { enum isl_aux_usage aux_usage; + enum isl_aux_usage input_aux_usage; struct anv_state color_rt_state; struct anv_state input_att_state; VkImageAspectFlags pending_clear_aspects; + bool fast_clear; VkClearValue clear_value; + bool clear_color_is_zero_one; }; /** State required while building cmd buffer */ diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 7aaa8a1cffe..4649dae1a11 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -191,23 +191,87 @@ add_image_view_relocs(struct anv_cmd_buffer *cmd_buffer, } } -static enum isl_aux_usage -fb_attachment_get_aux_usage(struct anv_device *device, - struct anv_framebuffer *fb, - uint32_t attachment) +static bool +color_is_zero_one(VkClearColorValue value, enum isl_format format) { - struct anv_image_view *iview = fb->attachments[attachment]; + if (isl_format_has_int_channel(format)) { + for (unsigned i = 0; i < 4; i++) { + if (value.int32[i] != 0 && value.int32[i] != 1) + return false; + } + } else { + for (unsigned i = 0; i < 4; i++) { + if (value.float32[i] != 0.0f && value.float32[i] != 1.0f) + return false; + } + } - if (iview->image->aux_surface.isl.size == 0) - return ISL_AUX_USAGE_NONE; /* No aux surface */ + return true; +} + +static void +color_attachment_compute_aux_usage(struct anv_device *device, + struct anv_attachment_state *att_state, + struct anv_image_view *iview, + VkRect2D render_area, + union isl_color_value *fast_clear_color) +{ + if (iview->image->aux_surface.isl.size == 0) { + att_state->aux_usage = ISL_AUX_USAGE_NONE; + att_state->input_aux_usage = ISL_AUX_USAGE_NONE; + att_state->fast_clear = false; + return; + } assert(iview->image->aux_surface.isl.usage & ISL_SURF_USAGE_CCS_BIT); - if (isl_format_supports_lossless_compression(&device->info, - iview->isl.format)) - return ISL_AUX_USAGE_CCS_E; + att_state->clear_color_is_zero_one = + color_is_zero_one(att_state->clear_value.color, iview->isl.format); + + if (att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT) { + /* Start off assuming fast clears are possible */ + att_state->fast_clear = true; - return ISL_AUX_USAGE_NONE; + /* Potentially, we could do partial fast-clears but doing so has crazy + * alignment restrictions. It's easier to just restrict to full size + * fast clears for now. + */ + if (render_area.offset.x != 0 || + render_area.offset.y != 0 || + render_area.extent.width != iview->extent.width || + render_area.extent.height != iview->extent.height) + att_state->fast_clear = false; + + if (att_state->fast_clear) { + memcpy(fast_clear_color->u32, att_state->clear_value.color.uint32, + sizeof(fast_clear_color->u32)); + } + } else { + att_state->fast_clear = false; + } + + if (isl_format_supports_lossless_compression(&device->info, + iview->isl.format)) { + att_state->aux_usage = ISL_AUX_USAGE_CCS_E; + att_state->input_aux_usage = ISL_AUX_USAGE_CCS_E; + } else if (att_state->fast_clear) { + att_state->aux_usage = ISL_AUX_USAGE_CCS_D; + /* From the Sky Lake PRM, RENDER_SURFACE_STATE::AuxiliarySurfaceMode: + * + * "If Number of Multisamples is MULTISAMPLECOUNT_1, AUX_CCS_D + * setting is only allowed if Surface Format supported for Fast + * Clear. In addition, if the surface is bound to the sampling + * engine, Surface Format must be supported for Render Target + * Compression for surfaces bound to the sampling engine." + * + * In other words, we can't sample from a fast-cleared image if it + * doesn't also support color compression. + */ + att_state->input_aux_usage = ISL_AUX_USAGE_NONE; + } else { + att_state->aux_usage = ISL_AUX_USAGE_NONE; + att_state->input_aux_usage = ISL_AUX_USAGE_NONE; + } } static bool @@ -350,9 +414,12 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer, struct anv_image_view *iview = framebuffer->attachments[i]; assert(iview->vk_format == att->format); + union isl_color_value clear_color = { .u32 = { 0, } }; if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) { - state->attachments[i].aux_usage = - fb_attachment_get_aux_usage(cmd_buffer->device, framebuffer, i); + color_attachment_compute_aux_usage(cmd_buffer->device, + &state->attachments[i], + iview, begin->renderArea, + &clear_color); struct isl_view view = iview->isl; view.usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT; @@ -362,6 +429,7 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer, .view = &view, .aux_surf = &iview->image->aux_surface.isl, .aux_usage = state->attachments[i].aux_usage, + .clear_color = clear_color, .mocs = cmd_buffer->device->default_mocs); add_image_view_relocs(cmd_buffer, iview, @@ -369,6 +437,7 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer, state->attachments[i].color_rt_state); } else { state->attachments[i].aux_usage = ISL_AUX_USAGE_NONE; + state->attachments[i].input_aux_usage = ISL_AUX_USAGE_NONE; } if (need_input_attachment_state(&pass->attachments[i])) { @@ -386,11 +455,12 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer, .surf = surf, .view = &view, .aux_surf = &iview->image->aux_surface.isl, - .aux_usage = state->attachments[i].aux_usage, + .aux_usage = state->attachments[i].input_aux_usage, + .clear_color = clear_color, .mocs = cmd_buffer->device->default_mocs); add_image_view_relocs(cmd_buffer, iview, - state->attachments[i].aux_usage, + state->attachments[i].input_aux_usage, state->attachments[i].input_att_state); } } -- 2.30.2