From 34541be7b04d76c5589600553995467daca6c30d Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Sat, 3 Feb 2018 09:12:15 -0800
Subject: [PATCH] intel/blorp: Use wide formats for nicely aligned stencil clears

In the case where the stencil clear is nicely aligned, we can clear
stencil much more efficiently by mapping it as a wide format (say
RGBA32_UINT) and blasting out the stencil clear value with a repclear.
On Unigine Heaven, this makes one stencil clear go from non-trivial to
unnoticeable when looking at per-draw timings.

In order for this change to work properly, ANV needs to do a bit more
flushing around depth and stencil clears. i965 and iris already have
the cache tracking logic to handle this so no changes are required
there.

Reviewed-by: Kenneth Graunke
Reviewed-by: Lionel Landwerlin
---
 src/intel/blorp/blorp_clear.c | 122 ++++++++++++++++++++++++++++++++++
 src/intel/vulkan/anv_blorp.c  |  14 +++++
 2 files changed, 136 insertions(+)

diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
index ba103c8377e..595bac18e61 100644
--- a/src/intel/blorp/blorp_clear.c
+++ b/src/intel/blorp/blorp_clear.c
@@ -565,6 +565,121 @@ blorp_clear(struct blorp_batch *batch,
    }
 }
 
+/**
+ * Fast path for stencil-only clears.  Reinterprets a W-tiled R8_UINT stencil
+ * surface as a Y-tiled wide UINT surface (RGBA16_UINT on gen6 and earlier,
+ * RGBA32_UINT otherwise) and fills it with the replicated stencil value.
+ *
+ * For multisampled surfaces, the rectangle (x0, y0)-(x1, y1) is scaled to
+ * units of samples before the alignment check.
+ *
+ * Returns true if the clear was executed for every requested layer.  Returns
+ * false when the fast path does not apply (other format or tiling, partial
+ * stencil mask, rectangle not aligned to 8, or no clear kernel); every false
+ * return happens before anything is executed, so the caller can fall back to
+ * the regular clear path.
+ */
+static bool
+blorp_clear_stencil_as_rgba(struct blorp_batch *batch,
+                            const struct blorp_surf *surf,
+                            uint32_t level, uint32_t start_layer,
+                            uint32_t num_layers,
+                            uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1,
+                            uint8_t stencil_mask, uint8_t stencil_value)
+{
+   /* We only support separate W-tiled stencil for now */
+   if (surf->surf->format != ISL_FORMAT_R8_UINT ||
+       surf->surf->tiling != ISL_TILING_W)
+      return false;
+
+   /* Stencil mask support would require piles of shader magic */
+   if (stencil_mask != 0xff)
+      return false;
+
+   if (surf->surf->samples > 1) {
+      /* Adjust x0, y0, x1, and y1 to be in units of samples */
+      assert(surf->surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED);
+      struct isl_extent2d msaa_px_size_sa =
+         isl_get_interleaved_msaa_px_size_sa(surf->surf->samples);
+
+      x0 *= msaa_px_size_sa.w;
+      y0 *= msaa_px_size_sa.h;
+      x1 *= msaa_px_size_sa.w;
+      y1 *= msaa_px_size_sa.h;
+   }
+
+   /* W-tiles and Y-tiles have the same layout as far as cache lines are
+    * concerned: both are 8x8 cache lines laid out Y-major. The difference is
+    * entirely in how the data is arranged within the cache line. W-tiling
+    * is 8x8 pixels in a swizzled pattern while Y-tiling is 16B by 4 rows
+    * regardless of image format size. As long as everything is aligned to 8,
+    * we can just treat the W-tiled image as Y-tiled, ignore the layout
+    * difference within a cache line, and blast out data.
+    */
+   if (x0 % 8 != 0 || y0 % 8 != 0 || x1 % 8 != 0 || y1 % 8 != 0)
+      return false;
+
+   struct blorp_params params;
+   blorp_params_init(&params);
+
+   if (!blorp_params_get_clear_kernel(batch, &params, true, false))
+      return false;
+
+   memset(&params.wm_inputs.clear_color, stencil_value,
+          sizeof(params.wm_inputs.clear_color));
+
+   /* The Sandy Bridge PRM Vol. 4 Pt. 2, section 2.11.2.1.1 has the
+    * following footnote to the format table:
+    *
+    *    128 BPE Formats cannot be Tiled Y when used as render targets
+    *
+    * We have to use RGBA16_UINT on SNB.
+    */
+   enum isl_format wide_format;
+   if (ISL_DEV_GEN(batch->blorp->isl_dev) <= 6) {
+      wide_format = ISL_FORMAT_R16G16B16A16_UINT;
+
+      /* For RGBA16_UINT, we need to mask the stencil value otherwise, we risk
+       * clamping giving us the wrong values
+       */
+      for (unsigned i = 0; i < 4; i++)
+         params.wm_inputs.clear_color[i] &= 0xffff;
+   } else {
+      wide_format = ISL_FORMAT_R32G32B32A32_UINT;
+   }
+
+   for (uint32_t a = 0; a < num_layers; a++) {
+      uint32_t layer = start_layer + a;
+
+      brw_blorp_surface_info_init(batch->blorp, &params.dst, surf, level,
+                                  layer, ISL_FORMAT_UNSUPPORTED, true);
+
+      if (surf->surf->samples > 1)
+         blorp_surf_fake_interleaved_msaa(batch->blorp->isl_dev, &params.dst);
+
+      /* Make it Y-tiled */
+      blorp_surf_retile_w_to_y(batch->blorp->isl_dev, &params.dst);
+
+      unsigned wide_Bpp =
+         isl_format_get_layout(wide_format)->bpb / 8;
+
+      params.dst.view.format = params.dst.surf.format = wide_format;
+      assert(params.dst.surf.logical_level0_px.width % wide_Bpp == 0);
+      params.dst.surf.logical_level0_px.width /= wide_Bpp;
+      assert(params.dst.tile_x_sa % wide_Bpp == 0);
+      params.dst.tile_x_sa /= wide_Bpp;
+
+      params.x0 = params.dst.tile_x_sa + x0 / (wide_Bpp / 2);
+      params.y0 = params.dst.tile_y_sa + y0 / 2;
+      params.x1 = params.dst.tile_x_sa + x1 / (wide_Bpp / 2);
+      params.y1 = params.dst.tile_y_sa + y1 / 2;
+
+      batch->blorp->exec(batch, &params);
+   }
+
+   return true;
+}
+
 void
 blorp_clear_depth_stencil(struct blorp_batch *batch,
                           const struct blorp_surf *depth,
@@ -575,6 +690,13 @@ blorp_clear_depth_stencil(struct blorp_batch *batch,
                           bool clear_depth, float depth_value,
                           uint8_t stencil_mask, uint8_t stencil_value)
 {
+   if (!clear_depth && blorp_clear_stencil_as_rgba(batch, stencil, level,
+                                                   start_layer, num_layers,
+                                                   x0, y0, x1, y1,
+                                                   stencil_mask,
+                                                   stencil_value))
+      return;
+
    struct blorp_params params;
    blorp_params_init(&params);
 
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 17f0ffad576..2cab98dbe16 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1527,6 +1527,13 @@ anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
                                    ISL_AUX_USAGE_NONE, &stencil);
    }
 
+   /* Blorp may choose to clear stencil using RGBA32_UINT for better
+    * performance. If it does this, we need to flush it out of the depth
+    * cache before rendering to it.
+    */
+   cmd_buffer->state.pending_pipe_bits |=
+      ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
+
    blorp_clear_depth_stencil(&batch, &depth, &stencil,
                              level, base_layer, layer_count,
                              area.offset.x, area.offset.y,
@@ -1537,6 +1544,13 @@ anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
                              (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 0xff : 0,
                              stencil_value);
 
+   /* Blorp may choose to clear stencil using RGBA32_UINT for better
+    * performance. If it does this, we need to flush it out of the render
+    * cache before someone starts trying to do stencil on it.
+    */
+   cmd_buffer->state.pending_pipe_bits |=
+      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
+
    struct blorp_surf stencil_shadow;
    if ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
        get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image,
-- 
2.30.2