From 857b5b5a7f7741b5433c59814c66d0612c05ae7d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 17 May 2017 20:33:21 -0700 Subject: [PATCH] intel/blorp: Add a CCS ambiguation pass This pass performs an "ambiguate" operation on a CCS-compressed surface by manually writing zeros into the CCS. On gen8+, ISL gives us a fairly detailed notion of how the CCS is laid out so this is fairly simple to do. On gen7, the CCS tiling is quite crazy but that isn't an issue because we can only do CCS on single-slice images so we can just blast over the entire CCS buffer if we want to. Reviewed-by: Topi Pohjolainen Reviewed-by: Nanley Chery --- src/intel/blorp/blorp.h | 5 ++ src/intel/blorp/blorp_clear.c | 153 ++++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+) diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h index a1dd57118bb..478a9af5ed1 100644 --- a/src/intel/blorp/blorp.h +++ b/src/intel/blorp/blorp.h @@ -203,6 +203,11 @@ blorp_ccs_resolve(struct blorp_batch *batch, enum isl_format format, enum blorp_fast_clear_op resolve_op); +void +blorp_ccs_ambiguate(struct blorp_batch *batch, + struct blorp_surf *surf, + uint32_t level, uint32_t layer); + void blorp_mcs_partial_resolve(struct blorp_batch *batch, struct blorp_surf *surf, diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c index 8e7bc9f76a5..165dbca8c79 100644 --- a/src/intel/blorp/blorp_clear.c +++ b/src/intel/blorp/blorp_clear.c @@ -881,3 +881,156 @@ blorp_mcs_partial_resolve(struct blorp_batch *batch, batch->blorp->exec(batch, &params); } + +/** Clear a CCS to the "uncompressed" state + * + * This pass is the CCS equivalent of a "HiZ resolve". It sets the CCS values + * for a given layer/level of a surface to 0x0 which is the "uncompressed" + * state which tells the sampler to go look at the main surface. 
+ */
+void
+blorp_ccs_ambiguate(struct blorp_batch *batch,
+                    struct blorp_surf *surf,
+                    uint32_t level, uint32_t layer)
+{
+   struct blorp_params params;
+   blorp_params_init(&params);
+
+   assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 7);
+
+   const struct isl_format_layout *aux_fmtl =
+      isl_format_get_layout(surf->aux_surf->format);
+   assert(aux_fmtl->txc == ISL_TXC_CCS);
+
+   params.dst = (struct brw_blorp_surface_info) {
+      .enabled = true,
+      .addr = surf->aux_addr,
+      .view = {
+         .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
+         .format = ISL_FORMAT_R32G32B32A32_UINT,
+         .base_level = 0,
+         .base_array_layer = 0,
+         .levels = 1,
+         .array_len = 1,
+         .swizzle = ISL_SWIZZLE_IDENTITY,
+      },
+   };
+
+   uint32_t z = 0;
+   if (surf->surf->dim == ISL_SURF_DIM_3D) {
+      z = layer;   /* for a 3D surface, "layer" is passed on as the z slice */
+      layer = 0;
+   }
+
+   uint32_t offset_B, x_offset_el, y_offset_el;
+   isl_surf_get_image_offset_el(surf->aux_surf, level, layer, z,
+                                &x_offset_el, &y_offset_el);
+   isl_tiling_get_intratile_offset_el(surf->aux_surf->tiling, aux_fmtl->bpb,
+                                      surf->aux_surf->row_pitch,
+                                      x_offset_el, y_offset_el,
+                                      &offset_B, &x_offset_el, &y_offset_el);
+   params.dst.addr.offset += offset_B;
+
+   const uint32_t width_px =
+      minify(surf->aux_surf->logical_level0_px.width, level);
+   const uint32_t height_px =
+      minify(surf->aux_surf->logical_level0_px.height, level);
+   const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw);
+   const uint32_t height_el = DIV_ROUND_UP(height_px, aux_fmtl->bh);
+
+   struct isl_tile_info ccs_tile_info;
+   isl_surf_get_tile_info(surf->aux_surf, &ccs_tile_info);
+
+   /* We're going to map it as a regular RGBA32_UINT surface. We need to
+    * downscale a good deal. We start by computing the area on the CCS to
+    * clear in units of Y-tiled cache lines.
+    */
+   uint32_t x_offset_cl, y_offset_cl, width_cl, height_cl;
+   if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 8) {
+      /* From the Sky Lake PRM Vol. 12 in the section on planes:
+       *
+       *    "The Color Control Surface (CCS) contains the compression status
+       *    of the cache-line pairs. The compression state of the cache-line
+       *    pair is specified by 2 bits in the CCS. Each CCS cache-line
+       *    represents an area on the main surface of 16x16 sets of 128 byte
+       *    Y-tiled cache-line-pairs. CCS is always Y tiled."
+       *
+       * Each 2-bit surface element in the CCS corresponds to a single
+       * cache-line pair in the main surface. This means that 16x16 el block
+       * in the CCS maps to a Y-tiled cache line. Fortunately, CCS layouts
+       * are calculated with a very large alignment so we can round up to a
+       * whole cache line without worrying about overdraw.
+       */
+
+      /* On Broadwell and above, a CCS tile is the same as a Y tile when
+       * viewed at the cache-line granularity. Fortunately, the horizontal
+       * and vertical alignment requirements of the CCS are such that we can
+       * align to an entire cache line without worrying about crossing over
+       * from one LOD to another.
+       */
+      const uint32_t x_el_per_cl = ccs_tile_info.logical_extent_el.w / 8;
+      const uint32_t y_el_per_cl = ccs_tile_info.logical_extent_el.h / 8;
+      assert(surf->aux_surf->image_alignment_el.w % x_el_per_cl == 0);
+      assert(surf->aux_surf->image_alignment_el.h % y_el_per_cl == 0);
+
+      assert(x_offset_el % x_el_per_cl == 0);
+      assert(y_offset_el % y_el_per_cl == 0);
+      x_offset_cl = x_offset_el / x_el_per_cl;
+      y_offset_cl = y_offset_el / y_el_per_cl;
+      width_cl = DIV_ROUND_UP(width_el, x_el_per_cl);
+      height_cl = DIV_ROUND_UP(height_el, y_el_per_cl);
+   } else {
+      /* On gen7, the CCS tiling is not so nice. However, there we are
+       * guaranteed that we only have a single level and slice so we don't
+       * have to worry about it and can just align to a whole tile.
+       */
+      assert(surf->aux_surf->logical_level0_px.depth == 1);
+      assert(surf->aux_surf->logical_level0_px.array_len == 1);
+      assert(x_offset_el == 0 && y_offset_el == 0);
+      const uint32_t width_tl =
+         DIV_ROUND_UP(width_el, ccs_tile_info.logical_extent_el.w);
+      const uint32_t height_tl =
+         DIV_ROUND_UP(height_el, ccs_tile_info.logical_extent_el.h);
+      x_offset_cl = 0;
+      y_offset_cl = 0;
+      width_cl = width_tl * 8;   /* whole tiles: 8 cache lines across each */
+      height_cl = height_tl * 8; /* whole tiles: 8 cache lines down each */
+   }
+
+   /* We're going to use a RGBA32 format so as to write data as quickly as
+    * possible. A y-tiled cache line will then be 1x4 px.
+    */
+   const uint32_t x_offset_rgba_px = x_offset_cl;
+   const uint32_t y_offset_rgba_px = y_offset_cl * 4;
+   const uint32_t width_rgba_px = width_cl;
+   const uint32_t height_rgba_px = height_cl * 4;
+
+   MAYBE_UNUSED bool ok =
+      isl_surf_init(batch->blorp->isl_dev, &params.dst.surf,
+                    .dim = ISL_SURF_DIM_2D,
+                    .format = ISL_FORMAT_R32G32B32A32_UINT,
+                    .width = width_rgba_px + x_offset_rgba_px,
+                    .height = height_rgba_px + y_offset_rgba_px,
+                    .depth = 1,
+                    .levels = 1,
+                    .array_len = 1,
+                    .samples = 1,
+                    .row_pitch = surf->aux_surf->row_pitch,
+                    .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
+                    .tiling_flags = ISL_TILING_Y0_BIT);
+   assert(ok);
+
+   params.x0 = x_offset_rgba_px;
+   params.y0 = y_offset_rgba_px;
+   params.x1 = x_offset_rgba_px + width_rgba_px;
+   params.y1 = y_offset_rgba_px + height_rgba_px;
+
+   /* A CCS value of 0 means "uncompressed." */
+   memset(&params.wm_inputs.clear_color, 0,
+          sizeof(params.wm_inputs.clear_color));
+
+   if (!blorp_params_get_clear_kernel(batch->blorp, &params, true))
+      return;
+
+   batch->blorp->exec(batch, &params);
+}
-- 
2.30.2