From b19cff1639f9c6e3074d11f1199807603046ea4b Mon Sep 17 00:00:00 2001 From: Topi Pohjolainen Date: Sat, 2 Jan 2016 16:25:57 +0200 Subject: [PATCH] i965/gen9: Enable lossless compression I first tried creating the auxiliary buffer at the same time as the color buffer. That, however, led me into a situation where we would later create the rest of the mip-levels and the compression would need to be disabled (it is only supported for single level buffers). Here we try to create it on demand just before the hardware starts to render. This is similar to what we do with fast clear buffers, whose creation is deferred until the first clear. This setup also gives the opportunity to detect if the miptree represents the temporary texture used internally in the mesa core. This texture is mostly written by cpu and therefore enabling compression for it doesn't make much sense. Note that a heuristic is included. Floating point formats are not enabled yet as they are only seen to hurt performance. Some highlights with window system driver kept fixed to default and only the application driver changing: Manhattan: 8.32152% +/- 0.355881% Offscreen: 9.09713% +/- 0.340763% Glb trex: 8.46231% +/- 0.460624% Offscreen: 9.31872% +/- 0.463743% v2 (Ben): Re-use msaa layout type for single sampled case. v3: Moved the deferred allocation of mcs to brw_try_draw_prims() and brw_blorp_blit_miptrees() instead. v4: (Ken): Drop MIPTREE_LAYOUT_ACCELERATED_UPLOAD when allocating mcs. 
Do not enable for scanout buffers Signed-off-by: Topi Pohjolainen Reviewed-by: Ben Widawsky --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 43 +++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 0b432eca7fd..45569fca800 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -798,7 +798,8 @@ intel_miptree_create(struct brw_context *brw, /* If this miptree is capable of supporting fast color clears, set * fast_clear_state appropriately to ensure that fast clears will occur. * Allocation of the MCS miptree will be deferred until the first fast - * clear actually occurs. + * clear actually occurs or when compressed single sampled buffer is + * written by the GPU for the first time. */ if (intel_tiling_supports_non_msrt_mcs(brw, mt->tiling) && intel_miptree_supports_non_msrt_fast_clear(brw, mt)) { @@ -1603,11 +1604,28 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, unsigned mcs_height = ALIGN(mt->logical_height0, height_divisor) / height_divisor; assert(mt->logical_depth0 == 1); - uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD | - MIPTREE_LAYOUT_TILING_Y; + uint32_t layout_flags = MIPTREE_LAYOUT_TILING_Y; + if (brw->gen >= 8) { layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16; } + + /* On Gen9+ clients are not currently capable of consuming compressed + * single-sampled buffers. Disabling compression allows us to skip + * resolves. + */ + const bool is_lossless_compressed = + brw->gen >= 9 && !mt->is_scanout && + intel_miptree_supports_lossless_compressed(brw, mt); + + /* In case of compression mcs buffer needs to be initialised requiring the + * buffer to be immediately mapped to cpu space for writing. Therefore do + * not use the gpu access flag which can cause an unnecessary delay if the + * backing pages happened to be just used by the GPU. 
+ */ + if (!is_lossless_compressed) + layout_flags |= MIPTREE_LAYOUT_ACCELERATED_UPLOAD; + mt->mcs_mt = miptree_create(brw, mt->target, format, @@ -1619,6 +1637,25 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, 0 /* num_samples */, layout_flags); + /* From Gen9 onwards single-sampled (non-msrt) auxiliary buffers are + * used for lossless compression which requires similar initialisation + * as multi-sample compression. + */ + if (is_lossless_compressed) { + /* Hardware sets the auxiliary buffer to all zeroes when it does full + * resolve. Initialize it accordingly in case the first renderer is + * cpu (or other none compression aware party). + * + * This is also explicitly stated in the spec (MCS Buffer for Render + * Target(s)): + * "If Software wants to enable Color Compression without Fast clear, + * Software needs to initialize MCS with zeros." + */ + intel_miptree_init_mcs(brw, mt, 0); + mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; + mt->msaa_layout = INTEL_MSAA_LAYOUT_CMS; + } + return mt->mcs_mt; } -- 2.30.2