From 5b62eb237c5253f5315c6fd948307e7f2247de2c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 18 Aug 2017 15:51:59 +0200 Subject: [PATCH] radeonsi/gfx9: don't flush TC L2 between rendering and texturing if not needed MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_blit.c | 29 ++++++++++--------------- src/gallium/drivers/radeonsi/si_pipe.h | 23 ++++++++++++++++++++ src/gallium/drivers/radeonsi/si_state.c | 29 ++++++++++--------------- 3 files changed, 47 insertions(+), 34 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index db983eeb221..1b001cc04c8 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -391,29 +391,29 @@ si_decompress_depth(struct si_context *sctx, /* Only in-place decompression needs to flush DB caches, or * when we don't decompress but TC-compatible planes are dirty. */ - sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB | - SI_CONTEXT_INV_GLOBAL_L2 | - SI_CONTEXT_INV_VMEM_L1; + si_make_DB_shader_coherent(sctx, tex->resource.b.b.nr_samples, + inplace_planes & PIPE_MASK_S); /* If we flush DB caches for TC-compatible depth, the dirty * state becomes 0 for the whole mipmap tree and all planes. * (there is nothing else to flush) */ if (tex->tc_compatible_htile) { - if (r600_can_sample_zs(tex, false)) + /* Only clear the mask that we are flushing, because + * si_make_DB_shader_coherent() can treat depth and + * stencil differently. + */ + if (inplace_planes & PIPE_MASK_Z) tex->dirty_level_mask = 0; - if (r600_can_sample_zs(tex, true)) + if (inplace_planes & PIPE_MASK_S) tex->stencil_dirty_level_mask = 0; } } /* set_framebuffer_state takes care of coherency for single-sample. * The DB->CB copy uses CB for the final writes. */ - if (copy_planes && tex->resource.b.b.nr_samples > 1) { - sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | - SI_CONTEXT_INV_GLOBAL_L2 | - SI_CONTEXT_FLUSH_AND_INV_CB; - } + if (copy_planes && tex->resource.b.b.nr_samples > 1) + si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples); } static void @@ -524,10 +524,7 @@ static void si_blit_decompress_color(struct pipe_context *ctx, } sctx->decompression_enabled = false; - - sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB | - SI_CONTEXT_INV_GLOBAL_L2 | - SI_CONTEXT_INV_VMEM_L1; + si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples); } static void @@ -1216,9 +1213,7 @@ static void si_do_CB_resolve(struct si_context *sctx, si_blitter_end(&sctx->b.b); /* Flush caches for possible texturing. */ - sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB | - SI_CONTEXT_INV_GLOBAL_L2 | - SI_CONTEXT_INV_VMEM_L1; + si_make_CB_shader_coherent(sctx, 1); } static bool do_hardware_msaa_resolve(struct pipe_context *ctx, diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 69a35ea1945..f2a20ba4668 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -611,4 +611,27 @@ si_saved_cs_reference(struct si_saved_cs **dst, struct si_saved_cs *src) *dst = src; } +static inline void +si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples) +{ + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB | + SI_CONTEXT_INV_VMEM_L1; + + /* Single-sample color is coherent with shaders on GFX9. */ + if (sctx->b.chip_class <= VI || num_samples >= 2) + sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; +} + +static inline void +si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples, + bool include_stencil) +{ + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB | + SI_CONTEXT_INV_VMEM_L1; + + /* Single-sample depth (not stencil) is coherent with shaders on GFX9. */ + if (sctx->b.chip_class <= VI || num_samples >= 2 || include_stencil) + sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; +} + #endif diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 8010df6584d..d116c07ee6e 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2572,11 +2572,9 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, * Only flush and wait for CB if there is actually a bound color buffer. */ if (sctx->framebuffer.nr_samples <= 1 && - sctx->framebuffer.state.nr_cbufs) { - sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | - SI_CONTEXT_INV_GLOBAL_L2 | - SI_CONTEXT_FLUSH_AND_INV_CB; - } + sctx->framebuffer.state.nr_cbufs) + si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples); + sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH; /* u_blitter doesn't invoke depth decompression when it does multiple @@ -2585,11 +2583,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, * individual generate_mipmap blits. * Note that lower mipmap levels aren't compressed. */ - if (sctx->generate_mipmap_for_depth) { - sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | - SI_CONTEXT_INV_GLOBAL_L2 | - SI_CONTEXT_FLUSH_AND_INV_DB; - } + if (sctx->generate_mipmap_for_depth) + si_make_DB_shader_coherent(sctx, 1, false); /* Take the maximum of the old and new count. If the new count is lower, * dirtying is needed to disable the unbound colorbuffers. @@ -4026,11 +4021,8 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags) /* Multisample surfaces are flushed in si_decompress_textures. */ if (sctx->framebuffer.nr_samples <= 1 && - sctx->framebuffer.state.nr_cbufs) { - sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | - SI_CONTEXT_INV_GLOBAL_L2 | - SI_CONTEXT_FLUSH_AND_INV_CB; - } + sctx->framebuffer.state.nr_cbufs) + si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples); } /* This only ensures coherency for shader image/buffer stores. */ @@ -4073,8 +4065,11 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) if (flags & PIPE_BARRIER_FRAMEBUFFER && sctx->framebuffer.nr_samples <= 1 && sctx->framebuffer.state.nr_cbufs) { - sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB | - SI_CONTEXT_WRITEBACK_GLOBAL_L2; + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB; + + /* Single-sample color is coherent with TC on GFX9. */ + if (sctx->screen->b.chip_class <= VI) + sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } /* Indirect buffers use TC L2 on GFX9, but not older hw. */ -- 2.30.2