radeonsi/gfx9: don't flush TC L2 between rendering and texturing if not needed
author: Marek Olšák <marek.olsak@amd.com>
Fri, 18 Aug 2017 13:51:59 +0000 (15:51 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Tue, 22 Aug 2017 11:29:47 +0000 (13:29 +0200)
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_blit.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state.c

index db983eeb221b967cf0432eb248a9a8524d3a8642..1b001cc04c8adc618f0b2d31153775dba207b2c6 100644 (file)
@@ -391,29 +391,29 @@ si_decompress_depth(struct si_context *sctx,
                /* Only in-place decompression needs to flush DB caches, or
                 * when we don't decompress but TC-compatible planes are dirty.
                 */
-               sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
-                                SI_CONTEXT_INV_GLOBAL_L2 |
-                                SI_CONTEXT_INV_VMEM_L1;
+               si_make_DB_shader_coherent(sctx, tex->resource.b.b.nr_samples,
+                                          inplace_planes & PIPE_MASK_S);
 
                /* If we flush DB caches for TC-compatible depth, the dirty
                 * state becomes 0 for the whole mipmap tree and all planes.
                 * (there is nothing else to flush)
                 */
                if (tex->tc_compatible_htile) {
-                       if (r600_can_sample_zs(tex, false))
+                       /* Only clear the mask that we are flushing, because
+                        * si_make_DB_shader_coherent() can treat depth and
+                        * stencil differently.
+                        */
+                       if (inplace_planes & PIPE_MASK_Z)
                                tex->dirty_level_mask = 0;
-                       if (r600_can_sample_zs(tex, true))
+                       if (inplace_planes & PIPE_MASK_S)
                                tex->stencil_dirty_level_mask = 0;
                }
        }
        /* set_framebuffer_state takes care of coherency for single-sample.
         * The DB->CB copy uses CB for the final writes.
         */
-       if (copy_planes && tex->resource.b.b.nr_samples > 1) {
-               sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
-                                SI_CONTEXT_INV_GLOBAL_L2 |
-                                SI_CONTEXT_FLUSH_AND_INV_CB;
-       }
+       if (copy_planes && tex->resource.b.b.nr_samples > 1)
+               si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples);
 }
 
 static void
@@ -524,10 +524,7 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
        }
 
        sctx->decompression_enabled = false;
-
-       sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
-                        SI_CONTEXT_INV_GLOBAL_L2 |
-                        SI_CONTEXT_INV_VMEM_L1;
+       si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples);
 }
 
 static void
@@ -1216,9 +1213,7 @@ static void si_do_CB_resolve(struct si_context *sctx,
        si_blitter_end(&sctx->b.b);
 
        /* Flush caches for possible texturing. */
-       sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
-                        SI_CONTEXT_INV_GLOBAL_L2 |
-                        SI_CONTEXT_INV_VMEM_L1;
+       si_make_CB_shader_coherent(sctx, 1);
 }
 
 static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
index 69a35ea1945af3ccf4ef44162a390eaca099fcc4..f2a20ba46681f0869291b9908d7e3d5673f7cd57 100644 (file)
@@ -611,4 +611,27 @@ si_saved_cs_reference(struct si_saved_cs **dst, struct si_saved_cs *src)
        *dst = src;
 }
 
+static inline void
+si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples)
+{
+       sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
+                        SI_CONTEXT_INV_VMEM_L1;
+
+       /* Single-sample color is coherent with shaders on GFX9. */
+       if (sctx->b.chip_class <= VI || num_samples >= 2)
+               sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+}
+
+static inline void
+si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples,
+                          bool include_stencil)
+{
+       sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
+                        SI_CONTEXT_INV_VMEM_L1;
+
+       /* Single-sample depth (not stencil) is coherent with shaders on GFX9. */
+       if (sctx->b.chip_class <= VI || num_samples >= 2 || include_stencil)
+               sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+}
+
 #endif
index 8010df6584d1a88402815a2a84811c3add5cc601..d116c07ee6e78e5b4ac39e6ad4114ab6a018984d 100644 (file)
@@ -2572,11 +2572,9 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
         * Only flush and wait for CB if there is actually a bound color buffer.
         */
        if (sctx->framebuffer.nr_samples <= 1 &&
-           sctx->framebuffer.state.nr_cbufs) {
-               sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
-                                SI_CONTEXT_INV_GLOBAL_L2 |
-                                SI_CONTEXT_FLUSH_AND_INV_CB;
-       }
+           sctx->framebuffer.state.nr_cbufs)
+               si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples);
+
        sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
 
        /* u_blitter doesn't invoke depth decompression when it does multiple
@@ -2585,11 +2583,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
         * individual generate_mipmap blits.
         * Note that lower mipmap levels aren't compressed.
         */
-       if (sctx->generate_mipmap_for_depth) {
-               sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
-                                SI_CONTEXT_INV_GLOBAL_L2 |
-                                SI_CONTEXT_FLUSH_AND_INV_DB;
-       }
+       if (sctx->generate_mipmap_for_depth)
+               si_make_DB_shader_coherent(sctx, 1, false);
 
        /* Take the maximum of the old and new count. If the new count is lower,
         * dirtying is needed to disable the unbound colorbuffers.
@@ -4026,11 +4021,8 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
 
        /* Multisample surfaces are flushed in si_decompress_textures. */
        if (sctx->framebuffer.nr_samples <= 1 &&
-           sctx->framebuffer.state.nr_cbufs) {
-               sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
-                                SI_CONTEXT_INV_GLOBAL_L2 |
-                                SI_CONTEXT_FLUSH_AND_INV_CB;
-       }
+           sctx->framebuffer.state.nr_cbufs)
+               si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples);
 }
 
 /* This only ensures coherency for shader image/buffer stores. */
@@ -4073,8 +4065,11 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
        if (flags & PIPE_BARRIER_FRAMEBUFFER &&
            sctx->framebuffer.nr_samples <= 1 &&
            sctx->framebuffer.state.nr_cbufs) {
-               sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
-                                SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+               sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
+
+               /* Single-sample color is coherent with TC on GFX9. */
+               if (sctx->screen->b.chip_class <= VI)
+                       sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
        }
 
        /* Indirect buffers use TC L2 on GFX9, but not older hw. */