radeonsi/gfx9: don't flush L2 metadata for CB if not needed
author: Marek Olšák <marek.olsak@amd.com>
Sat, 19 Aug 2017 13:06:22 +0000 (15:06 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Tue, 22 Aug 2017 11:29:47 +0000 (13:29 +0200)
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_blit.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state_draw.c

index 1b001cc04c8adc618f0b2d31153775dba207b2c6..5545f5b2a5bd0e86417fa1196b78a605a24e9fd1 100644 (file)
@@ -413,7 +413,8 @@ si_decompress_depth(struct si_context *sctx,
         * The DB->CB copy uses CB for the final writes.
         */
        if (copy_planes && tex->resource.b.b.nr_samples > 1)
-               si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples);
+               si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples,
+                                          false);
 }
 
 static void
@@ -524,7 +525,8 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
        }
 
        sctx->decompression_enabled = false;
-       si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples);
+       si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples,
+                                  vi_dcc_enabled(rtex, first_level));
 }
 
 static void
@@ -1213,7 +1215,7 @@ static void si_do_CB_resolve(struct si_context *sctx,
        si_blitter_end(&sctx->b.b);
 
        /* Flush caches for possible texturing. */
-       si_make_CB_shader_coherent(sctx, 1);
+       si_make_CB_shader_coherent(sctx, 1, false);
 }
 
 static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
index f2a20ba46681f0869291b9908d7e3d5673f7cd57..e5aeb9a7e88a6f6ef813cd7d4737a4d408b77156 100644 (file)
 /* Write dirty L2 lines back to memory (shader and CP DMA stores), but don't
  * invalidate L2. SI-CIK can't do it, so they will do complete invalidation. */
 #define SI_CONTEXT_WRITEBACK_GLOBAL_L2 (R600_CONTEXT_PRIVATE_FLAG << 4)
-/* gaps */
+/* Writeback & invalidate the L2 metadata cache. It can only be coupled with
+ * a CB or DB flush. */
+#define SI_CONTEXT_INV_L2_METADATA     (R600_CONTEXT_PRIVATE_FLAG << 5)
+/* gap */
 /* Framebuffer caches. */
 #define SI_CONTEXT_FLUSH_AND_INV_DB    (R600_CONTEXT_PRIVATE_FLAG << 7)
 #define SI_CONTEXT_FLUSH_AND_INV_CB    (R600_CONTEXT_PRIVATE_FLAG << 8)
@@ -198,6 +201,7 @@ struct si_framebuffer {
        ubyte                           dirty_cbufs;
        bool                            dirty_zsbuf;
        bool                            any_dst_linear;
+       bool                            CB_has_shader_readable_metadata;
 };
 
 struct si_clip_state {
@@ -612,14 +616,25 @@ si_saved_cs_reference(struct si_saved_cs **dst, struct si_saved_cs *src)
 }
 
 static inline void
-si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples)
+si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples,
+                          bool shaders_read_metadata)
 {
        sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
                         SI_CONTEXT_INV_VMEM_L1;
 
-       /* Single-sample color is coherent with shaders on GFX9. */
-       if (sctx->b.chip_class <= VI || num_samples >= 2)
+       if (sctx->b.chip_class >= GFX9) {
+               /* Single-sample color is coherent with shaders on GFX9, but
+                * L2 metadata must be flushed if shaders read metadata
+                * (DCC, CMASK).
+                */
+               if (num_samples >= 2)
+                       sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+               else if (shaders_read_metadata)
+                       sctx->b.flags |= SI_CONTEXT_INV_L2_METADATA;
+       } else {
+               /* SI-CI-VI */
                sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+       }
 }
 
 static inline void
index d116c07ee6e78e5b4ac39e6ad4114ab6a018984d..fb97052aacfce166f4657a878e14450a83b2362b 100644 (file)
@@ -2573,7 +2573,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
         */
        if (sctx->framebuffer.nr_samples <= 1 &&
            sctx->framebuffer.state.nr_cbufs)
-               si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples);
+               si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
+                                          sctx->framebuffer.CB_has_shader_readable_metadata);
 
        sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
 
@@ -2608,6 +2609,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
        sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
        sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
        sctx->framebuffer.any_dst_linear = false;
+       sctx->framebuffer.CB_has_shader_readable_metadata = false;
 
        for (i = 0; i < state->nr_cbufs; i++) {
                if (!state->cbufs[i])
@@ -2642,6 +2644,9 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
                if (rtex->surface.is_linear)
                        sctx->framebuffer.any_dst_linear = true;
 
+               if (vi_dcc_enabled(rtex, surf->base.u.tex.level))
+                       sctx->framebuffer.CB_has_shader_readable_metadata = true;
+
                r600_context_add_resource_size(ctx, surf->base.texture);
 
                p_atomic_inc(&rtex->framebuffers_bound);
@@ -4022,7 +4027,8 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
        /* Multisample surfaces are flushed in si_decompress_textures. */
        if (sctx->framebuffer.nr_samples <= 1 &&
            sctx->framebuffer.state.nr_cbufs)
-               si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples);
+               si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
+                                          sctx->framebuffer.CB_has_shader_readable_metadata);
 }
 
 /* This only ensures coherency for shader image/buffer stores. */
@@ -4067,8 +4073,7 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
            sctx->framebuffer.state.nr_cbufs) {
                sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
 
-               /* Single-sample color is coherent with TC on GFX9. */
-               if (sctx->screen->b.chip_class <= VI)
+               if (sctx->b.chip_class <= VI)
                        sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
        }
 
index 7bc52f29e2551afa5de739286f5f195811bcce3f..1d8be49a480752fbf9d6702854ba0d873a3ab1f9 100644 (file)
@@ -988,13 +988,12 @@ void si_emit_cache_flush(struct si_context *sctx)
                 * TC    | TC_MD         = writeback & invalidate L2 metadata (DCC, etc.)
                 * TCL1                  = invalidate L1
                 */
+               tc_flags = 0;
 
-               /* When flushing CB or DB, L2 metadata should always be invali-
-                * dated before texturing. Invalidating L2 data is not needed
-                * in some cases.
-                */
-               tc_flags = EVENT_TC_ACTION_ENA |
-                          EVENT_TC_MD_ACTION_ENA;
+               if (rctx->flags & SI_CONTEXT_INV_L2_METADATA) {
+                       tc_flags = EVENT_TC_ACTION_ENA |
+                                  EVENT_TC_MD_ACTION_ENA;
+               }
 
                /* Ideally flush TC together with CB/DB. */
                if (rctx->flags & SI_CONTEXT_INV_GLOBAL_L2) {