radeonsi/gfx9: don't flush L2 metadata for DB if not needed
author: Marek Olšák <marek.olsak@amd.com>
Sat, 19 Aug 2017 13:06:22 +0000 (15:06 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Tue, 22 Aug 2017 11:29:47 +0000 (13:29 +0200)
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_blit.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state.c

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 5545f5b2a5bd0e86417fa1196b78a605a24e9fd1..4e3b707b6cfad8587984aba968cfb5c8ae8200e8 100644
@@ -392,21 +392,19 @@ si_decompress_depth(struct si_context *sctx,
                 * when we don't decompress but TC-compatible planes are dirty.
                 */
                si_make_DB_shader_coherent(sctx, tex->resource.b.b.nr_samples,
-                                          inplace_planes & PIPE_MASK_S);
+                                          inplace_planes & PIPE_MASK_S,
+                                          tex->tc_compatible_htile &&
+                                          first_level == 0);
 
-               /* If we flush DB caches for TC-compatible depth, the dirty
-                * state becomes 0 for the whole mipmap tree and all planes.
-                * (there is nothing else to flush)
-                */
                if (tex->tc_compatible_htile) {
                        /* Only clear the mask that we are flushing, because
                         * si_make_DB_shader_coherent() can treat depth and
                         * stencil differently.
                         */
                        if (inplace_planes & PIPE_MASK_Z)
-                               tex->dirty_level_mask = 0;
+                               tex->dirty_level_mask &= ~levels_z;
                        if (inplace_planes & PIPE_MASK_S)
-                               tex->stencil_dirty_level_mask = 0;
+                               tex->stencil_dirty_level_mask &= ~levels_s;
                }
        }
        /* set_framebuffer_state takes care of coherency for single-sample.
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e5aeb9a7e88a6f6ef813cd7d4737a4d408b77156..386a6dc886dcf3719929e33f18a6f97e86521ce9 100644
@@ -202,6 +202,7 @@ struct si_framebuffer {
        bool                            dirty_zsbuf;
        bool                            any_dst_linear;
        bool                            CB_has_shader_readable_metadata;
+       bool                            DB_has_shader_readable_metadata;
 };
 
 struct si_clip_state {
@@ -639,14 +640,24 @@ si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples,
 
 static inline void
 si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples,
-                          bool include_stencil)
+                          bool include_stencil, bool shaders_read_metadata)
 {
        sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
                         SI_CONTEXT_INV_VMEM_L1;
 
-       /* Single-sample depth (not stencil) is coherent with shaders on GFX9. */
-       if (sctx->b.chip_class <= VI || num_samples >= 2 || include_stencil)
+       if (sctx->b.chip_class >= GFX9) {
+               /* Single-sample depth (not stencil) is coherent with shaders
+                * on GFX9, but L2 metadata must be flushed if shaders read
+                * metadata.
+                */
+               if (num_samples >= 2 || include_stencil)
+                       sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+               else if (shaders_read_metadata)
+                       sctx->b.flags |= SI_CONTEXT_INV_L2_METADATA;
+       } else {
+               /* SI-CI-VI */
                sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+       }
 }
 
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index fb97052aacfce166f4657a878e14450a83b2362b..d391d5f501a99138d3259bc176244eb60e292c1c 100644
@@ -2585,7 +2585,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
         * Note that lower mipmap levels aren't compressed.
         */
        if (sctx->generate_mipmap_for_depth)
-               si_make_DB_shader_coherent(sctx, 1, false);
+               si_make_DB_shader_coherent(sctx, 1, false,
+                                          sctx->framebuffer.DB_has_shader_readable_metadata);
 
        /* Take the maximum of the old and new count. If the new count is lower,
         * dirtying is needed to disable the unbound colorbuffers.
@@ -2610,6 +2611,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
        sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
        sctx->framebuffer.any_dst_linear = false;
        sctx->framebuffer.CB_has_shader_readable_metadata = false;
+       sctx->framebuffer.DB_has_shader_readable_metadata = false;
 
        for (i = 0; i < state->nr_cbufs; i++) {
                if (!state->cbufs[i])
@@ -2665,6 +2667,10 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
                if (!surf->depth_initialized) {
                        si_init_depth_surface(sctx, surf);
                }
+
+               if (rtex->tc_compatible_htile && !surf->base.u.tex.level)
+                       sctx->framebuffer.DB_has_shader_readable_metadata = true;
+
                r600_context_add_resource_size(ctx, surf->base.texture);
        }