radeonsi: flush CB after MSAA only when transitioning from CB to textures
author: Marek Olšák <marek.olsak@amd.com>
Wed, 14 Jun 2017 23:42:46 +0000 (01:42 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Wed, 21 Jun 2017 23:51:02 +0000 (01:51 +0200)
The main flush before texturing is done after the FMASK decompress pass.

CB after MSAA rendering is not flushed in set_framebuffer_state and also
not in memory_barrier if the current color buffer is MSAA. We fully rely
on the FMASK decompress pass for the flushing.

Some CB decompress and resolve passes need an explicit flush before and
after.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_blit.c
src/gallium/drivers/radeonsi/si_state.c

index d137a1fbec309bdd3a4660b4b7e184a57d58350c..631676bcd799915cc58a698498fec62e88f78208 100644 (file)
@@ -405,6 +405,14 @@ si_decompress_depth(struct si_context *sctx,
                                tex->stencil_dirty_level_mask = 0;
                }
        }
+       /* set_framebuffer_state takes care of coherency for single-sample.
+        * The DB->CB copy uses CB for the final writes.
+        */
+       if (copy_planes && tex->resource.b.b.nr_samples > 1) {
+               sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+                                SI_CONTEXT_INV_GLOBAL_L2 |
+                                SI_CONTEXT_FLUSH_AND_INV_CB;
+       }
 }
 
 static void
@@ -487,10 +495,19 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
                        surf_tmpl.u.tex.last_layer = layer;
                        cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl);
 
+                       /* Required before and after FMASK and DCC_DECOMPRESS. */
+                       if (custom_blend == sctx->custom_blend_fmask_decompress ||
+                           custom_blend == sctx->custom_blend_dcc_decompress)
+                               sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
+
                        si_blitter_begin(ctx, SI_DECOMPRESS);
                        util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend);
                        si_blitter_end(ctx);
 
+                       if (custom_blend == sctx->custom_blend_fmask_decompress ||
+                           custom_blend == sctx->custom_blend_dcc_decompress)
+                               sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
+
                        pipe_surface_reference(&cbsurf, NULL);
                }
 
@@ -503,6 +520,10 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
 
        sctx->decompression_enabled = false;
        sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
+
+       sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
+                        SI_CONTEXT_INV_GLOBAL_L2 |
+                        SI_CONTEXT_INV_VMEM_L1;
 }
 
 static void
@@ -1157,6 +1178,9 @@ static void si_do_CB_resolve(struct si_context *sctx,
                             unsigned dst_level, unsigned dst_z,
                             enum pipe_format format)
 {
+       /* Required before and after CB_RESOLVE. */
+       sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
+
        si_blitter_begin(&sctx->b.b, SI_COLOR_RESOLVE |
                         (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
        util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level, dst_z,
@@ -1164,6 +1188,11 @@ static void si_do_CB_resolve(struct si_context *sctx,
                                          ~0, sctx->custom_blend_resolve,
                                          format);
        si_blitter_end(&sctx->b.b);
+
+       /* Flush caches for possible texturing. */
+       sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
+                        SI_CONTEXT_INV_GLOBAL_L2 |
+                        SI_CONTEXT_INV_VMEM_L1;
 }
 
 static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
index 193816d2bf795a7bd871a3799be437b3e20c5c60..a0d790ac2a946780bc49aae6a5b4f2d6dd6a8a61 100644 (file)
@@ -2530,11 +2530,18 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
         * - shader write -> FB read
         *
         * DB caches are flushed on demand (using si_decompress_textures).
+        *
+        * When MSAA is enabled, CB and TC caches are flushed on demand
+        * (after FMASK decompression). Shader write -> FB read transitions
+        * cannot happen for MSAA textures, because MSAA shader images are
+        * not supported.
         */
-       sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
-                        SI_CONTEXT_INV_GLOBAL_L2 |
-                        SI_CONTEXT_FLUSH_AND_INV_CB |
-                        SI_CONTEXT_CS_PARTIAL_FLUSH;
+       if (sctx->framebuffer.nr_samples <= 1) {
+               sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+                                SI_CONTEXT_INV_GLOBAL_L2 |
+                                SI_CONTEXT_FLUSH_AND_INV_CB;
+       }
+       sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
 
        /* u_blitter doesn't invoke depth decompression when it does multiple
         * blits in a row, but the only case when it matters for DB is when
@@ -2542,8 +2549,11 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
         * individual generate_mipmap blits.
         * Note that lower mipmap levels aren't compressed.
         */
-       if (sctx->generate_mipmap_for_depth)
-               sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
+       if (sctx->generate_mipmap_for_depth) {
+               sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+                                SI_CONTEXT_INV_GLOBAL_L2 |
+                                SI_CONTEXT_FLUSH_AND_INV_DB;
+       }
 
        /* Take the maximum of the old and new count. If the new count is lower,
         * dirtying is needed to disable the unbound colorbuffers.
@@ -3961,9 +3971,12 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
 {
        struct si_context *sctx = (struct si_context *)ctx;
 
-       sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
-                        SI_CONTEXT_INV_GLOBAL_L2 |
-                        SI_CONTEXT_FLUSH_AND_INV_CB;
+       /* Multisample surfaces are flushed in si_decompress_textures. */
+       if (sctx->framebuffer.nr_samples <= 1) {
+               sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+                                SI_CONTEXT_INV_GLOBAL_L2 |
+                                SI_CONTEXT_FLUSH_AND_INV_CB;
+       }
        sctx->framebuffer.do_update_surf_dirtiness = true;
 }
 
@@ -4001,12 +4014,16 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
                        sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
        }
 
-       /* Depth and stencil are flushed in si_decompress_textures when needed. */
-       if (flags & PIPE_BARRIER_FRAMEBUFFER)
-               sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
+       /* MSAA color, any depth and any stencil are flushed in
+        * si_decompress_textures when needed.
+        */
+       if (flags & PIPE_BARRIER_FRAMEBUFFER &&
+           sctx->framebuffer.nr_samples <= 1) {
+               sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
+                                SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+       }
 
-       if (flags & (PIPE_BARRIER_FRAMEBUFFER |
-                    PIPE_BARRIER_INDIRECT_BUFFER))
+       if (flags & PIPE_BARRIER_INDIRECT_BUFFER)
                sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
 }