radeonsi: don't flush and wait for CB after depth-only rendering
[mesa.git] / src / gallium / drivers / radeonsi / si_state.c
index 831c6b8ea32648fde540b1cab919c4a789ce0b14..b236bed306542bafe612a0b6293693e57f098dd5 100644 (file)
@@ -2525,14 +2525,38 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
         * the only client not using TC that can change textures is
         * the framebuffer.
         *
-        * Flush all CB and DB caches here because all buffers can be used
-        * for write by both TC (with shader image stores) and CB/DB.
+        * Wait for compute shaders because of possible transitions:
+        * - FB write -> shader read
+        * - shader write -> FB read
+        *
+        * DB caches are flushed on demand (using si_decompress_textures).
+        *
+        * When MSAA is enabled, CB and TC caches are flushed on demand
+        * (after FMASK decompression). Shader write -> FB read transitions
+        * cannot happen for MSAA textures, because MSAA shader images are
+        * not supported.
+        *
+        * Only flush and wait for CB if there is actually a bound color buffer.
+        */
+       if (sctx->framebuffer.nr_samples <= 1 &&
+           sctx->framebuffer.state.nr_cbufs) {
+               sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+                                SI_CONTEXT_INV_GLOBAL_L2 |
+                                SI_CONTEXT_FLUSH_AND_INV_CB;
+       }
+       sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
+
+       /* u_blitter doesn't invoke depth decompression when it does multiple
+        * blits in a row. The only case where that matters for DB is
+        * generate_mipmap, so flush DB manually between the individual
+        * generate_mipmap blits.
+        * Note that lower mipmap levels aren't compressed.
         */
-       sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
-                        SI_CONTEXT_INV_GLOBAL_L2 |
-                        SI_CONTEXT_FLUSH_AND_INV_CB |
-                        SI_CONTEXT_FLUSH_AND_INV_DB |
-                        SI_CONTEXT_CS_PARTIAL_FLUSH;
+       if (sctx->generate_mipmap_for_depth) {
+               sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+                                SI_CONTEXT_INV_GLOBAL_L2 |
+                                SI_CONTEXT_FLUSH_AND_INV_DB;
+       }
 
        /* Take the maximum of the old and new count. If the new count is lower,
         * dirtying is needed to disable the unbound colorbuffers.
@@ -3950,9 +3974,12 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
 {
        struct si_context *sctx = (struct si_context *)ctx;
 
-       sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
-                        SI_CONTEXT_INV_GLOBAL_L2 |
-                        SI_CONTEXT_FLUSH_AND_INV_CB;
+       /* Multisample surfaces are flushed in si_decompress_textures. */
+       if (sctx->framebuffer.nr_samples <= 1) {
+               sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+                                SI_CONTEXT_INV_GLOBAL_L2 |
+                                SI_CONTEXT_FLUSH_AND_INV_CB;
+       }
        sctx->framebuffer.do_update_surf_dirtiness = true;
 }
 
@@ -3990,12 +4017,18 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
                        sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
        }
 
-       if (flags & PIPE_BARRIER_FRAMEBUFFER)
+       /* MSAA color, any depth and any stencil are flushed in
+        * si_decompress_textures when needed.
+        */
+       if (flags & PIPE_BARRIER_FRAMEBUFFER &&
+           sctx->framebuffer.nr_samples <= 1) {
                sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
-                                SI_CONTEXT_FLUSH_AND_INV_DB;
+                                SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+       }
 
-       if (flags & (PIPE_BARRIER_FRAMEBUFFER |
-                    PIPE_BARRIER_INDIRECT_BUFFER))
+       /* Indirect buffers use TC L2 on GFX9, but not on older hw. */
+       if (sctx->screen->b.chip_class <= VI &&
+           flags & PIPE_BARRIER_INDIRECT_BUFFER)
                sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
 }