radeonsi: flush DB caches only when transitioning from DB to texturing
author Marek Olšák <marek.olsak@amd.com>
Wed, 14 Jun 2017 22:34:08 +0000 (00:34 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Wed, 21 Jun 2017 23:51:02 +0000 (01:51 +0200)
Use the mechanism of si_decompress_textures, but instead of doing
the actual decompression, just flag the DB cache flush there.

This removes a lot of unnecessary DB cache flushes.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_blit.c
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state_draw.c

index c62efbfa7d35d236268171e373e23353cd447781..74f46705b3c0d22abad52034739ca38be02d37c2 100644 (file)
@@ -344,10 +344,6 @@ si_decompress_depth(struct si_context *sctx,
                }
        }
 
-       assert(!tex->tc_compatible_htile || levels_z == 0);
-       assert(!tex->tc_compatible_htile || levels_s == 0 ||
-              !r600_can_sample_zs(tex, true));
-
        /* We may have to allocate the flushed texture here when called from
         * si_decompress_subresource.
         */
@@ -384,10 +380,30 @@ si_decompress_depth(struct si_context *sctx,
        }
 
        if (inplace_planes) {
-               si_blit_decompress_zs_in_place(
-                       sctx, tex,
-                       levels_z, levels_s,
-                       first_layer, last_layer);
+               if (!tex->tc_compatible_htile) {
+                       si_blit_decompress_zs_in_place(
+                                               sctx, tex,
+                                               levels_z, levels_s,
+                                               first_layer, last_layer);
+               }
+
+               /* Only in-place decompression needs to flush DB caches, or
+                * when we don't decompress but TC-compatible planes are dirty.
+                */
+               sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
+                                SI_CONTEXT_INV_GLOBAL_L2 |
+                                SI_CONTEXT_INV_VMEM_L1;
+
+               /* If we flush DB caches for TC-compatible depth, the dirty
+                * state becomes 0 for the whole mipmap tree and all planes.
+                * (there is nothing else to flush)
+                */
+               if (tex->tc_compatible_htile) {
+                       if (r600_can_sample_zs(tex, false))
+                               tex->dirty_level_mask = 0;
+                       if (r600_can_sample_zs(tex, true))
+                               tex->stencil_dirty_level_mask = 0;
+               }
        }
 }
 
@@ -1352,11 +1368,15 @@ static boolean si_generate_mipmap(struct pipe_context *ctx,
        rtex->dirty_level_mask &= ~u_bit_consecutive(base_level + 1,
                                                     last_level - base_level);
 
+       sctx->generate_mipmap_for_depth = rtex->is_depth;
+
        si_blitter_begin(ctx, SI_BLIT | SI_DISABLE_RENDER_COND);
        util_blitter_generate_mipmap(sctx->blitter, tex, format,
                                     base_level, last_level,
                                     first_layer, last_layer);
        si_blitter_end(ctx);
+
+       sctx->generate_mipmap_for_depth = false;
        return true;
 }
 
index a7031ec3632125d4af229c6d7e1072f4fbfc6c37..acf6fb47fae5870c77a539855d4959c6a9c04b33 100644 (file)
@@ -584,12 +584,14 @@ static bool color_needs_decompression(struct r600_texture *rtex)
                (rtex->cmask.size || rtex->dcc_offset));
 }
 
-static bool depth_needs_decompression(struct r600_texture *rtex,
-                                     struct si_sampler_view *sview)
+static bool depth_needs_decompression(struct r600_texture *rtex)
 {
-       return rtex->db_compatible &&
-              (!rtex->tc_compatible_htile ||
-               !r600_can_sample_zs(rtex, sview->is_stencil_sampler));
+       /* If the depth/stencil texture is TC-compatible, no decompression
+        * will be done. The decompression function will only flush DB caches
+        * to make it coherent with shaders. That's necessary because the driver
+        * doesn't flush DB caches in any other case.
+        */
+       return rtex->db_compatible;
 }
 
 static void si_update_shader_needs_decompress_mask(struct si_context *sctx,
@@ -633,9 +635,8 @@ static void si_set_sampler_views(struct pipe_context *ctx,
                if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) {
                        struct r600_texture *rtex =
                                (struct r600_texture*)views[i]->texture;
-                       struct si_sampler_view *rview = (struct si_sampler_view *)views[i];
 
-                       if (depth_needs_decompression(rtex, rview)) {
+                       if (depth_needs_decompression(rtex)) {
                                samplers->needs_depth_decompress_mask |= 1u << slot;
                        } else {
                                samplers->needs_depth_decompress_mask &= ~(1u << slot);
@@ -2470,7 +2471,7 @@ static void si_make_texture_handle_resident(struct pipe_context *ctx,
                        struct r600_texture *rtex =
                                (struct r600_texture *)sview->base.texture;
 
-                       if (depth_needs_decompression(rtex, sview)) {
+                       if (depth_needs_decompression(rtex)) {
                                util_dynarray_append(
                                        &sctx->resident_tex_needs_depth_decompress,
                                        struct si_texture_handle *,
index 0c77f933127016a7848e1532e8063779055837d7..1c1740838190b75ec6185ff81697e34317e7dfcc 100644 (file)
@@ -362,6 +362,7 @@ struct si_context {
        bool                    db_stencil_clear:1;
        bool                    db_stencil_disable_expclear:1;
        bool                    occlusion_queries_disabled:1;
+       bool                    generate_mipmap_for_depth:1;
 
        /* Emitted draw state. */
        bool                    gs_tri_strip_adj_fix:1;
index 831c6b8ea32648fde540b1cab919c4a789ce0b14..193816d2bf795a7bd871a3799be437b3e20c5c60 100644 (file)
@@ -2525,15 +2525,26 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
         * the only client not using TC that can change textures is
         * the framebuffer.
         *
-        * Flush all CB and DB caches here because all buffers can be used
-        * for write by both TC (with shader image stores) and CB/DB.
+        * Wait for compute shaders because of possible transitions:
+        * - FB write -> shader read
+        * - shader write -> FB read
+        *
+        * DB caches are flushed on demand (using si_decompress_textures).
         */
        sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
                         SI_CONTEXT_INV_GLOBAL_L2 |
                         SI_CONTEXT_FLUSH_AND_INV_CB |
-                        SI_CONTEXT_FLUSH_AND_INV_DB |
                         SI_CONTEXT_CS_PARTIAL_FLUSH;
 
+       /* u_blitter doesn't invoke depth decompression when it does multiple
+        * blits in a row, but the only case when it matters for DB is when
+        * doing generate_mipmap. So here we flush DB manually between
+        * individual generate_mipmap blits.
+        * Note that lower mipmap levels aren't compressed.
+        */
+       if (sctx->generate_mipmap_for_depth)
+               sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
+
        /* Take the maximum of the old and new count. If the new count is lower,
         * dirtying is needed to disable the unbound colorbuffers.
         */
@@ -3990,9 +4001,9 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
                        sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
        }
 
+       /* Depth and stencil are flushed in si_decompress_textures when needed. */
        if (flags & PIPE_BARRIER_FRAMEBUFFER)
-               sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
-                                SI_CONTEXT_FLUSH_AND_INV_DB;
+               sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
 
        if (flags & (PIPE_BARRIER_FRAMEBUFFER |
                     PIPE_BARRIER_INDIRECT_BUFFER))
index d039e015cc67a2252e34454e36861187006d87ea..d13c8b7086fa11172b1d7c34ee26737852eecfb8 100644 (file)
@@ -1402,11 +1402,9 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                        struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
                        struct r600_texture *rtex = (struct r600_texture *)surf->texture;
 
-                       if (!rtex->tc_compatible_htile)
-                               rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+                       rtex->dirty_level_mask |= 1 << surf->u.tex.level;
 
-                       if (rtex->surface.flags & RADEON_SURF_SBUFFER &&
-                           (!rtex->tc_compatible_htile || !rtex->can_sample_s))
+                       if (rtex->surface.flags & RADEON_SURF_SBUFFER)
                                rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
                }
                if (sctx->framebuffer.compressed_cb_mask) {