radeonsi: only do depth-only or stencil-only in-place decompression
author Marek Olšák <marek.olsak@amd.com>
Sun, 6 Sep 2015 15:37:38 +0000 (17:37 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Sat, 3 Oct 2015 20:06:08 +0000 (22:06 +0200)
Decompress only depth or only stencil in place, instead of always doing both.
Usually, only depth is needed, so the stencil decompression is useless work.

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
src/gallium/drivers/radeonsi/si_blit.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state.c

index 93fa67a953ec313851b05f8edb10097daca84578..d5c5db30029d5d560b2384fe6f6c7acf8c0de78d 100644 (file)
@@ -180,19 +180,27 @@ static void si_blit_decompress_depth(struct pipe_context *ctx,
 
 static void si_blit_decompress_depth_in_place(struct si_context *sctx,
                                               struct r600_texture *texture,
+                                             bool is_stencil_sampler,
                                               unsigned first_level, unsigned last_level,
                                               unsigned first_layer, unsigned last_layer)
 {
        struct pipe_surface *zsurf, surf_tmpl = {{0}};
        unsigned layer, max_layer, checked_last_layer, level;
-
-       sctx->db_inplace_flush_enabled = true;
+       unsigned *dirty_level_mask;
+
+       if (is_stencil_sampler) {
+               sctx->db_flush_stencil_inplace = true;
+               dirty_level_mask = &texture->stencil_dirty_level_mask;
+       } else {
+               sctx->db_flush_depth_inplace = true;
+               dirty_level_mask = &texture->dirty_level_mask;
+       }
        si_mark_atom_dirty(sctx, &sctx->db_render_state);
 
        surf_tmpl.format = texture->resource.b.b.format;
 
        for (level = first_level; level <= last_level; level++) {
-               if (!(texture->dirty_level_mask & (1 << level)))
+               if (!(*dirty_level_mask & (1 << level)))
                        continue;
 
                surf_tmpl.u.tex.level = level;
@@ -220,11 +228,12 @@ static void si_blit_decompress_depth_in_place(struct si_context *sctx,
                /* The texture will always be dirty if some layers aren't flushed.
                 * I don't think this case occurs often though. */
                if (first_layer == 0 && last_layer == max_layer) {
-                       texture->dirty_level_mask &= ~(1 << level);
+                       *dirty_level_mask &= ~(1 << level);
                }
        }
 
-       sctx->db_inplace_flush_enabled = false;
+       sctx->db_flush_depth_inplace = false;
+       sctx->db_flush_stencil_inplace = false;
        si_mark_atom_dirty(sctx, &sctx->db_render_state);
 }
 
@@ -236,17 +245,20 @@ void si_flush_depth_textures(struct si_context *sctx,
 
        while (mask) {
                struct pipe_sampler_view *view;
+               struct si_sampler_view *sview;
                struct r600_texture *tex;
 
                i = u_bit_scan(&mask);
 
                view = textures->views.views[i];
                assert(view);
+               sview = (struct si_sampler_view*)view;
 
                tex = (struct r600_texture *)view->texture;
                assert(tex->is_depth && !tex->is_flushing_texture);
 
                si_blit_decompress_depth_in_place(sctx, tex,
+                                                 sview->is_stencil_sampler,
                                                  view->u.tex.first_level, view->u.tex.last_level,
                                                  0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level));
        }
@@ -436,9 +448,13 @@ static void si_decompress_subresource(struct pipe_context *ctx,
        struct r600_texture *rtex = (struct r600_texture*)tex;
 
        if (rtex->is_depth && !rtex->is_flushing_texture) {
-               si_blit_decompress_depth_in_place(sctx, rtex,
+               si_blit_decompress_depth_in_place(sctx, rtex, false,
                                                  level, level,
                                                  first_layer, last_layer);
+               if (rtex->surface.flags & RADEON_SURF_SBUFFER)
+                       si_blit_decompress_depth_in_place(sctx, rtex, true,
+                                                         level, level,
+                                                         first_layer, last_layer);
        } else if (rtex->fmask.size || rtex->cmask.size) {
                si_blit_decompress_color(ctx, rtex, level, level,
                                         first_layer, last_layer);
index 41b2832322c9262e7ce3c10a9fdbe0468625664d..a882d36e17041e6affbdb53b360463d0b49a9c64 100644 (file)
@@ -100,6 +100,7 @@ struct si_sampler_view {
          * [4..7] = buffer descriptor */
        uint32_t                        state[8];
        uint32_t                        fmask_state[8];
+       bool is_stencil_sampler;
 };
 
 struct si_sampler_state {
@@ -237,7 +238,8 @@ struct si_context {
        bool                    dbcb_depth_copy_enabled;
        bool                    dbcb_stencil_copy_enabled;
        unsigned                dbcb_copy_sample;
-       bool                    db_inplace_flush_enabled;
+       bool                    db_flush_depth_inplace;
+       bool                    db_flush_stencil_inplace;
        bool                    db_depth_clear;
        bool                    db_depth_disable_expclear;
        unsigned                ps_db_shader_control;
index 5d4e579b39277e8e41ea2781bc9316538f329c95..85074bdbf5be9bd312a7f91b63ab939b8e038f7b 100644 (file)
@@ -998,10 +998,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
                            S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
                            S_028000_COPY_CENTROID(1) |
                            S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
-       } else if (sctx->db_inplace_flush_enabled) {
+       } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
                radeon_emit(cs,
-                           S_028000_DEPTH_COMPRESS_DISABLE(1) |
-                           S_028000_STENCIL_COMPRESS_DISABLE(1));
+                           S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
+                           S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
        } else if (sctx->db_depth_clear) {
                radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1));
        } else {
@@ -2411,6 +2411,12 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
        pipe_resource_reference(&view->base.texture, texture);
        view->resource = &tmp->resource;
 
+       if (state->format == PIPE_FORMAT_X24S8_UINT ||
+           state->format == PIPE_FORMAT_S8X24_UINT ||
+           state->format == PIPE_FORMAT_X32_S8X24_UINT ||
+           state->format == PIPE_FORMAT_S8_UINT)
+               view->is_stencil_sampler = true;
+
        /* Buffer resource. */
        if (texture->target == PIPE_BUFFER) {
                unsigned stride, num_records;