radeonsi: implement fast stencil clear
author Marek Olšák <marek.olsak@amd.com>
Thu, 10 Dec 2015 00:37:39 +0000 (01:37 +0100)
committer Marek Olšák <marek.olsak@amd.com>
Fri, 11 Dec 2015 14:25:12 +0000 (15:25 +0100)
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeonsi/si_blit.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state.c

index dd23ed5be890ed56c7cf1f0dc4e000647617f4f6..8fbbe88c62b5d16c98aa1595e92e668c138e8678 100644 (file)
@@ -222,6 +222,8 @@ struct r600_texture {
        struct r600_resource            *htile_buffer;
        bool                            depth_cleared; /* if it was cleared at least once */
        float                           depth_clear_value;
+       bool                            stencil_cleared; /* if it was cleared at least once */
+       uint8_t                         stencil_clear_value;
 
        bool                            non_disp_tiling; /* R600-Cayman only */
 };
index 13d8e6f2a5f26833d60e6f107b07f125202fac33..75a9d56d1103c676170325357d8932a75823919c 100644 (file)
@@ -377,22 +377,39 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
                }
        }
 
-       if (buffers & PIPE_CLEAR_DEPTH &&
-           zstex && zstex->htile_buffer &&
+       if (zstex && zstex->htile_buffer &&
            zsbuf->u.tex.level == 0 &&
            zsbuf->u.tex.first_layer == 0 &&
            zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
-               /* Need to disable EXPCLEAR temporarily if clearing
-                * to a new value. */
-               if (zstex->depth_cleared && zstex->depth_clear_value != depth) {
-                       sctx->db_depth_disable_expclear = true;
+               if (buffers & PIPE_CLEAR_DEPTH) {
+                       /* Need to disable EXPCLEAR temporarily if clearing
+                        * to a new value. */
+                       if (zstex->depth_cleared && zstex->depth_clear_value != depth) {
+                               sctx->db_depth_disable_expclear = true;
+                       }
+
+                       zstex->depth_clear_value = depth;
+                       sctx->framebuffer.dirty_zsbuf = true;
+                       si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
+                       sctx->db_depth_clear = true;
+                       si_mark_atom_dirty(sctx, &sctx->db_render_state);
                }
 
-               zstex->depth_clear_value = depth;
-               sctx->framebuffer.dirty_zsbuf = true;
-               si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
-               sctx->db_depth_clear = true;
-               si_mark_atom_dirty(sctx, &sctx->db_render_state);
+               if (buffers & PIPE_CLEAR_STENCIL) {
+                       stencil &= 0xff;
+
+                       /* Need to disable EXPCLEAR temporarily if clearing
+                        * to a new value. */
+                       if (zstex->stencil_cleared && zstex->stencil_clear_value != stencil) {
+                               sctx->db_stencil_disable_expclear = true;
+                       }
+
+                       zstex->stencil_clear_value = stencil;
+                       sctx->framebuffer.dirty_zsbuf = true;
+                       si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_STENCIL_CLEAR */
+                       sctx->db_stencil_clear = true;
+                       si_mark_atom_dirty(sctx, &sctx->db_render_state);
+               }
        }
 
        si_blitter_begin(ctx, SI_CLEAR);
@@ -407,6 +424,13 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
                zstex->depth_cleared = true;
                si_mark_atom_dirty(sctx, &sctx->db_render_state);
        }
+
+       if (sctx->db_stencil_clear) {
+               sctx->db_stencil_clear = false;
+               sctx->db_stencil_disable_expclear = false;
+               zstex->stencil_cleared = true;
+               si_mark_atom_dirty(sctx, &sctx->db_render_state);
+       }
 }
 
 static void si_clear_render_target(struct pipe_context *ctx,
index 834c3587569a3bb51435436c3c288cd0a8275b0a..65c7e198d1d9d802d7b11cf2e4e404c80a798f91 100644 (file)
@@ -253,6 +253,8 @@ struct si_context {
        bool                    db_flush_stencil_inplace;
        bool                    db_depth_clear;
        bool                    db_depth_disable_expclear;
+       bool                    db_stencil_clear;
+       bool                    db_stencil_disable_expclear;
        unsigned                ps_db_shader_control;
 
        /* Emitted draw state. */
index c2d4d44513cff83c7eda72f89d4bf7adf29a20b9..4086819c1acc300f4a1f71788d1e8c2278698ba0 100644 (file)
@@ -1090,10 +1090,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
                radeon_emit(cs,
                            S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
                            S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
-       } else if (sctx->db_depth_clear) {
-               radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1));
        } else {
-               radeon_emit(cs, 0);
+               radeon_emit(cs,
+                           S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
+                           S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear));
        }
 
        /* DB_COUNT_CONTROL (occlusion queries) */
@@ -1120,12 +1120,9 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
        }
 
        /* DB_RENDER_OVERRIDE2 */
-       if (sctx->db_depth_disable_expclear) {
-               radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
-                       S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(1));
-       } else {
-               radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0);
-       }
+       radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
+               S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
+               S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear));
 
        db_shader_control = S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) |
                            sctx->ps_db_shader_control;
@@ -2217,7 +2214,10 @@ static void si_init_depth_surface(struct si_context *sctx,
                z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
                          S_028040_ALLOW_EXPCLEAR(1);
 
-               if (!(rtex->surface.flags & RADEON_SURF_SBUFFER))
+               if (rtex->surface.flags & RADEON_SURF_SBUFFER)
+                       s_info |= S_028044_ALLOW_EXPCLEAR(1);
+               else
+                       /* Use all of the htile_buffer for depth if there's no stencil. */
                        s_info |= S_028044_TILE_STENCIL_DISABLE(1);
 
                uint64_t va = rtex->htile_buffer->gpu_address;
@@ -2486,8 +2486,11 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
                radeon_emit(cs, zb->db_depth_size);     /* R_028058_DB_DEPTH_SIZE */
                radeon_emit(cs, zb->db_depth_slice);    /* R_02805C_DB_DEPTH_SLICE */
 
+               radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
+               radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */
+               radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */
+
                radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
-               radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
                radeon_set_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
                                       zb->pa_su_poly_offset_db_fmt_cntl);
        } else if (sctx->framebuffer.dirty_zsbuf) {
@@ -3578,7 +3581,6 @@ static void si_init_config(struct si_context *sctx)
        si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
        si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0));
        si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0));
-       si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0);
        si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
        si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
        si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);