radeonsi: implement EXPCLEAR optimization for depth
author: Marek Olšák <marek.olsak@amd.com>
Sat, 23 Aug 2014 14:46:53 +0000 (16:46 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Mon, 1 Sep 2014 19:18:52 +0000 (21:18 +0200)
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeonsi/si_blit.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state_draw.c

index cac6faac951bd4c5d29e372af2b58069c38c850e..b9a35c76633b796a1804968a248bf8b95dd1c514 100644 (file)
@@ -191,6 +191,7 @@ struct r600_texture {
 
        /* Depth buffer compression and fast clear. */
        struct r600_resource            *htile_buffer;
+       bool                            depth_cleared; /* if it was cleared at least once */
        float                           depth_clear_value;
 
        bool                            non_disp_tiling; /* R600-Cayman only */
index 4e77d74a79f9a3f92e7766c2392063480c4f7bbb..96d27ec062f97a0957042d54c33cdac5c3babd13 100644 (file)
@@ -362,6 +362,12 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
            zsbuf->u.tex.level == 0 &&
            zsbuf->u.tex.first_layer == 0 &&
            zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
+               /* Need to disable EXPCLEAR temporarily if clearing
+                * to a new value. */
+               if (zstex->depth_cleared && zstex->depth_clear_value != depth) {
+                       sctx->db_depth_disable_expclear = true;
+               }
+
                zstex->depth_clear_value = depth;
                sctx->framebuffer.atom.dirty = true; /* updates DB_DEPTH_CLEAR */
                sctx->db_depth_clear = true;
@@ -373,7 +379,11 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
                           buffers, color, depth, stencil);
        si_blitter_end(ctx);
 
-       sctx->db_depth_clear = false;
+       if (sctx->db_depth_clear) {
+               sctx->db_depth_clear = false;
+               sctx->db_depth_disable_expclear = false;
+               zstex->depth_cleared = true;
+       }
 }
 
 static void si_clear_render_target(struct pipe_context *ctx,
index 0a79983a49a64f6438bdc5c0d74802434016968b..55643d6eb7dc8018e1fc119a7dffc81b34014f7f 100644 (file)
@@ -165,6 +165,7 @@ struct si_context {
        unsigned dbcb_copy_sample;
        bool db_inplace_flush_enabled;
        bool db_depth_clear;
+       bool db_depth_disable_expclear;
 };
 
 /* si_blit.c */
index 9edc23fa1fd46edae6905ee6b362ce21568a7b86..bb8deb6ccff38c7a6bacf25cb4d1eecd96719cc1 100644 (file)
@@ -1823,7 +1823,8 @@ static void si_init_depth_surface(struct si_context *sctx,
        /* HiZ aka depth buffer htile */
        /* use htile only for first level */
        if (rtex->htile_buffer && !level) {
-               z_info |= S_028040_TILE_SURFACE_ENABLE(1);
+               z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
+                         S_028040_ALLOW_EXPCLEAR(1);
 
                /* This is optimal for the clear value of 1.0 and using
                 * the LESS and LEQUAL test functions. Set this to 0
index 4fb205a04b83ed5e1b3dd701b316bdc454ff6c89..2e9d9516d7c6d194263dea230bbfc23228d23ad8 100644 (file)
@@ -758,6 +758,14 @@ static void si_state_draw(struct si_context *sctx,
                si_pm4_set_reg(pm4, R_028000_DB_RENDER_CONTROL, 0);
        }
 
+       /* DB_RENDER_OVERRIDE2 */
+       if (sctx->db_depth_disable_expclear) {
+               si_pm4_set_reg(pm4, R_028010_DB_RENDER_OVERRIDE2,
+                              S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(1));
+       } else {
+               si_pm4_set_reg(pm4, R_028010_DB_RENDER_OVERRIDE2, 0);
+       }
+
        if (info->count_from_stream_output) {
                struct r600_so_target *t =
                        (struct r600_so_target*)info->count_from_stream_output;