radeonsi: emit_db_render_state packets optimization
author Sonny Jiang <sonny.jiang@amd.com>
Thu, 7 Jun 2018 16:13:48 +0000 (12:13 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Fri, 8 Jun 2018 03:26:25 +0000 (23:26 -0400)
Remember the latest states of registers to eliminate redundant SET_CONTEXT_REG packets.

Signed-off-by: Sonny Jiang <sonny.jiang@amd.com>
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeonsi/si_build_pm4.h
src/gallium/drivers/radeonsi/si_gfx_cs.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h

index 22f5558b7f1bed57fb065035c09b43a4ad57bd46..45d943fac90f9f2a26490948f71152a7cccfc9a9 100644 (file)
@@ -110,4 +110,47 @@ static inline void radeon_set_uconfig_reg_idx(struct radeon_winsys_cs *cs,
        radeon_emit(cs, value);
 }
 
+/**
+ * Emit PKT3_SET_CONTEXT_REG only if the register value is different.
+ *
+ * The packet is emitted when no value has been recorded for the tracked
+ * slot yet, or when the recorded value differs from the requested one.
+ * The written value is then remembered in sctx->tracked_regs.
+ *
+ * @param sctx          context providing gfx_cs and the tracked register state
+ * @param offset        register offset passed to radeon_set_context_reg
+ * @param reg           index of the register in sctx->tracked_regs
+ * @param value         value to write to the register
+ */
+static inline void radeon_opt_set_context_reg(struct si_context *sctx, unsigned offset,
+                                             enum si_tracked_reg reg, unsigned value)
+{
+       struct radeon_winsys_cs *cs = sctx->gfx_cs;
+       /* Unsigned constant: left-shifting a signed 1 into bit 31 is undefined. */
+       const unsigned reg_bit = 1u << reg;
+
+       if (!(sctx->tracked_regs.reg_saved & reg_bit) ||
+           sctx->tracked_regs.reg_value[reg] != value) {
+               radeon_set_context_reg(cs, offset, value);
+
+               sctx->tracked_regs.reg_saved |= reg_bit;
+               sctx->tracked_regs.reg_value[reg] = value;
+       }
+}
+
+/**
+ * Set 2 consecutive registers if either register's value is different.
+ *
+ * Both registers are written with one radeon_set_context_reg_seq packet
+ * whenever either tracked slot has no recorded value or holds a different
+ * value; otherwise nothing is emitted.
+ *
+ * @param sctx          context providing gfx_cs and the tracked register state
+ * @param offset        starting register offset
+ * @param reg           tracked slot of the first register; the second
+ *                      register uses slot reg + 1
+ * @param value1        is written to first register
+ * @param value2        is written to second register
+ */
+static inline void radeon_opt_set_context_reg2(struct si_context *sctx, unsigned offset,
+                                              enum si_tracked_reg reg, unsigned value1,
+                                              unsigned value2)
+{
+       struct radeon_winsys_cs *cs = sctx->gfx_cs;
+       /* Unsigned constant: shifting a signed int into the sign bit is undefined.
+        * The mask covers the "saved" bits of both slots (reg and reg + 1). */
+       const unsigned reg_pair_bits = 3u << reg;
+
+       if ((sctx->tracked_regs.reg_saved & reg_pair_bits) != reg_pair_bits ||
+           sctx->tracked_regs.reg_value[reg] != value1 ||
+           sctx->tracked_regs.reg_value[reg + 1] != value2) {
+               radeon_set_context_reg_seq(cs, offset, 2);
+               radeon_emit(cs, value1);
+               radeon_emit(cs, value2);
+
+               sctx->tracked_regs.reg_value[reg] = value1;
+               sctx->tracked_regs.reg_value[reg + 1] = value2;
+               sctx->tracked_regs.reg_saved |= reg_pair_bits;
+       }
+}
+
 #endif
index ec74c1bc70368363cdf87a85a0b696ce0277f527..0b9a020587452b52b6e5786cc06f4d676c2191a0 100644 (file)
@@ -321,4 +321,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
        ctx->last_num_tcs_input_cp = -1;
 
        ctx->cs_shader_state.initialized = false;
+
+       /* Set all saved registers state to unknown */
+       ctx->tracked_regs.reg_saved = 0;
 }
index cf9b124fe5a370f558808433a1678ac9016522ec..4edefbdbd254b86e78aec86d48187c7bf72dd416 100644 (file)
@@ -1033,6 +1033,8 @@ struct si_context {
 
        void (*dma_clear_buffer)(struct si_context *sctx, struct pipe_resource *dst,
                                 uint64_t offset, uint64_t size, unsigned value);
+
+       struct si_tracked_regs                  tracked_regs;
 };
 
 /* cik_sdma.c */
index 3a7e928df5325aa7d9ddd655677e8f1328fe13a6..c95b92940aa4caa1f80edc84d9b11a7700d4c155 100644 (file)
@@ -1343,28 +1343,25 @@ void si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st)
 
 static void si_emit_db_render_state(struct si_context *sctx)
 {
-       struct radeon_winsys_cs *cs = sctx->gfx_cs;
        struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
-       unsigned db_shader_control;
-
-       radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
+       unsigned db_shader_control, db_render_control, db_count_control;
 
        /* DB_RENDER_CONTROL */
        if (sctx->dbcb_depth_copy_enabled ||
            sctx->dbcb_stencil_copy_enabled) {
-               radeon_emit(cs,
-                           S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
-                           S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
-                           S_028000_COPY_CENTROID(1) |
-                           S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
+               db_render_control =
+                       S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
+                       S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
+                       S_028000_COPY_CENTROID(1) |
+                       S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample);
        } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
-               radeon_emit(cs,
-                           S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
-                           S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
+               db_render_control =
+                       S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
+                       S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace);
        } else {
-               radeon_emit(cs,
-                           S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
-                           S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear));
+               db_render_control =
+                       S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
+                       S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear);
        }
 
        /* DB_COUNT_CONTROL (occlusion queries) */
@@ -1373,28 +1370,33 @@ static void si_emit_db_render_state(struct si_context *sctx)
                bool perfect = sctx->num_perfect_occlusion_queries > 0;
 
                if (sctx->chip_class >= CIK) {
-                       radeon_emit(cs,
-                                   S_028004_PERFECT_ZPASS_COUNTS(perfect) |
-                                   S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
-                                   S_028004_ZPASS_ENABLE(1) |
-                                   S_028004_SLICE_EVEN_ENABLE(1) |
-                                   S_028004_SLICE_ODD_ENABLE(1));
+                       db_count_control =
+                               S_028004_PERFECT_ZPASS_COUNTS(perfect) |
+                               S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
+                               S_028004_ZPASS_ENABLE(1) |
+                               S_028004_SLICE_EVEN_ENABLE(1) |
+                               S_028004_SLICE_ODD_ENABLE(1);
                } else {
-                       radeon_emit(cs,
-                                   S_028004_PERFECT_ZPASS_COUNTS(perfect) |
-                                   S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples));
+                       db_count_control =
+                               S_028004_PERFECT_ZPASS_COUNTS(perfect) |
+                               S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples);
                }
        } else {
                /* Disable occlusion queries. */
                if (sctx->chip_class >= CIK) {
-                       radeon_emit(cs, 0);
+                       db_count_control = 0;
                } else {
-                       radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1));
+                       db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1);
                }
        }
 
+       radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL,
+                                   SI_TRACKED_DB_RENDER_CONTROL, db_render_control,
+                                   db_count_control);
+
        /* DB_RENDER_OVERRIDE2 */
-       radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
+       radeon_opt_set_context_reg(sctx,  R_028010_DB_RENDER_OVERRIDE2,
+               SI_TRACKED_DB_RENDER_OVERRIDE2,
                S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
                S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
                S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));
@@ -1415,8 +1417,8 @@ static void si_emit_db_render_state(struct si_context *sctx)
            !sctx->screen->rbplus_allowed)
                db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
 
-       radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
-                              db_shader_control);
+       radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL,
+                                  SI_TRACKED_DB_SHADER_CONTROL, db_shader_control);
 }
 
 /*
index d235f31c7925264817e95fd3850638a1a5f0801b..fb5f7211895dc986b7469b27d164475dff911327 100644 (file)
@@ -206,6 +206,22 @@ struct si_shader_data {
        uint32_t                sh_base[SI_NUM_SHADERS];
 };
 
+/* The list of registers whose emitted values are remembered by si_context.
+ *
+ * Each enumerator is used both as an index into si_tracked_regs.reg_value
+ * and as a bit position in si_tracked_regs.reg_saved, which is a uint32_t,
+ * so SI_NUM_TRACKED_REGS must not exceed 32. */
+enum si_tracked_reg {
+       SI_TRACKED_DB_RENDER_CONTROL, /* 2 consecutive registers: radeon_opt_set_context_reg2 addresses the second one as "reg + 1", so the next enumerator must stay adjacent */
+       SI_TRACKED_DB_COUNT_CONTROL,
+
+       SI_TRACKED_DB_RENDER_OVERRIDE2,
+       SI_TRACKED_DB_SHADER_CONTROL,
+
+       SI_NUM_TRACKED_REGS,
+};
+
+struct si_tracked_regs { /* Last emitted values of the registers listed in enum si_tracked_reg. */
+       uint32_t                reg_saved; /* bitmask: bit N set means reg_value[N] holds a recorded value; zeroed in si_begin_new_gfx_cs */
+       uint32_t                reg_value[SI_NUM_TRACKED_REGS]; /* recorded value per tracked register, indexed by enum si_tracked_reg */
+};
+
 /* Private read-write buffer slots. */
 enum {
        SI_ES_RING_ESGS,