From 43b0269ce3e349590c49f082008ecfe9e7f85014 Mon Sep 17 00:00:00 2001 From: Sonny Jiang Date: Thu, 7 Jun 2018 12:13:48 -0400 Subject: [PATCH] radeonsi: emit_db_render_state packets optimization MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Remembering latest states of registers to eliminate redundant SET_CONTEXT_REG packets Signed-off-by: Sonny Jiang Signed-off-by: Marek Olšák --- src/gallium/drivers/radeonsi/si_build_pm4.h | 43 +++++++++++++++ src/gallium/drivers/radeonsi/si_gfx_cs.c | 3 ++ src/gallium/drivers/radeonsi/si_pipe.h | 2 + src/gallium/drivers/radeonsi/si_state.c | 60 +++++++++++---------- src/gallium/drivers/radeonsi/si_state.h | 16 ++++++ 5 files changed, 95 insertions(+), 29 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h b/src/gallium/drivers/radeonsi/si_build_pm4.h index 22f5558b7f1..45d943fac90 100644 --- a/src/gallium/drivers/radeonsi/si_build_pm4.h +++ b/src/gallium/drivers/radeonsi/si_build_pm4.h @@ -110,4 +110,47 @@ static inline void radeon_set_uconfig_reg_idx(struct radeon_winsys_cs *cs, radeon_emit(cs, value); } +/* Emit PKT3_SET_CONTEXT_REG if the register value is different. */ +static inline void radeon_opt_set_context_reg(struct si_context *sctx, unsigned offset, + enum si_tracked_reg reg, unsigned value) +{ + struct radeon_winsys_cs *cs = sctx->gfx_cs; + + if (!(sctx->tracked_regs.reg_saved & (1 << reg)) || + sctx->tracked_regs.reg_value[reg] != value ) { + + radeon_set_context_reg(cs, offset, value); + + sctx->tracked_regs.reg_saved |= 1 << reg; + sctx->tracked_regs.reg_value[reg] = value; + } +} + +/** + * Set 2 consecutive registers if either register's value is different. 
+ * @param offset starting register offset + * @param value1 is written to first register + * @param value2 is written to second register + */ +static inline void radeon_opt_set_context_reg2(struct si_context *sctx, unsigned offset, + enum si_tracked_reg reg, unsigned value1, + unsigned value2) +{ + struct radeon_winsys_cs *cs = sctx->gfx_cs; + + if (!(sctx->tracked_regs.reg_saved & (1 << reg)) || + !(sctx->tracked_regs.reg_saved & (1 << (reg + 1))) || + sctx->tracked_regs.reg_value[reg] != value1 || + sctx->tracked_regs.reg_value[reg+1] != value2 ) { + + radeon_set_context_reg_seq(cs, offset, 2); + radeon_emit(cs, value1); + radeon_emit(cs, value2); + + sctx->tracked_regs.reg_value[reg] = value1; + sctx->tracked_regs.reg_value[reg+1] = value2; + sctx->tracked_regs.reg_saved |= 3 << reg; + } +} + #endif diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index ec74c1bc703..0b9a0205874 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -321,4 +321,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->last_num_tcs_input_cp = -1; ctx->cs_shader_state.initialized = false; + + /* Set all saved registers state to unknown */ + ctx->tracked_regs.reg_saved = 0; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index cf9b124fe5a..4edefbdbd25 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1033,6 +1033,8 @@ struct si_context { void (*dma_clear_buffer)(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset, uint64_t size, unsigned value); + + struct si_tracked_regs tracked_regs; }; /* cik_sdma.c */ diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 3a7e928df53..c95b92940aa 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1343,28 +1343,25 @@ void 
si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st) static void si_emit_db_render_state(struct si_context *sctx) { - struct radeon_winsys_cs *cs = sctx->gfx_cs; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; - unsigned db_shader_control; - - radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2); + unsigned db_shader_control, db_render_control, db_count_control; /* DB_RENDER_CONTROL */ if (sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled) { - radeon_emit(cs, - S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) | - S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | - S_028000_COPY_CENTROID(1) | - S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample)); + db_render_control = + S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) | + S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | + S_028000_COPY_CENTROID(1) | + S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample); } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) { - radeon_emit(cs, - S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) | - S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace)); + db_render_control = + S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) | + S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace); } else { - radeon_emit(cs, - S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) | - S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear)); + db_render_control = + S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) | + S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear); } /* DB_COUNT_CONTROL (occlusion queries) */ @@ -1373,28 +1370,33 @@ static void si_emit_db_render_state(struct si_context *sctx) bool perfect = sctx->num_perfect_occlusion_queries > 0; if (sctx->chip_class >= CIK) { - radeon_emit(cs, - S_028004_PERFECT_ZPASS_COUNTS(perfect) | - S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) | - S_028004_ZPASS_ENABLE(1) | - S_028004_SLICE_EVEN_ENABLE(1) | - 
S_028004_SLICE_ODD_ENABLE(1)); + db_count_control = + S_028004_PERFECT_ZPASS_COUNTS(perfect) | + S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) | + S_028004_ZPASS_ENABLE(1) | + S_028004_SLICE_EVEN_ENABLE(1) | + S_028004_SLICE_ODD_ENABLE(1); } else { - radeon_emit(cs, - S_028004_PERFECT_ZPASS_COUNTS(perfect) | - S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples)); + db_count_control = + S_028004_PERFECT_ZPASS_COUNTS(perfect) | + S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples); } } else { /* Disable occlusion queries. */ if (sctx->chip_class >= CIK) { - radeon_emit(cs, 0); + db_count_control = 0; } else { - radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1)); + db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1); } } + radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL, + SI_TRACKED_DB_RENDER_CONTROL, db_render_control, + db_count_control); + /* DB_RENDER_OVERRIDE2 */ - radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, + radeon_opt_set_context_reg(sctx, R_028010_DB_RENDER_OVERRIDE2, + SI_TRACKED_DB_RENDER_OVERRIDE2, S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) | S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) | S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4)); @@ -1415,8 +1417,8 @@ static void si_emit_db_render_state(struct si_context *sctx) !sctx->screen->rbplus_allowed) db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); - radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, - db_shader_control); + radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL, + SI_TRACKED_DB_SHADER_CONTROL, db_shader_control); } /* diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index d235f31c792..fb5f7211895 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -206,6 +206,22 @@ struct si_shader_data { uint32_t sh_base[SI_NUM_SHADERS]; }; +/* The list of registers whose 
emitted values are remembered by si_context. */ +enum si_tracked_reg { + SI_TRACKED_DB_RENDER_CONTROL, /* 2 consecutive registers */ + SI_TRACKED_DB_COUNT_CONTROL, + + SI_TRACKED_DB_RENDER_OVERRIDE2, + SI_TRACKED_DB_SHADER_CONTROL, + + SI_NUM_TRACKED_REGS, +}; + +struct si_tracked_regs { + uint32_t reg_saved; + uint32_t reg_value[SI_NUM_TRACKED_REGS]; +}; + /* Private read-write buffer slots. */ enum { SI_ES_RING_ESGS, -- 2.30.2