From c6737756addee6c1ed80c042ba91ede87f50732a Mon Sep 17 00:00:00 2001 From: Sonny Jiang Date: Wed, 18 Jul 2018 17:48:50 -0400 Subject: [PATCH] radeonsi: emit_spi_map packets optimization MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit v2: marek: remove an empty line before break; rename reg_val_seq -> spi_ps_input_cntl "type * x" -> "type *x" Signed-off-by: Sonny Jiang Signed-off-by: Marek Olšák --- src/gallium/drivers/radeonsi/si_build_pm4.h | 22 +++++++++++++++++++ src/gallium/drivers/radeonsi/si_gfx_cs.c | 3 +++ src/gallium/drivers/radeonsi/si_state.h | 1 + .../drivers/radeonsi/si_state_shaders.c | 21 +++++++++++------- 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h b/src/gallium/drivers/radeonsi/si_build_pm4.h index b339cd57ce7..f10929a67f9 100644 --- a/src/gallium/drivers/radeonsi/si_build_pm4.h +++ b/src/gallium/drivers/radeonsi/si_build_pm4.h @@ -214,4 +214,26 @@ static inline void radeon_opt_set_context_reg4(struct si_context *sctx, unsigned } } +/** + * Set consecutive registers if any register's value is different. + */ +static inline void radeon_opt_set_context_regn(struct si_context *sctx, unsigned offset, + unsigned *value, unsigned *saved_val, + unsigned num) +{ + struct radeon_cmdbuf *cs = sctx->gfx_cs; + int i, j; + + for (i = 0; i < num; i++) { + if (saved_val[i] != value[i]) { + radeon_set_context_reg_seq(cs, offset, num); + for (j = 0; j < num; j++) + radeon_emit(cs, value[j]); + + memcpy(saved_val, value, sizeof(uint32_t) * num); + break; + } + } +} + #endif diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 628b6c50e46..9dbd4c64f2a 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -353,4 +353,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx) /* Set all saved registers state to unknown. 
 */ ctx->tracked_regs.reg_saved = 0; } + + /* 0xffffffff is an impossible value for register SPI_PS_INPUT_CNTL_n */ + memset(ctx->tracked_regs.spi_ps_input_cntl, 0xff, sizeof(uint32_t) * 32); } diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 71056c76c38..1edf5d646b6 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -287,6 +287,7 @@ enum si_tracked_reg { struct si_tracked_regs { uint32_t reg_saved; uint32_t reg_value[SI_NUM_TRACKED_REGS]; + uint32_t spi_ps_input_cntl[32]; }; /* Private read-write buffer slots. */ diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index ffc8821df09..4e0320a226d 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2634,27 +2634,25 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx, static void si_emit_spi_map(struct si_context *sctx) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; struct si_shader *ps = sctx->ps_shader.current; struct si_shader *vs = si_get_vs_state(sctx); struct tgsi_shader_info *psinfo = ps ? 
&ps->selector->info : NULL; unsigned i, num_interp, num_written = 0, bcol_interp[2]; + unsigned spi_ps_input_cntl[32]; if (!ps || !ps->selector->info.num_inputs) return; num_interp = si_get_ps_num_interp(ps); assert(num_interp > 0); - radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, num_interp); for (i = 0; i < psinfo->num_inputs; i++) { unsigned name = psinfo->input_semantic_name[i]; unsigned index = psinfo->input_semantic_index[i]; unsigned interpolate = psinfo->input_interpolate[i]; - radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, name, index, - interpolate)); - num_written++; + spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, name, + index, interpolate); if (name == TGSI_SEMANTIC_COLOR) { assert(index < ARRAY_SIZE(bcol_interp)); @@ -2669,12 +2667,19 @@ static void si_emit_spi_map(struct si_context *sctx) if (!(psinfo->colors_read & (0xf << (i * 4)))) continue; - radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, bcol, - i, bcol_interp[i])); - num_written++; + spi_ps_input_cntl[num_written++] = + si_get_ps_input_cntl(sctx, vs, bcol, i, bcol_interp[i]); + } } assert(num_interp == num_written); + + /* R_028644_SPI_PS_INPUT_CNTL_0 */ + /* Dota 2: Only ~16% of SPI map updates set different values. */ + /* Talos: Only ~9% of SPI map updates set different values. */ + radeon_opt_set_context_regn(sctx, R_028644_SPI_PS_INPUT_CNTL_0, + spi_ps_input_cntl, + sctx->tracked_regs.spi_ps_input_cntl, num_interp); } /** -- 2.30.2