radeonsi: emit_spi_map packets optimization
author Sonny Jiang <sonny.jiang@amd.com>
Wed, 18 Jul 2018 21:48:50 +0000 (17:48 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Fri, 20 Jul 2018 17:50:26 +0000 (13:50 -0400)
v2: marek: remove an empty line before break;
    rename reg_val_seq -> spi_ps_input_cntl
    "type * x" -> "type *x"

Signed-off-by: Sonny Jiang <sonny.jiang@amd.com>
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeonsi/si_build_pm4.h
src/gallium/drivers/radeonsi/si_gfx_cs.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index b339cd57ce74aecbbbfb3456c5fe62d3fc83f4c8..f10929a67f9681787b20c480f091b0ca16358a98 100644 (file)
@@ -214,4 +214,26 @@ static inline void radeon_opt_set_context_reg4(struct si_context *sctx, unsigned
        }
 }
 
+/**
+ * Set "num" consecutive context registers, but only if at least one of the
+ * new values differs from the corresponding saved value.
+ *
+ * The new values are compared element by element against saved_val. At the
+ * first mismatch, one radeon_set_context_reg_seq() header is written for
+ * (offset, num), all "num" values are emitted with radeon_emit(), saved_val
+ * is overwritten with a copy of value, and the search stops. If every element
+ * matches, nothing is emitted and saved_val is left untouched.
+ *
+ * \param sctx       context whose gfx_cs command buffer receives the packet
+ * \param offset     register offset passed to radeon_set_context_reg_seq()
+ *                   (presumably the first register of the run - confirm
+ *                   against that helper)
+ * \param value      array of "num" new register values
+ * \param saved_val  array of "num" previously saved values; updated in place
+ *                   whenever the registers are emitted
+ * \param num        number of consecutive registers / array elements
+ *
+ * NOTE(review): i and j are signed int while num is unsigned, so the loop
+ * conditions are evaluated as unsigned comparisons.
+ */
+static inline void radeon_opt_set_context_regn(struct si_context *sctx, unsigned offset,
+                                              unsigned *value, unsigned *saved_val,
+                                              unsigned num)
+{
+       struct radeon_cmdbuf *cs = sctx->gfx_cs;
+       int i, j;
+
+       for (i = 0; i < num; i++) {
+               if (saved_val[i] != value[i]) {
+                       radeon_set_context_reg_seq(cs, offset, num);
+                       for (j = 0; j < num; j++)
+                               radeon_emit(cs, value[j]);
+
+                       memcpy(saved_val, value, sizeof(uint32_t) * num);
+                       break;
+               }
+       }
+}
+
 #endif
index 628b6c50e4637b998faa98d387b225839ae94e97..9dbd4c64f2a61e7c4eae77d83e4866f054056fa7 100644 (file)
@@ -353,4 +353,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
                /* Set all saved registers state to unknown. */
                ctx->tracked_regs.reg_saved = 0;
        }
+
+       /* 0xffffffff is an impossible value for the SPI_PS_INPUT_CNTL_n registers. */
+       memset(ctx->tracked_regs.spi_ps_input_cntl, 0xff, sizeof(uint32_t) * 32);
 }
index 71056c76c38dd4e9c0b2b7696e9f7429d6f4d2be..1edf5d646b63fdb2007cb55566d872a07e850867 100644 (file)
@@ -287,6 +287,7 @@ enum si_tracked_reg {
 struct si_tracked_regs {
        uint32_t                reg_saved;
        uint32_t                reg_value[SI_NUM_TRACKED_REGS];
+       uint32_t                spi_ps_input_cntl[32]; /* saved SPI_PS_INPUT_CNTL_n values; filled with 0xff bytes by si_begin_new_gfx_cs */
 };
 
 /* Private read-write buffer slots. */
index ffc8821df09c395b8a8610a52e401144c08ad462..4e0320a226d823dc277ed040c23ea69206e5a006 100644 (file)
@@ -2634,27 +2634,25 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx,
 
 static void si_emit_spi_map(struct si_context *sctx)
 {
-       struct radeon_cmdbuf *cs = sctx->gfx_cs;
        struct si_shader *ps = sctx->ps_shader.current;
        struct si_shader *vs = si_get_vs_state(sctx);
        struct tgsi_shader_info *psinfo = ps ? &ps->selector->info : NULL;
        unsigned i, num_interp, num_written = 0, bcol_interp[2];
+       unsigned spi_ps_input_cntl[32];
 
        if (!ps || !ps->selector->info.num_inputs)
                return;
 
        num_interp = si_get_ps_num_interp(ps);
        assert(num_interp > 0);
-       radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, num_interp);
 
        for (i = 0; i < psinfo->num_inputs; i++) {
                unsigned name = psinfo->input_semantic_name[i];
                unsigned index = psinfo->input_semantic_index[i];
                unsigned interpolate = psinfo->input_interpolate[i];
 
-               radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, name, index,
-                                                    interpolate));
-               num_written++;
+               spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, name,
+                                                           index, interpolate);
 
                if (name == TGSI_SEMANTIC_COLOR) {
                        assert(index < ARRAY_SIZE(bcol_interp));
@@ -2669,12 +2667,19 @@ static void si_emit_spi_map(struct si_context *sctx)
                        if (!(psinfo->colors_read & (0xf << (i * 4))))
                                continue;
 
-                       radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, bcol,
-                                                            i, bcol_interp[i]));
-                       num_written++;
+                       spi_ps_input_cntl[num_written++] =
+                         si_get_ps_input_cntl(sctx, vs, bcol, i, bcol_interp[i]);
+
                }
        }
        assert(num_interp == num_written);
+
+       /* R_028644_SPI_PS_INPUT_CNTL_0 */
+       /* Dota 2: Only ~16% of SPI map updates set different values. */
+       /* Talos: Only ~9% of SPI map updates set different values. */
+       radeon_opt_set_context_regn(sctx, R_028644_SPI_PS_INPUT_CNTL_0,
+                                   spi_ps_input_cntl,
+                                   sctx->tracked_regs.spi_ps_input_cntl, num_interp);
 }
 
 /**