radeonsi:optimizing SET_CONTEXT_REG for shaders PS
author Sonny Jiang <sonny.jiang@amd.com>
Wed, 3 Oct 2018 15:53:12 +0000 (11:53 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Fri, 5 Oct 2018 23:04:13 +0000 (19:04 -0400)
Signed-off-by: Sonny Jiang <sonny.jiang@amd.com>
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeonsi/si_gfx_cs.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index 1b931d71369a259043f2d332cbc34c8d7c237e98..5f1c69870094044ff0ad5cba9a6c31c734083684 100644 (file)
@@ -369,6 +369,13 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
                ctx->tracked_regs.reg_value[SI_TRACKED_SPI_VS_OUT_CONFIG]  = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_POS_FORMAT]  = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VTE_CNTL]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ENA]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ADDR]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_SPI_BARYC_CNTL]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_IN_CONTROL]  = 0x00000002;
+               ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_Z_FORMAT]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_COL_FORMAT]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_CB_SHADER_MASK]  = 0xffffffff;
 
                /* Set all saved registers state to saved. */
                ctx->tracked_regs.reg_saved = 0xffffffffffffffff;
index bf1ae9f18f86f22f57b885688947b60d0053db96..878b67f0ed342bea386a87be1c4bfa48c1547311 100644 (file)
@@ -302,6 +302,17 @@ enum si_tracked_reg {
        SI_TRACKED_SPI_SHADER_POS_FORMAT,
        SI_TRACKED_PA_CL_VTE_CNTL,
 
+       SI_TRACKED_SPI_PS_INPUT_ENA, /* 2 consecutive registers */
+       SI_TRACKED_SPI_PS_INPUT_ADDR,
+
+       SI_TRACKED_SPI_BARYC_CNTL,
+       SI_TRACKED_SPI_PS_IN_CONTROL,
+
+       SI_TRACKED_SPI_SHADER_Z_FORMAT, /* 2 consecutive registers */
+       SI_TRACKED_SPI_SHADER_COL_FORMAT,
+
+       SI_TRACKED_CB_SHADER_MASK,
+
        SI_NUM_TRACKED_REGS,
 };
 
index 25c01382a0851200f8fd211956e9a93d3825978c..b074214bbd31a55313f3bcc48443e91575972971 100644 (file)
@@ -1124,6 +1124,36 @@ static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
        return value;
 }
 
+static void si_emit_shader_ps(struct si_context *sctx)
+{
+       struct si_shader *shader = sctx->queued.named.ps->shader;
+       if (!shader)
+               return; /* no shader on the queued PS state: nothing to emit */
+
+       /* 2 consecutive registers: R_0286CC_SPI_PS_INPUT_ENA, R_0286D0_SPI_PS_INPUT_ADDR */
+       radeon_opt_set_context_reg2(sctx, R_0286CC_SPI_PS_INPUT_ENA,
+                                   SI_TRACKED_SPI_PS_INPUT_ENA,
+                                   shader->ctx_reg.ps.spi_ps_input_ena,
+                                   shader->ctx_reg.ps.spi_ps_input_addr);
+
+       radeon_opt_set_context_reg(sctx, R_0286E0_SPI_BARYC_CNTL,
+                                  SI_TRACKED_SPI_BARYC_CNTL,
+                                  shader->ctx_reg.ps.spi_baryc_cntl);
+       radeon_opt_set_context_reg(sctx, R_0286D8_SPI_PS_IN_CONTROL,
+                                  SI_TRACKED_SPI_PS_IN_CONTROL,
+                                  shader->ctx_reg.ps.spi_ps_in_control);
+
+       /* 2 consecutive registers: R_028710_SPI_SHADER_Z_FORMAT, R_028714_SPI_SHADER_COL_FORMAT */
+       radeon_opt_set_context_reg2(sctx, R_028710_SPI_SHADER_Z_FORMAT,
+                                   SI_TRACKED_SPI_SHADER_Z_FORMAT,
+                                   shader->ctx_reg.ps.spi_shader_z_format,
+                                   shader->ctx_reg.ps.spi_shader_col_format);
+
+       radeon_opt_set_context_reg(sctx, R_02823C_CB_SHADER_MASK,
+                                  SI_TRACKED_CB_SHADER_MASK,
+                                  shader->ctx_reg.ps.cb_shader_mask);
+}
+
 static void si_shader_ps(struct si_shader *shader)
 {
        struct tgsi_shader_info *info = &shader->selector->info;
@@ -1181,6 +1211,8 @@ static void si_shader_ps(struct si_shader *shader)
        if (!pm4)
                return;
 
+       pm4->atom.emit = si_emit_shader_ps;
+
        /* SPI_BARYC_CNTL.POS_FLOAT_LOCATION
         * Possible vaules:
         * 0 -> Position = pixel center
@@ -1223,24 +1255,20 @@ static void si_shader_ps(struct si_shader *shader)
            !info->writes_z && !info->writes_stencil && !info->writes_samplemask)
                spi_shader_col_format = V_028714_SPI_SHADER_32_R;
 
-       si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, input_ena);
-       si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR,
-                      shader->config.spi_ps_input_addr);
+       shader->ctx_reg.ps.spi_ps_input_ena = input_ena;
+       shader->ctx_reg.ps.spi_ps_input_addr = shader->config.spi_ps_input_addr;
 
        /* Set interpolation controls. */
        spi_ps_in_control = S_0286D8_NUM_INTERP(si_get_ps_num_interp(shader));
 
-       /* Set registers. */
-       si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
-       si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);
-
-       si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT,
-                      ac_get_spi_shader_z_format(info->writes_z,
-                                                 info->writes_stencil,
-                                                 info->writes_samplemask));
-
-       si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, spi_shader_col_format);
-       si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, cb_shader_mask);
+       shader->ctx_reg.ps.spi_baryc_cntl = spi_baryc_cntl;
+       shader->ctx_reg.ps.spi_ps_in_control = spi_ps_in_control;
+       shader->ctx_reg.ps.spi_shader_z_format =
+                       ac_get_spi_shader_z_format(info->writes_z,
+                                                  info->writes_stencil,
+                                                  info->writes_samplemask);
+       shader->ctx_reg.ps.spi_shader_col_format = spi_shader_col_format;
+       shader->ctx_reg.ps.cb_shader_mask = cb_shader_mask;
 
        va = shader->bo->gpu_address;
        si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);