radeonsi: optimizing SET_CONTEXT_REG for shaders ES
author Sonny Jiang <sonny.jiang@amd.com>
Wed, 3 Oct 2018 15:53:09 +0000 (11:53 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Fri, 5 Oct 2018 21:53:52 +0000 (17:53 -0400)
Signed-off-by: Sonny Jiang <sonny.jiang@amd.com>
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeonsi/si_gfx_cs.c
src/gallium/drivers/radeonsi/si_pm4.c
src/gallium/drivers/radeonsi/si_pm4.h
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index bdb576f7e5cc8bd32bd01e6ba597520e140c55e0..95c02fbc1ea64bc2a73bce9702075570e15c7f63 100644 (file)
@@ -349,6 +349,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ]  = 0x3f800000;
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ]  = 0x3f800000;
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_CLIPRECT_RULE]     = 0xffff;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_ESGS_RING_ITEMSIZE]  = 0x00000000;
 
                /* Set all saved registers state to saved. */
                ctx->tracked_regs.reg_saved = 0xffffffff;
index 446edea49a94415026324126f3c8a6bb2d415c7d..1e686d8060b684915afb2d0f086d929704e2d3cc 100644 (file)
@@ -144,6 +144,9 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
                radeon_emit(cs, ib->gpu_address >> 32);
                radeon_emit(cs, (ib->b.b.width0 >> 2) & 0xfffff);
        }
+
+       if (state->atom.emit)
+               state->atom.emit(sctx);
 }
 
 void si_pm4_reset_emitted(struct si_context *sctx)
index 020da7a390343d59b542441cb353c83853b1ee7d..b2db236c495678e30b9916a6295316501a42a3d4 100644 (file)
 // forward defines
 struct si_context;
 
+/* State atoms are callbacks which write a sequence of packets into a GPU
+ * command buffer (AKA indirect buffer, AKA IB, AKA command stream, AKA CS).
+ */
+struct si_atom {
+       void (*emit)(struct si_context *ctx);
+};
+
 struct si_pm4_state
 {
        /* optional indirect buffer */
@@ -52,6 +59,10 @@ struct si_pm4_state
        struct r600_resource    *bo[SI_PM4_MAX_BO];
        enum radeon_bo_usage    bo_usage[SI_PM4_MAX_BO];
        enum radeon_bo_priority bo_priority[SI_PM4_MAX_BO];
+
+       /* For shader states only */
+       struct si_shader *shader;
+       struct si_atom atom;
 };
 
 void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode);
index 89bb5b64a3ecd1ccbdb72bb577fcd073c1bf1043..7fb09ca953b3f1cc18079ae8066a4fac0a1a5a09 100644 (file)
@@ -45,13 +45,6 @@ struct si_shader_selector;
 struct si_texture;
 struct si_qbo_state;
 
-/* State atoms are callbacks which write a sequence of packets into a GPU
- * command buffer (AKA indirect buffer, AKA IB, AKA command stream, AKA CS).
- */
-struct si_atom {
-       void (*emit)(struct si_context *ctx);
-};
-
 struct si_state_blend {
        struct si_pm4_state     pm4;
        uint32_t                cb_target_mask;
@@ -284,6 +277,8 @@ enum si_tracked_reg {
 
        SI_TRACKED_PA_SC_CLIPRECT_RULE,
 
+       SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
+
        SI_NUM_TRACKED_REGS,
 };
 
index 0bf783fc17e586019adba9d5dfb30cfeba66a0d3..fb0a2f1d190166aaa5899e253f63fe00cefef4b5 100644 (file)
@@ -452,7 +452,13 @@ static struct si_pm4_state *si_get_shader_pm4_state(struct si_shader *shader)
        else
                shader->pm4 = CALLOC_STRUCT(si_pm4_state);
 
-       return shader->pm4;
+       if (shader->pm4) {
+               shader->pm4->shader = shader;
+               return shader->pm4;
+       } else {
+               fprintf(stderr, "radeonsi: Failed to create pm4 state.\n");
+               return NULL;
+       }
 }
 
 static unsigned si_get_num_vs_user_sgprs(unsigned num_always_on_user_sgprs)
@@ -552,6 +558,18 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
        }
 }
 
+static void si_emit_shader_es(struct si_context *sctx)
+{
+       struct si_shader *shader = sctx->queued.named.es->shader;
+
+       if (!shader)
+               return;
+
+       radeon_opt_set_context_reg(sctx, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
+                                  SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
+                                  shader->selector->esgs_itemsize / 4);
+}
+
 static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
 {
        struct si_pm4_state *pm4;
@@ -566,6 +584,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
        if (!pm4)
                return;
 
+       pm4->atom.emit = si_emit_shader_es;
        va = shader->bo->gpu_address;
        si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
 
@@ -581,8 +600,6 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
 
        oc_lds_en = shader->selector->type == PIPE_SHADER_TESS_EVAL ? 1 : 0;
 
-       si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
-                      shader->selector->esgs_itemsize / 4);
        si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
        si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(va >> 40));
        si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,