From eeb9170599468f4e9c0a48e1507a53cde66a9825 Mon Sep 17 00:00:00 2001 From: Sonny Jiang Date: Wed, 3 Oct 2018 11:53:09 -0400 Subject: [PATCH] radeonsi: optimizing SET_CONTEXT_REG for shaders ES MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Sonny Jiang Signed-off-by: Marek Olšák --- src/gallium/drivers/radeonsi/si_gfx_cs.c | 1 + src/gallium/drivers/radeonsi/si_pm4.c | 3 +++ src/gallium/drivers/radeonsi/si_pm4.h | 11 +++++++++ src/gallium/drivers/radeonsi/si_state.h | 9 ++------ .../drivers/radeonsi/si_state_shaders.c | 23 ++++++++++++++++--- 5 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index bdb576f7e5c..95c02fbc1ea 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -349,6 +349,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ] = 0x3f800000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ] = 0x3f800000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_CLIPRECT_RULE] = 0xffff; + ctx->tracked_regs.reg_value[SI_TRACKED_VGT_ESGS_RING_ITEMSIZE] = 0x00000000; /* Set all saved registers state to saved. */ ctx->tracked_regs.reg_saved = 0xffffffff; diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c index 446edea49a9..1e686d8060b 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.c +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -144,6 +144,9 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state) radeon_emit(cs, ib->gpu_address >> 32); radeon_emit(cs, (ib->b.b.width0 >> 2) & 0xfffff); } + + if (state->atom.emit) + state->atom.emit(sctx); } void si_pm4_reset_emitted(struct si_context *sctx) diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h index 020da7a3903..b2db236c495 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.h +++ b/src/gallium/drivers/radeonsi/si_pm4.h @@ -33,6 +33,13 @@ // forward defines struct si_context; +/* State atoms are callbacks which write a sequence of packets into a GPU + * command buffer (AKA indirect buffer, AKA IB, AKA command stream, AKA CS). + */ +struct si_atom { + void (*emit)(struct si_context *ctx); +}; + struct si_pm4_state { /* optional indirect buffer */ @@ -52,6 +59,10 @@ struct si_pm4_state struct r600_resource *bo[SI_PM4_MAX_BO]; enum radeon_bo_usage bo_usage[SI_PM4_MAX_BO]; enum radeon_bo_priority bo_priority[SI_PM4_MAX_BO]; + + /* For shader states only */ + struct si_shader *shader; + struct si_atom atom; }; void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode); diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 89bb5b64a3e..7fb09ca953b 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -45,13 +45,6 @@ struct si_shader_selector; struct si_texture; struct si_qbo_state; -/* State atoms are callbacks which write a sequence of packets into a GPU - * command buffer (AKA indirect buffer, AKA IB, AKA command stream, AKA CS). - */ -struct si_atom { - void (*emit)(struct si_context *ctx); -}; - struct si_state_blend { struct si_pm4_state pm4; uint32_t cb_target_mask; @@ -284,6 +277,8 @@ enum si_tracked_reg { SI_TRACKED_PA_SC_CLIPRECT_RULE, + SI_TRACKED_VGT_ESGS_RING_ITEMSIZE, + SI_NUM_TRACKED_REGS, }; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 0bf783fc17e..fb0a2f1d190 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -452,7 +452,13 @@ static struct si_pm4_state *si_get_shader_pm4_state(struct si_shader *shader) else shader->pm4 = CALLOC_STRUCT(si_pm4_state); - return shader->pm4; + if (shader->pm4) { + shader->pm4->shader = shader; + return shader->pm4; + } else { + fprintf(stderr, "radeonsi: Failed to create pm4 state.\n"); + return NULL; + } } static unsigned si_get_num_vs_user_sgprs(unsigned num_always_on_user_sgprs) @@ -552,6 +558,18 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) } } +static void si_emit_shader_es(struct si_context *sctx) +{ + struct si_shader *shader = sctx->queued.named.es->shader; + + if (!shader) + return; + + radeon_opt_set_context_reg(sctx, R_028AAC_VGT_ESGS_RING_ITEMSIZE, + SI_TRACKED_VGT_ESGS_RING_ITEMSIZE, + shader->selector->esgs_itemsize / 4); +} + static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) { struct si_pm4_state *pm4; @@ -566,6 +584,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) if (!pm4) return; + pm4->atom.emit = si_emit_shader_es; va = shader->bo->gpu_address; si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY); @@ -581,8 +600,6 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) oc_lds_en = shader->selector->type == PIPE_SHADER_TESS_EVAL ? 1 : 0; - si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE, - shader->selector->esgs_itemsize / 4); si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(va >> 40)); si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES, -- 2.30.2