From aa2d846604b7e46f98c05242f4f97b3508bf183e Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 24 Dec 2019 13:50:06 -0500 Subject: [PATCH] radeonsi/gfx10: move GE_PC_ALLOC setting to shader states The value is not changed. I just use a different way to compute it. The value will vary with NGG culling. Acked-by: Pierre-Eric Pelloux-Prayer --- src/gallium/drivers/radeonsi/si_gfx_cs.c | 4 +-- src/gallium/drivers/radeonsi/si_shader.h | 2 ++ src/gallium/drivers/radeonsi/si_state.c | 13 ------- src/gallium/drivers/radeonsi/si_state.h | 2 ++ .../drivers/radeonsi/si_state_shaders.c | 34 +++++++++++++++++++ 5 files changed, 40 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 270304ae9cb..38b2abd1acb 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -524,10 +524,10 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL] = 0x0000001e; /* From GFX8 */ /* Set all cleared context registers to saved. */ - ctx->tracked_regs.reg_saved = 0xffffffffffffffff; + ctx->tracked_regs.reg_saved = ~(1ull << SI_TRACKED_GE_PC_ALLOC); /* uconfig reg */ ctx->last_gs_out_prim = 0; /* cleared by CLEAR_STATE */ } else { - /* Set all saved registers state to unknown. */ + /* Set all register values to unknown. */ ctx->tracked_regs.reg_saved = 0; ctx->last_gs_out_prim = -1; /* unknown */ } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 3a6ad2b5117..295db469be3 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -775,6 +775,7 @@ struct si_shader { unsigned pa_cl_vte_cntl; unsigned pa_cl_ngg_cntl; unsigned vgt_gs_max_vert_out; /* for API GS */ + unsigned ge_pc_alloc; /* uconfig register */ } ngg; struct { @@ -784,6 +785,7 @@ struct si_shader { unsigned spi_vs_out_config; unsigned spi_shader_pos_format; unsigned pa_cl_vte_cntl; + unsigned ge_pc_alloc; /* uconfig register */ } vs; struct { diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 9390206f72f..c811f76e6d8 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -5669,19 +5669,6 @@ static void si_init_config(struct si_context *sctx) S_00B0C0_SOFT_GROUPING_EN(1) | S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1)); si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0); - - if (sctx->family == CHIP_NAVI10 || - sctx->family == CHIP_NAVI12 || - sctx->family == CHIP_NAVI14) { - /* SQ_NON_EVENT must be emitted before GE_PC_ALLOC is written. */ - si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE); - si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_SQ_NON_EVENT) | EVENT_INDEX(0)); - si_pm4_cmd_end(pm4, false); - } - /* TODO: For culling, replace 128 with 256. */ - si_pm4_set_reg(pm4, R_030980_GE_PC_ALLOC, - S_030980_OVERSUB_EN(1) | - S_030980_NUM_PC_LINES(sscreen->info.pc_lines / 4 - 1)); } if (sctx->chip_class >= GFX8) { diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 8769fd7c270..897c0adb922 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -355,6 +355,8 @@ enum si_tracked_reg { SI_TRACKED_VGT_TF_PARAM, SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL, + SI_TRACKED_GE_PC_ALLOC, + SI_NUM_TRACKED_REGS, }; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 6b2b9e15890..e5ae110fa82 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -963,6 +963,29 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) } } +static void gfx10_emit_ge_pc_alloc(struct si_context *sctx, unsigned value) +{ + enum si_tracked_reg reg = SI_TRACKED_GE_PC_ALLOC; + + if (((sctx->tracked_regs.reg_saved >> reg) & 0x1) != 0x1 || + sctx->tracked_regs.reg_value[reg] != value) { + struct radeon_cmdbuf *cs = sctx->gfx_cs; + + if (sctx->family == CHIP_NAVI10 || + sctx->family == CHIP_NAVI12 || + sctx->family == CHIP_NAVI14) { + /* SQ_NON_EVENT must be emitted before GE_PC_ALLOC is written. */ + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, EVENT_TYPE(V_028A90_SQ_NON_EVENT) | EVENT_INDEX(0)); + } + + radeon_set_uconfig_reg(cs, R_030980_GE_PC_ALLOC, value); + + sctx->tracked_regs.reg_saved |= 0x1ull << reg; + sctx->tracked_regs.reg_value[reg] = value; + } +} + /* Common tail code for NGG primitive shaders. */ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader *shader, @@ -1007,6 +1030,9 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, if (initial_cdw != sctx->gfx_cs->current.cdw) sctx->context_roll = true; + + /* GE_PC_ALLOC is not a context register, so it doesn't cause a context roll. */ + gfx10_emit_ge_pc_alloc(sctx, shader->ctx_reg.ngg.ge_pc_alloc); } static void gfx10_emit_shader_ngg_notess_nogs(struct si_context *sctx) @@ -1246,6 +1272,8 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader shader->ctx_reg.ngg.pa_cl_ngg_cntl = S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_type == PIPE_SHADER_VERTEX); shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(gs_sel, true); + shader->ctx_reg.ngg.ge_pc_alloc = S_030980_OVERSUB_EN(1) | + S_030980_NUM_PC_LINES(sscreen->info.pc_lines / 4 - 1); shader->ge_cntl = S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) | @@ -1344,6 +1372,10 @@ static void si_emit_shader_vs(struct si_context *sctx) if (initial_cdw != sctx->gfx_cs->current.cdw) sctx->context_roll = true; + + /* GE_PC_ALLOC is not a context register, so it doesn't cause a context roll. */ + if (sctx->chip_class >= GFX10) + gfx10_emit_ge_pc_alloc(sctx, shader->ctx_reg.vs.ge_pc_alloc); } /** @@ -1440,6 +1472,8 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, S_02870C_POS3_EXPORT_FORMAT(shader->info.nr_pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE); + shader->ctx_reg.vs.ge_pc_alloc = S_030980_OVERSUB_EN(1) | + S_030980_NUM_PC_LINES(sscreen->info.pc_lines / 4 - 1); shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, false); oc_lds_en = shader->selector->type == PIPE_SHADER_TESS_EVAL ? 1 : 0; -- 2.30.2