From 43d7305a405c82e81c9b7b3cc4958169b13777bb Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Tue, 12 Apr 2016 20:28:46 +0200 Subject: [PATCH] radeonsi: Allow TES distribution between shader engines. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The R_028B50_VGT_TESS_DISTRIBUTION value is copied from amdgpu-pro. Smaller values in the ACCUM fields seem to decrease the performance advantage from this patch; higher values don't seem to matter. v2: Add distribution mode field enums. Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Nicolai Hähnle Reviewed-by: Marek Olšák --- src/gallium/drivers/radeonsi/si_state.c | 5 +++ src/gallium/drivers/radeonsi/si_state_draw.c | 8 ++++ .../drivers/radeonsi/si_state_shaders.c | 39 ++++++++++++------- src/gallium/drivers/radeonsi/sid.h | 3 ++ 4 files changed, 40 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index aefa336c50f..ab321ef0b8f 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3791,6 +3791,11 @@ static void si_init_config(struct si_context *sctx) S_028424_OVERWRITE_COMBINER_WATERMARK(4)); si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30); si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); + si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, + S_028B50_ACCUM_ISOLINE(32) | + S_028B50_ACCUM_TRI(11) | + S_028B50_ACCUM_QUAD(11) | + S_028B50_DONUT_SPLIT(16)); } if (sctx->b.family == CHIP_STONEY) diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index c8b87a9f1a0..788869e5a91 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -279,6 +279,14 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, sctx->b.family == CHIP_BONAIRE) && sctx->gs_shader.cso) partial_vs_wave = true; + + /* Needed for 
028B6C_DISTRIBUTION_MODE != 0 */ + if (sctx->b.chip_class >= VI) { + if (sctx->gs_shader.cso) + partial_es_wave = true; + else + partial_vs_wave = true; + } } /* This is a hardware requirement. */ diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 116bf27b471..c6f51ea84d5 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -249,7 +249,8 @@ void si_destroy_shader_cache(struct si_screen *sscreen) /* SHADER STATES */ -static void si_set_tesseval_regs(struct si_shader *shader, +static void si_set_tesseval_regs(struct si_screen *sscreen, + struct si_shader *shader, struct si_pm4_state *pm4) { struct tgsi_shader_info *info = &shader->selector->info; @@ -257,7 +258,7 @@ static void si_set_tesseval_regs(struct si_shader *shader, unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING]; bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW]; bool tes_point_mode = info->properties[TGSI_PROPERTY_TES_POINT_MODE]; - unsigned type, partitioning, topology; + unsigned type, partitioning, topology, distribution_mode; switch (tes_prim_mode) { case PIPE_PRIM_LINES: @@ -299,10 +300,16 @@ static void si_set_tesseval_regs(struct si_shader *shader, else topology = V_028B6C_OUTPUT_TRIANGLE_CW; + if (sscreen->b.chip_class >= VI) + distribution_mode = V_028B6C_DISTRIBUTION_MODE_DONUTS; + else + distribution_mode = V_028B6C_DISTRIBUTION_MODE_NO_DIST; + si_pm4_set_reg(pm4, R_028B6C_VGT_TF_PARAM, S_028B6C_TYPE(type) | S_028B6C_PARTITIONING(partitioning) | - S_028B6C_TOPOLOGY(topology)); + S_028B6C_TOPOLOGY(topology) | + S_028B6C_DISTRIBUTION_MODE(distribution_mode)); } static void si_shader_ls(struct si_shader *shader) @@ -359,7 +366,7 @@ static void si_shader_hs(struct si_shader *shader) S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); } -static void si_shader_es(struct si_shader *shader) +static void 
si_shader_es(struct si_screen *sscreen, struct si_shader *shader) { struct si_pm4_state *pm4; unsigned num_user_sgprs; @@ -402,7 +409,7 @@ static void si_shader_es(struct si_shader *shader) S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); if (shader->selector->type == PIPE_SHADER_TESS_EVAL) - si_set_tesseval_regs(shader, pm4); + si_set_tesseval_regs(sscreen, shader, pm4); } /** @@ -489,7 +496,8 @@ static void si_shader_gs(struct si_shader *shader) * If \p gs is non-NULL, it points to the geometry shader for which this shader * is the copy shader. */ -static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) +static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, + struct si_shader *gs) { struct si_pm4_state *pm4; unsigned num_user_sgprs; @@ -583,7 +591,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1)); if (shader->selector->type == PIPE_SHADER_TESS_EVAL) - si_set_tesseval_regs(shader, pm4); + si_set_tesseval_regs(sscreen, shader, pm4); } static unsigned si_get_ps_num_interp(struct si_shader *ps) @@ -769,7 +777,8 @@ static void si_shader_ps(struct si_shader *shader) shader->z_order = V_02880C_EARLY_Z_THEN_LATE_Z; } -static void si_shader_init_pm4_state(struct si_shader *shader) +static void si_shader_init_pm4_state(struct si_screen *sscreen, + struct si_shader *shader) { if (shader->pm4) @@ -780,22 +789,22 @@ static void si_shader_init_pm4_state(struct si_shader *shader) if (shader->key.vs.as_ls) si_shader_ls(shader); else if (shader->key.vs.as_es) - si_shader_es(shader); + si_shader_es(sscreen, shader); else - si_shader_vs(shader, NULL); + si_shader_vs(sscreen, shader, NULL); break; case PIPE_SHADER_TESS_CTRL: si_shader_hs(shader); break; case PIPE_SHADER_TESS_EVAL: if (shader->key.tes.as_es) - si_shader_es(shader); + si_shader_es(sscreen, shader); else - si_shader_vs(shader, NULL); + si_shader_vs(sscreen, shader, NULL); 
break; case PIPE_SHADER_GEOMETRY: si_shader_gs(shader); - si_shader_vs(shader->gs_copy_shader, shader); + si_shader_vs(sscreen, shader->gs_copy_shader, shader); break; case PIPE_SHADER_FRAGMENT: si_shader_ps(shader); @@ -989,7 +998,7 @@ static int si_shader_select_with_key(struct pipe_context *ctx, pipe_mutex_unlock(sel->mutex); return r; } - si_shader_init_pm4_state(shader); + si_shader_init_pm4_state(sctx->screen, shader); if (!sel->last_variant) { sel->first_variant = shader; @@ -1664,7 +1673,7 @@ static int si_update_scratch_buffer(struct si_context *sctx, return r; /* Update the shader state to use the new shader bo. */ - si_shader_init_pm4_state(shader); + si_shader_init_pm4_state(sctx->screen, shader); r600_resource_reference(&shader->scratch_bo, sctx->scratch_buffer); diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h index dcd63727058..25f8cf5cee4 100644 --- a/src/gallium/drivers/radeonsi/sid.h +++ b/src/gallium/drivers/radeonsi/sid.h @@ -8074,6 +8074,9 @@ #define S_028B6C_DISTRIBUTION_MODE(x) (((unsigned)(x) & 0x03) << 17) #define G_028B6C_DISTRIBUTION_MODE(x) (((x) >> 17) & 0x03) #define C_028B6C_DISTRIBUTION_MODE 0xFFF9FFFF +#define V_028B6C_DISTRIBUTION_MODE_NO_DIST 0x00 +#define V_028B6C_DISTRIBUTION_MODE_PATCHES 0x01 +#define V_028B6C_DISTRIBUTION_MODE_DONUTS 0x02 #define S_028B6C_MTYPE(x) (((unsigned)(x) & 0x03) << 19) #define G_028B6C_MTYPE(x) (((x) >> 19) & 0x03) #define C_028B6C_MTYPE 0xFFE7FFFF -- 2.30.2