From 4ef1c8d60bd5f7ee2d8bc7e878d293256b921008 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Fri, 20 Mar 2020 17:35:56 -0400
Subject: [PATCH] radeonsi/gfx10: fix the wave size for compute-based culling

Acked-by: Pierre-Eric Pelloux-Prayer
Part-of:
---
 src/gallium/drivers/radeonsi/si_pipe.h           |  8 +++++---
 src/gallium/drivers/radeonsi/si_shader.c         | 11 ++++++++++-
 src/gallium/drivers/radeonsi/si_shader.h         |  1 +
 src/gallium/drivers/radeonsi/si_shader_llvm_gs.c |  3 ++-
 src/gallium/drivers/radeonsi/si_state_shaders.c  |  2 +-
 5 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 41c5bd45036..400f2152243 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1967,13 +1967,14 @@ static inline bool si_compute_prim_discard_enabled(struct si_context *sctx)
 
 static inline unsigned si_get_wave_size(struct si_screen *sscreen,
 					enum pipe_shader_type shader_type,
-					bool ngg, bool es)
+					bool ngg, bool es, bool prim_discard_cs)
 {
 	if (shader_type == PIPE_SHADER_COMPUTE)
 		return sscreen->compute_wave_size;
 	else if (shader_type == PIPE_SHADER_FRAGMENT)
 		return sscreen->ps_wave_size;
-	else if ((shader_type == PIPE_SHADER_VERTEX && es && !ngg) ||
+	else if ((shader_type == PIPE_SHADER_VERTEX && prim_discard_cs) || /* only Wave64 implemented */
+		 (shader_type == PIPE_SHADER_VERTEX && es && !ngg) ||
 		 (shader_type == PIPE_SHADER_TESS_EVAL && es && !ngg) ||
 		 (shader_type == PIPE_SHADER_GEOMETRY && !ngg)) /* legacy GS only supports Wave64 */
 		return 64;
@@ -1984,7 +1985,8 @@ static inline unsigned si_get_wave_size(struct si_screen *sscreen,
 static inline unsigned si_get_shader_wave_size(struct si_shader *shader)
 {
 	return si_get_wave_size(shader->selector->screen, shader->selector->type,
-				shader->key.as_ngg, shader->key.as_es);
+				shader->key.as_ngg, shader->key.as_es,
+				shader->key.opt.vs_as_prim_discard_cs);
 }
 
 #define PRINT_ERR(fmt, args...) \
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 8c8f90b80e2..f0e60087dbf 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1636,6 +1636,7 @@ static void si_get_vs_prolog_key(const struct si_shader_info *info,
 	key->vs_prolog.as_ls = shader_out->key.as_ls;
 	key->vs_prolog.as_es = shader_out->key.as_es;
 	key->vs_prolog.as_ngg = shader_out->key.as_ngg;
+	key->vs_prolog.as_prim_discard_cs = shader_out->key.opt.vs_as_prim_discard_cs;
 
 	if (ngg_cull_shader) {
 		key->vs_prolog.gs_fast_launch_tri_list = !!(shader_out->key.opt.ngg_culling &
@@ -1656,6 +1657,12 @@ static void si_get_vs_prolog_key(const struct si_shader_info *info,
 		key->vs_prolog.num_merged_next_stage_vgprs = 5;
 	}
 
+	/* Only one of these combinations can be set. as_ngg can be set with as_es. */
+	assert(key->vs_prolog.as_ls +
+	       key->vs_prolog.as_ngg +
+	       (key->vs_prolog.as_es && !key->vs_prolog.as_ngg) +
+	       key->vs_prolog.as_prim_discard_cs <= 1);
+
 	/* Enable loading the InstanceID VGPR. */
 	uint16_t input_mask = u_bit_consecutive(0, info->num_inputs);
 
@@ -2088,6 +2095,7 @@ si_get_shader_part(struct si_screen *sscreen,
 		shader.key.as_ls = key->vs_prolog.as_ls;
 		shader.key.as_es = key->vs_prolog.as_es;
 		shader.key.as_ngg = key->vs_prolog.as_ngg;
+		shader.key.opt.vs_as_prim_discard_cs = key->vs_prolog.as_prim_discard_cs;
 		break;
 	case PIPE_SHADER_TESS_CTRL:
 		assert(!prolog);
@@ -2110,7 +2118,8 @@ si_get_shader_part(struct si_screen *sscreen,
 	struct si_shader_context ctx;
 	si_llvm_context_init(&ctx, sscreen, compiler,
 			     si_get_wave_size(sscreen, type, shader.key.as_ngg,
-					      shader.key.as_es));
+					      shader.key.as_es,
+					      shader.key.opt.vs_as_prim_discard_cs));
 	ctx.shader = &shader;
 	ctx.type = type;
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 1ee9aba9faf..ef571a5d684 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -566,6 +566,7 @@ union si_shader_part_key {
 		unsigned as_ls:1;
 		unsigned as_es:1;
 		unsigned as_ngg:1;
+		unsigned as_prim_discard_cs:1;
 		unsigned has_ngg_cull_inputs:1; /* from the NGG cull shader */
 		unsigned gs_fast_launch_tri_list:1; /* for NGG culling */
 		unsigned gs_fast_launch_tri_strip:1; /* for NGG culling */
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
index ac734d0924b..99ffdd2e980 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
@@ -523,7 +523,8 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
 	shader->is_gs_copy_shader = true;
 
 	si_llvm_context_init(&ctx, sscreen, compiler,
-			     si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false));
+			     si_get_wave_size(sscreen, PIPE_SHADER_VERTEX,
+					      false, false, false));
 	ctx.shader = shader;
 	ctx.type = PIPE_SHADER_VERTEX;
 
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index ce7e791be3d..be7cda1d332 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -71,7 +71,7 @@ void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
 		shader_variant_flags |= 1 << 0;
 	if (sel->nir)
 		shader_variant_flags |= 1 << 1;
-	if (si_get_wave_size(sel->screen, sel->type, ngg, es) == 32)
+	if (si_get_wave_size(sel->screen, sel->type, ngg, es, false) == 32)
 		shader_variant_flags |= 1 << 2;
 	if (sel->type == PIPE_SHADER_FRAGMENT &&
 	    sel->info.uses_derivatives &&
-- 
2.30.2