static inline unsigned si_get_wave_size(struct si_screen *sscreen,
enum pipe_shader_type shader_type,
- bool ngg, bool es)
+ bool ngg, bool es, bool prim_discard_cs)
{
if (shader_type == PIPE_SHADER_COMPUTE)
return sscreen->compute_wave_size;
else if (shader_type == PIPE_SHADER_FRAGMENT)
return sscreen->ps_wave_size;
- else if ((shader_type == PIPE_SHADER_VERTEX && es && !ngg) ||
+ else if ((shader_type == PIPE_SHADER_VERTEX && prim_discard_cs) || /* only Wave64 implemented */
+ (shader_type == PIPE_SHADER_VERTEX && es && !ngg) ||
(shader_type == PIPE_SHADER_TESS_EVAL && es && !ngg) ||
(shader_type == PIPE_SHADER_GEOMETRY && !ngg)) /* legacy GS only supports Wave64 */
return 64;
/* Convenience wrapper around si_get_wave_size() for a single shader:
 * forwards the screen and shader type taken from shader->selector together
 * with the as_ngg / as_es bits of shader->key.  With this change the
 * key.opt.vs_as_prim_discard_cs bit is forwarded as well, and the value
 * returned by si_get_wave_size() is returned unchanged.
 */
static inline unsigned si_get_shader_wave_size(struct si_shader *shader)
{
return si_get_wave_size(shader->selector->screen, shader->selector->type,
- shader->key.as_ngg, shader->key.as_es);
+ shader->key.as_ngg, shader->key.as_es,
+ shader->key.opt.vs_as_prim_discard_cs);
}
#define PRINT_ERR(fmt, args...) \
key->vs_prolog.as_ls = shader_out->key.as_ls;
key->vs_prolog.as_es = shader_out->key.as_es;
key->vs_prolog.as_ngg = shader_out->key.as_ngg;
+ key->vs_prolog.as_prim_discard_cs = shader_out->key.opt.vs_as_prim_discard_cs;
if (ngg_cull_shader) {
key->vs_prolog.gs_fast_launch_tri_list = !!(shader_out->key.opt.ngg_culling &
key->vs_prolog.num_merged_next_stage_vgprs = 5;
}
+ /* At most one of these modes can be set; as_ngg combined with as_es counts as one. */
+ assert(key->vs_prolog.as_ls +
+ key->vs_prolog.as_ngg +
+ (key->vs_prolog.as_es && !key->vs_prolog.as_ngg) +
+ key->vs_prolog.as_prim_discard_cs <= 1);
+
/* Enable loading the InstanceID VGPR. */
uint16_t input_mask = u_bit_consecutive(0, info->num_inputs);
shader.key.as_ls = key->vs_prolog.as_ls;
shader.key.as_es = key->vs_prolog.as_es;
shader.key.as_ngg = key->vs_prolog.as_ngg;
+ shader.key.opt.vs_as_prim_discard_cs = key->vs_prolog.as_prim_discard_cs;
break;
case PIPE_SHADER_TESS_CTRL:
assert(!prolog);
struct si_shader_context ctx;
si_llvm_context_init(&ctx, sscreen, compiler,
si_get_wave_size(sscreen, type, shader.key.as_ngg,
- shader.key.as_es));
+ shader.key.as_es,
+ shader.key.opt.vs_as_prim_discard_cs));
ctx.shader = &shader;
ctx.type = type;
unsigned as_ls:1;
unsigned as_es:1;
unsigned as_ngg:1;
+ unsigned as_prim_discard_cs:1;
unsigned has_ngg_cull_inputs:1; /* from the NGG cull shader */
unsigned gs_fast_launch_tri_list:1; /* for NGG culling */
unsigned gs_fast_launch_tri_strip:1; /* for NGG culling */
shader->is_gs_copy_shader = true;
si_llvm_context_init(&ctx, sscreen, compiler,
- si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false));
+ si_get_wave_size(sscreen, PIPE_SHADER_VERTEX,
+ false, false, false));
ctx.shader = shader;
ctx.type = PIPE_SHADER_VERTEX;
shader_variant_flags |= 1 << 0;
if (sel->nir)
shader_variant_flags |= 1 << 1;
- if (si_get_wave_size(sel->screen, sel->type, ngg, es) == 32)
+ if (si_get_wave_size(sel->screen, sel->type, ngg, es, false) == 32)
shader_variant_flags |= 1 << 2;
if (sel->type == PIPE_SHADER_FRAGMENT &&
sel->info.uses_derivatives &&