shader_variant_flags |= 1 << 0;
if (sel->nir)
shader_variant_flags |= 1 << 1;
- if (si_get_wave_size(sel->screen, sel->type, ngg, es, false) == 32)
+ if (si_get_wave_size(sel->screen, sel->type, ngg, es, false, false) == 32)
shader_variant_flags |= 1 << 2;
if (sel->type == PIPE_SHADER_FRAGMENT && sel->info.uses_derivatives && sel->info.uses_kill &&
sel->screen->debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL))
else
gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
+ unsigned wave_size = si_get_shader_wave_size(shader);
+
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
si_pm4_set_reg(
pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
- S_00B228_VGPRS((shader->config.num_vgprs - 1) / (sscreen->ge_wave_size == 32 ? 8 : 4)) |
+ S_00B228_VGPRS((shader->config.num_vgprs - 1) / (wave_size == 32 ? 8 : 4)) |
S_00B228_FLOAT_MODE(shader->config.float_mode) | S_00B228_DX10_CLAMP(1) |
S_00B228_MEM_ORDERED(1) | S_00B228_WGP_MODE(1) |
S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt));
if (screen->info.chip_class >= GFX9)
stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
- if (screen->info.chip_class >= GFX10 && screen->ge_wave_size == 32) {
+ if (screen->info.chip_class >= GFX10 &&
+ /* GS fast launch hangs with Wave64, so always use Wave32. */
+ (screen->ge_wave_size == 32 || (key.u.ngg && key.u.ngg_gs_fast_launch))) {
stages |= S_028B54_HS_W32_EN(1) |
S_028B54_GS_W32_EN(key.u.ngg) | /* legacy GS only supports Wave64 */
S_028B54_VS_W32_EN(1);