/* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */
if (ctx->options->chip_class == GFX6) {
- unsigned one_wave = ctx->options->ge_wave_size / MAX2(num_tcs_input_cp, num_tcs_output_cp);
+ unsigned one_wave = ctx->options->wave_size / MAX2(num_tcs_input_cp, num_tcs_output_cp);
num_patches = MIN2(num_patches, one_wave);
}
return num_patches;
LLVMSetAlignment(ctx->esgs_ring, 64 * 1024);
}
-static uint8_t
-radv_nir_shader_wave_size(struct nir_shader *const *shaders, int shader_count,
- const struct radv_nir_compiler_options *options)
-{
- if (shaders[0]->info.stage == MESA_SHADER_COMPUTE)
- return options->cs_wave_size;
- else if (shaders[0]->info.stage == MESA_SHADER_FRAGMENT)
- return options->ps_wave_size;
- return options->ge_wave_size;
-}
-
static
LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
struct nir_shader *const *shaders,
options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
AC_FLOAT_MODE_DEFAULT;
- uint8_t wave_size = radv_nir_shader_wave_size(shaders,
- shader_count, options);
-
ac_llvm_context_init(&ctx.ac, ac_llvm, options->chip_class,
- options->family, float_mode, wave_size);
+ options->family, float_mode, options->wave_size);
ctx.context = ctx.ac.context;
radv_nir_shader_info_init(&shader_info->info);
shader_info->gs.es_type = nir[0]->info.stage;
}
}
+ shader_info->info.wave_size = options->wave_size;
}
static void
radeon_set_context_reg(ctx_cs, R_0286D8_SPI_PS_IN_CONTROL,
S_0286D8_NUM_INTERP(ps->info.fs.num_interp) |
- S_0286D8_PS_W32_EN(pipeline->device->physical_device->ps_wave_size == 32));
+ S_0286D8_PS_W32_EN(ps->info.info.wave_size == 32));
radeon_set_context_reg(ctx_cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl);
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 &&
- pipeline->device->physical_device->ge_wave_size == 32) {
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
+ uint8_t hs_size = 64, gs_size = 64, vs_size = 64;
+
+ if (radv_pipeline_has_tess(pipeline))
+ hs_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.info.wave_size;
+
+ if (pipeline->shaders[MESA_SHADER_GEOMETRY]) {
+ vs_size = gs_size = pipeline->shaders[MESA_SHADER_GEOMETRY]->info.info.wave_size;
+ if (pipeline->gs_copy_shader)
+ vs_size = pipeline->gs_copy_shader->info.info.wave_size;
+ } else if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
+ vs_size = pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.info.wave_size;
+ else if (pipeline->shaders[MESA_SHADER_VERTEX])
+ vs_size = pipeline->shaders[MESA_SHADER_VERTEX]->info.info.wave_size;
+
+ if (radv_pipeline_has_ngg(pipeline))
+ gs_size = vs_size;
+
/* legacy GS only supports Wave64 */
- stages |= S_028B54_HS_W32_EN(1) |
- S_028B54_GS_W32_EN(radv_pipeline_has_ngg(pipeline)) |
- S_028B54_VS_W32_EN(1);
+ stages |= S_028B54_HS_W32_EN(hs_size == 32 ? 1 : 0) |
+ S_028B54_GS_W32_EN(gs_size == 32 ? 1 : 0) |
+ S_028B54_VS_W32_EN(vs_size == 32 ? 1 : 0);
}
return stages;
return code_size + DEBUGGER_NUM_MARKERS * 4;
}
-static uint8_t
-radv_get_shader_wave_size(const struct radv_physical_device *pdevice,
- gl_shader_stage stage)
-{
- if (stage == MESA_SHADER_COMPUTE)
- return pdevice->cs_wave_size;
- else if (stage == MESA_SHADER_FRAGMENT)
- return pdevice->ps_wave_size;
- return pdevice->ge_wave_size;
-}
-
static void radv_postprocess_config(const struct radv_physical_device *pdevice,
const struct ac_shader_config *config_in,
const struct radv_shader_variant_info *info,
struct ac_shader_config *config_out)
{
bool scratch_enabled = config_in->scratch_bytes_per_wave > 0;
- uint8_t wave_size = radv_get_shader_wave_size(pdevice, stage);
unsigned vgpr_comp_cnt = 0;
unsigned num_input_vgprs = info->num_input_vgprs;
S_00B12C_SO_EN(!!info->info.so.num_outputs);
config_out->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) /
- (wave_size == 32 ? 8 : 4)) |
+ (info->info.wave_size == 32 ? 8 : 4)) |
S_00B848_DX10_CLAMP(1) |
S_00B848_FLOAT_MODE(config_out->float_mode);
sym->size -= 32;
}
- uint8_t wave_size =
- radv_get_shader_wave_size(device->physical_device,
- binary->stage);
-
struct ac_rtld_open_info open_info = {
.info = &device->physical_device->rad_info,
.shader_type = binary->stage,
- .wave_size = wave_size,
+ .wave_size = binary->variant_info.info.wave_size,
.num_parts = 1,
.elf_ptrs = &elf_data,
.elf_sizes = &elf_size,
options->check_ir = device->instance->debug_flags & RADV_DEBUG_CHECKIR;
options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
options->address32_hi = device->physical_device->rad_info.address32_hi;
- options->cs_wave_size = device->physical_device->cs_wave_size;
- options->ps_wave_size = device->physical_device->ps_wave_size;
- options->ge_wave_size = device->physical_device->ge_wave_size;
+
+ if (stage == MESA_SHADER_COMPUTE)
+ options->wave_size = device->physical_device->cs_wave_size;
+ else if (stage == MESA_SHADER_FRAGMENT)
+ options->wave_size = device->physical_device->ps_wave_size;
+ else
+ options->wave_size = device->physical_device->ge_wave_size;
if (options->supports_spill)
tm_options |= AC_TM_SUPPORTS_SPILL;
radv_init_llvm_compiler(&ac_llvm,
thread_compiler,
chip_family, tm_options,
- radv_get_shader_wave_size(device->physical_device, stage));
+ options->wave_size);
if (gs_copy_shader) {
assert(shader_count == 1);
radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary,
{
enum chip_class chip_class = device->physical_device->rad_info.chip_class;
unsigned lds_increment = chip_class >= GFX7 ? 512 : 256;
- uint8_t wave_size = radv_get_shader_wave_size(device->physical_device, stage);
+ uint8_t wave_size = variant->info.info.wave_size;
struct ac_shader_config *conf;
unsigned max_simd_waves;
unsigned lds_per_wave = 0;
enum chip_class chip_class;
uint32_t tess_offchip_block_dw_size;
uint32_t address32_hi;
- uint8_t cs_wave_size;
- uint8_t ps_wave_size;
- uint8_t ge_wave_size;
+ uint8_t wave_size;
};
enum radv_ud_index {
bool needs_multiview_view_index;
bool uses_invocation_id;
bool uses_prim_id;
+ uint8_t wave_size;
struct {
uint64_t ls_outputs_written;
uint8_t input_usage_mask[VERT_ATTRIB_MAX];