for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
const VkPipelineShaderStageCreateInfo *stage = pStages[i];
+ unsigned subgroup_size = 64;
if (!modules[i])
continue;
radv_start_feedback(stage_feedbacks[i]);
+ if (key->compute_subgroup_size) {
+ /* Only GFX10+ and compute shaders currently support
+ * requiring a specific subgroup size.
+ */
+ assert(device->physical_device->rad_info.chip_class >= GFX10 &&
+ i == MESA_SHADER_COMPUTE);
+ subgroup_size = key->compute_subgroup_size;
+ }
+
nir[i] = radv_shader_compile_to_nir(device, modules[i],
stage ? stage->pName : "main", i,
stage ? stage->pSpecializationInfo : NULL,
- flags, pipeline->layout);
+ flags, pipeline->layout,
+ subgroup_size);
/* We don't want to alter meta shaders IR directly so clone it
* first.
gl_shader_stage stage,
const VkSpecializationInfo *spec_info,
const VkPipelineCreateFlags flags,
- const struct radv_pipeline_layout *layout)
+ const struct radv_pipeline_layout *layout,
+ unsigned subgroup_size)
{
nir_shader *nir;
const nir_shader_compiler_options *nir_options =
nir_remove_dead_variables(nir, nir_var_function_temp);
bool gfx7minus = device->physical_device->rad_info.chip_class <= GFX7;
nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) {
- .subgroup_size = 64,
+ .subgroup_size = subgroup_size,
.ballot_bit_size = 64,
.lower_to_scalar = 1,
.lower_subgroup_masks = 1,
gl_shader_stage stage,
const VkSpecializationInfo *spec_info,
const VkPipelineCreateFlags flags,
- const struct radv_pipeline_layout *layout);
+ const struct radv_pipeline_layout *layout,
+ unsigned subgroup_size);
void *
radv_alloc_shader_memory(struct radv_device *device,