ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class,
args->options->family, float_mode,
- args->shader_info->wave_size, 64);
+ args->shader_info->wave_size,
+ args->shader_info->ballot_bit_size);
ctx.context = ctx.ac.context;
ctx.max_workgroup_size = 0;
return device->physical_device->ge_wave_size;
}
+static uint8_t /* Returns the ballot bit size to use for one shader stage. */
+radv_get_ballot_bit_size(struct radv_device *device, /* not referenced in this body */
+ const VkPipelineShaderStageCreateInfo *pStage, /* not referenced in this body */
+ gl_shader_stage stage, /* stage being queried; only MESA_SHADER_COMPUTE is special-cased */
+ const struct radv_shader_variant_key *key) /* only key->cs.subgroup_size is read */
+{
+ if (stage == MESA_SHADER_COMPUTE && key->cs.subgroup_size) /* compute stage whose key carries a non-zero subgroup size */
+ return key->cs.subgroup_size; /* ballot width follows that subgroup size */
+ return 64; /* every other case keeps 64, the value that was hard-coded before this helper existed */
+}
+
static void
radv_fill_shader_info(struct radv_pipeline *pipeline,
const VkPipelineShaderStageCreateInfo **pStages,
}
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
- if (nir[i])
+ if (nir[i]) {
infos[i].wave_size =
radv_get_wave_size(pipeline->device, pStages[i],
i, &keys[i]);
+ infos[i].ballot_bit_size =
+ radv_get_ballot_bit_size(pipeline->device,
+ pStages[i], i,
+ &keys[i]);
+ }
}
}
for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
const VkPipelineShaderStageCreateInfo *stage = pStages[i];
- unsigned subgroup_size = 64;
+ unsigned subgroup_size = 64, ballot_bit_size = 64;
if (!modules[i])
continue;
assert(device->physical_device->rad_info.chip_class >= GFX10 &&
i == MESA_SHADER_COMPUTE);
subgroup_size = key->compute_subgroup_size;
+ ballot_bit_size = key->compute_subgroup_size;
}
nir[i] = radv_shader_compile_to_nir(device, modules[i],
stage ? stage->pName : "main", i,
stage ? stage->pSpecializationInfo : NULL,
flags, pipeline->layout,
- subgroup_size);
+ subgroup_size, ballot_bit_size);
/* We don't want to alter meta shaders IR directly so clone it
* first.
pipeline->layout, &key,
&info);
info.wave_size = 64; /* Wave32 not supported. */
+ info.ballot_bit_size = 64;
pipeline->gs_copy_shader = radv_create_gs_copy_shader(
device, nir[MESA_SHADER_GEOMETRY], &info,
const VkSpecializationInfo *spec_info,
const VkPipelineCreateFlags flags,
const struct radv_pipeline_layout *layout,
- unsigned subgroup_size)
+ unsigned subgroup_size, unsigned ballot_bit_size)
{
nir_shader *nir;
const nir_shader_compiler_options *nir_options =
bool gfx7minus = device->physical_device->rad_info.chip_class <= GFX7;
nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) {
.subgroup_size = subgroup_size,
- .ballot_bit_size = 64,
+ .ballot_bit_size = ballot_bit_size,
.lower_to_scalar = 1,
.lower_subgroup_masks = 1,
.lower_shuffle = 1,
bool uses_invocation_id;
bool uses_prim_id;
uint8_t wave_size;
+ uint8_t ballot_bit_size;
struct radv_userdata_locations user_sgprs_locs;
unsigned num_user_sgprs;
unsigned num_input_sgprs;
const VkSpecializationInfo *spec_info,
const VkPipelineCreateFlags flags,
const struct radv_pipeline_layout *layout,
- unsigned subgroup_size);
+ unsigned subgroup_size, unsigned ballot_bit_size);
void *
radv_alloc_shader_memory(struct radv_device *device,