features->timelineSemaphore = true;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: {
+ VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *features =
+ (VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *)ext;
+ features->subgroupSizeControl = true;
+ features->computeFullSubgroups = true;
+ break;
+ }
default:
break;
}
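(Not part of the patch, for reference only: the pNext chain an application would use to see the two new feature bits, assuming <vulkan/vulkan.h> and a valid VkPhysicalDevice phys_dev.)

    VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features = {
       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT,
    };
    VkPhysicalDeviceFeatures2 features2 = {
       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
       .pNext = &subgroup_features,
    };
    vkGetPhysicalDeviceFeatures2(phys_dev, &features2);
    /* With this patch, RADV reports both subgroupSizeControl and
     * computeFullSubgroups as VK_TRUE. */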
props->maxTimelineSemaphoreValueDifference = UINT64_MAX;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
+ VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
+ (VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
+ props->minSubgroupSize = 64;
+ props->maxSubgroupSize = 64;
+ props->maxComputeWorkgroupSubgroups = UINT32_MAX;
+ props->requiredSubgroupSizeStages = 0;
+
+ if (pdevice->rad_info.chip_class >= GFX10) {
+ /* Only GFX10+ supports wave32. */
+ props->minSubgroupSize = 32;
+ props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
+ }
+ break;
+ }
default:
break;
}
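(Likewise for reference, the matching properties query, same assumptions as above.)

    VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroup_props = {
       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT,
    };
    VkPhysicalDeviceProperties2 props2 = {
       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
       .pNext = &subgroup_props,
    };
    vkGetPhysicalDeviceProperties2(phys_dev, &props2);
    /* GFX10+: [32, 64] and requiredSubgroupSizeStages == VK_SHADER_STAGE_COMPUTE_BIT;
     * pre-GFX10: [64, 64] and requiredSubgroupSizeStages == 0. */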
Extension('VK_EXT_shader_stencil_export', 1, True),
Extension('VK_EXT_shader_subgroup_ballot', 1, True),
Extension('VK_EXT_shader_subgroup_vote', 1, True),
+ Extension('VK_EXT_subgroup_size_control', 2, '!device->use_aco'),
Extension('VK_EXT_texel_buffer_alignment', 1, True),
Extension('VK_EXT_transform_feedback', 1, True),
Extension('VK_EXT_vertex_attribute_divisor', 3, True),
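(The extension is advertised at spec version 2, and only with the LLVM backend, i.e. not with ACO. A sketch of the usual application-side check, assuming string.h, stdlib.h and a valid phys_dev:)

    uint32_t count = 0;
    vkEnumerateDeviceExtensionProperties(phys_dev, NULL, &count, NULL);
    VkExtensionProperties *exts = malloc(count * sizeof(*exts));
    vkEnumerateDeviceExtensionProperties(phys_dev, NULL, &count, exts);

    bool has_subgroup_size_control = false;
    for (uint32_t i = 0; i < count; i++) {
       if (!strcmp(exts[i].extensionName,
                   VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME))
          has_subgroup_size_control = true;
    }
    free(exts);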
keys[MESA_SHADER_FRAGMENT].fs.is_int10 = key->is_int10;
keys[MESA_SHADER_FRAGMENT].fs.log2_ps_iter_samples = key->log2_ps_iter_samples;
keys[MESA_SHADER_FRAGMENT].fs.num_samples = key->num_samples;
+
+ if (nir[MESA_SHADER_COMPUTE]) {
+ keys[MESA_SHADER_COMPUTE].cs.subgroup_size = key->compute_subgroup_size;
+ }
}
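(The key fields written here are declared outside this excerpt; inferred from the usage above, the accompanying header changes are assumed to look roughly like the following, with all unrelated fields omitted:)

    /* Assumed shape, field names taken from the usage above: */
    struct radv_pipeline_key {
       /* ...existing fields... */
       uint8_t compute_subgroup_size; /* 0 = no required subgroup size */
    };

    struct radv_shader_variant_key {
       union {
          /* ...vs/fs/tes keys... */
          struct {
             uint8_t subgroup_size; /* copied from compute_subgroup_size */
          } cs;
       };
    };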
static uint8_t
radv_get_wave_size(struct radv_device *device,
+ const VkPipelineShaderStageCreateInfo *pStage,
gl_shader_stage stage,
const struct radv_shader_variant_key *key)
{
if (stage == MESA_SHADER_GEOMETRY && !key->vs_common_out.as_ngg)
return 64;
- else if (stage == MESA_SHADER_COMPUTE)
+ else if (stage == MESA_SHADER_COMPUTE) {
+ if (key->cs.subgroup_size) {
+ /* Return the required subgroup size if specified. */
+ return key->cs.subgroup_size;
+ }
return device->physical_device->cs_wave_size;
+ }
else if (stage == MESA_SHADER_FRAGMENT)
return device->physical_device->ps_wave_size;
else
static void
radv_fill_shader_info(struct radv_pipeline *pipeline,
+ const VkPipelineShaderStageCreateInfo **pStages,
struct radv_shader_variant_key *keys,
struct radv_shader_info *infos,
nir_shader **nir)
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
if (nir[i])
infos[i].wave_size =
- radv_get_wave_size(pipeline->device, i, &keys[i]);
+ radv_get_wave_size(pipeline->device, pStages[i],
+ i, &keys[i]);
}
}
radv_fill_shader_keys(device, keys, key, nir);
- radv_fill_shader_info(pipeline, keys, infos, nir);
+ radv_fill_shader_info(pipeline, pStages, keys, infos, nir);
if ((nir[MESA_SHADER_VERTEX] &&
keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg) ||
radv_generate_compute_pipeline_key(struct radv_pipeline *pipeline,
const VkComputePipelineCreateInfo *pCreateInfo)
{
+ const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->stage;
struct radv_pipeline_key key;
memset(&key, 0, sizeof(key));
if (pCreateInfo->flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
key.optimisations_disabled = 1;
+ const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *subgroup_size =
+ vk_find_struct_const(stage->pNext,
+ PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
+
+ if (subgroup_size) {
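+ /* RADV only supports subgroup sizes of 32 (GFX10+) and 64. */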
+ assert(subgroup_size->requiredSubgroupSize == 32 ||
+ subgroup_size->requiredSubgroupSize == 64);
+ key.compute_subgroup_size = subgroup_size->requiredSubgroupSize;
+ }
+
return key;
}
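(End to end, this is the create-info chain the new key code consumes; device, layout and shader_module are assumed valid handles, and 32 is only legal on GFX10+ per the properties above.)

    VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT required_size = {
       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
       .requiredSubgroupSize = 32, /* RADV: must be 32 or 64 */
    };
    VkComputePipelineCreateInfo pipeline_info = {
       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
       .stage = {
          .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
          .pNext = &required_size,
          .stage = VK_SHADER_STAGE_COMPUTE_BIT,
          .module = shader_module,
          .pName = "main",
       },
       .layout = layout,
    };
    VkPipeline pipeline;
    vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &pipeline_info,
                             NULL, &pipeline);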