vk_free2(&device->alloc, pAllocator, module);
}
+
+static void
+radv_pipeline_destroy(struct radv_device *device,
+ struct radv_pipeline *pipeline,
+ const VkAllocationCallbacks* allocator)
+{
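+ /* Free each stage's shader variant before freeing the pipeline itself. */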
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i)
+ if (pipeline->shaders[i])
+ radv_shader_variant_destroy(device, pipeline->shaders[i]);
+
+ vk_free2(&device->alloc, allocator, pipeline);
+}
+
void radv_DestroyPipeline(
VkDevice _device,
VkPipeline _pipeline,
if (!_pipeline)
return;
- for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i)
- if (pipeline->shaders[i])
- radv_shader_variant_destroy(device, pipeline->shaders[i]);
-
- vk_free2(&device->alloc, pAllocator, pipeline);
+ radv_pipeline_destroy(device, pipeline, pAllocator);
}
assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
spec_entries[i].id = spec_info->pMapEntries[i].constantID;
- spec_entries[i].data = *(const uint32_t *)data;
+ if (entry.size == 8)
+ spec_entries[i].data64 = *(const uint64_t *)data;
+ else
+ spec_entries[i].data32 = *(const uint32_t *)data;
}
}
const struct nir_spirv_supported_extensions supported_ext = {
free(spec_entries);
- nir_lower_returns(nir);
- nir_validate_shader(nir);
-
- nir_inline_functions(nir);
- nir_validate_shader(nir);
+ /* We have to lower away local constant initializers right before we
+ * inline functions. That way they get properly initialized at the top
+ * of the function and not at the top of its caller.
+ */
+ NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local);
+ NIR_PASS_V(nir, nir_lower_returns);
+ NIR_PASS_V(nir, nir_inline_functions);
/* Pick off the single entrypoint that we want */
foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
assert(exec_list_length(&nir->functions) == 1);
entry_point->name = ralloc_strdup(entry_point, "main");
- nir_remove_dead_variables(nir, nir_var_shader_in);
- nir_remove_dead_variables(nir, nir_var_shader_out);
- nir_remove_dead_variables(nir, nir_var_system_value);
- nir_validate_shader(nir);
+ NIR_PASS_V(nir, nir_remove_dead_variables,
+ nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
- nir_lower_system_values(nir);
- nir_validate_shader(nir);
+ /* Now that we've deleted all but the main function, we can go ahead and
+ * lower the rest of the constant initializers.
+ */
+ NIR_PASS_V(nir, nir_lower_constant_initializers, ~0);
+ NIR_PASS_V(nir, nir_lower_system_values);
}
/* Vulkan uses the separate-shader linking model */
}
static void radv_dump_pipeline_stats(struct radv_device *device, struct radv_pipeline *pipeline)
{
- unsigned lds_increment = device->instance->physicalDevice.rad_info.chip_class >= CIK ? 512 : 256;
+ unsigned lds_increment = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
struct radv_shader_variant *var;
struct ac_shader_config *conf;
int i;
}
if (conf->num_sgprs) {
- if (device->instance->physicalDevice.rad_info.chip_class >= VI)
+ if (device->physical_device->rad_info.chip_class >= VI)
max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);
else
max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);
bool dump)
{
struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant));
- enum radeon_family chip_family = device->instance->physicalDevice.rad_info.family;
+ enum radeon_family chip_family = device->physical_device->rad_info.family;
LLVMTargetMachineRef tm;
if (!variant)
return NULL;
struct ac_shader_binary binary;
- options.unsafe_math = env_var_as_boolean("RADV_UNSAFE_MATH", false);
+ options.unsafe_math = !!(device->debug_flags & RADV_DEBUG_UNSAFE_MATH);
options.family = chip_family;
- options.chip_class = device->instance->physicalDevice.rad_info.chip_class;
- tm = ac_create_target_machine(chip_family);
+ options.chip_class = device->physical_device->rad_info.chip_class;
+ options.supports_spill = false;
+ tm = ac_create_target_machine(chip_family, false);
ac_compile_nir_shader(tm, &binary, &variant->config,
&variant->info, shader, &options, dump);
LLVMDisposeTargetMachine(tm);
gl_shader_stage stage,
const VkSpecializationInfo *spec_info,
struct radv_pipeline_layout *layout,
- const union ac_shader_variant_key *key,
- bool dump)
+ const union ac_shader_variant_key *key)
{
unsigned char sha1[20];
struct radv_shader_variant *variant;
nir_shader *nir;
void *code = NULL;
unsigned code_size = 0;
+ bool dump = (pipeline->device->debug_flags & RADV_DEBUG_DUMP_SHADERS);
if (module->nir)
_mesa_sha1_compute(module->nir->info->name,
return variant;
}
+static VkResult
+radv_pipeline_scratch_init(struct radv_device *device,
+ struct radv_pipeline *pipeline)
+{
+ unsigned scratch_bytes_per_wave = 0;
+ unsigned max_waves = 0;
+ unsigned min_waves = 1;
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (pipeline->shaders[i]) {
+ unsigned max_stage_waves = device->scratch_waves;
+
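+ /* The pipeline needs the largest per-wave scratch size required by any stage. */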
+ scratch_bytes_per_wave = MAX2(scratch_bytes_per_wave,
+ pipeline->shaders[i]->config.scratch_bytes_per_wave);
+
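+ /* Limit the wave count by VGPR pressure: each CU has 4 SIMDs, and
+ * each SIMD has 256 VGPRs to share among its resident waves. */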
+ max_stage_waves = MIN2(max_stage_waves,
+ 4 * device->physical_device->rad_info.num_good_compute_units *
+ (256 / pipeline->shaders[i]->config.num_vgprs));
+ max_waves = MAX2(max_waves, max_stage_waves);
+ }
+ }
+
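+ /* A compute workgroup needs all of its waves resident at once, so
+ * the number of 64-lane waves per workgroup sets a lower bound. */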
+ if (pipeline->shaders[MESA_SHADER_COMPUTE]) {
+ unsigned group_size = pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[0] *
+ pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[1] *
+ pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[2];
+ min_waves = MAX2(min_waves, round_up_u32(group_size, 64));
+ }
+
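+ /* Keep max_waves * scratch_bytes_per_wave from overflowing 32 bits. */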
+ if (scratch_bytes_per_wave)
+ max_waves = MIN2(max_waves, 0xffffffffu / scratch_bytes_per_wave);
+
+ if (scratch_bytes_per_wave && max_waves < min_waves) {
+ /* We are not actually out of device memory at this point, but we
+ * will be on first execution, when the scratch has to be
+ * allocated. Fail now to avoid hanging shaders. */
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ }
+
+ pipeline->scratch_bytes_per_wave = scratch_bytes_per_wave;
+ pipeline->max_waves = max_waves;
+ return VK_SUCCESS;
+}
+
static uint32_t si_translate_blend_function(VkBlendOp op)
{
switch (op) {
const VkPipelineMultisampleStateCreateInfo *vkms = pCreateInfo->pMultisampleState;
struct radv_blend_state *blend = &pipeline->graphics.blend;
struct radv_multisample_state *ms = &pipeline->graphics.ms;
- unsigned num_tile_pipes = pipeline->device->instance->physicalDevice.rad_info.num_tile_pipes;
+ unsigned num_tile_pipes = pipeline->device->physical_device->rad_info.num_tile_pipes;
int ps_iter_samples = 1;
uint32_t mask = 0xffff;
const VkAllocationCallbacks *alloc)
{
struct radv_shader_module fs_m = {0};
+ VkResult result;
- bool dump = getenv("RADV_DUMP_SHADERS");
if (alloc == NULL)
alloc = &device->alloc;
pStages[MESA_SHADER_VERTEX]->pName,
MESA_SHADER_VERTEX,
pStages[MESA_SHADER_VERTEX]->pSpecializationInfo,
- pipeline->layout, &key, dump);
+ pipeline->layout, &key);
pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_VERTEX);
}
stage ? stage->pName : "main",
MESA_SHADER_FRAGMENT,
stage ? stage->pSpecializationInfo : NULL,
- pipeline->layout, &key, dump);
+ pipeline->layout, &key);
pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_FRAGMENT);
}
pipeline->binding_stride[desc->binding] = desc->stride;
}
- if (device->shader_stats_dump) {
+ if (device->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
radv_dump_pipeline_stats(device, pipeline);
}
- return VK_SUCCESS;
+ result = radv_pipeline_scratch_init(device, pipeline);
+ return result;
}
VkResult
result = radv_pipeline_init(pipeline, device, cache,
pCreateInfo, extra, pAllocator);
if (result != VK_SUCCESS) {
- vk_free2(&device->alloc, pAllocator, pipeline);
+ radv_pipeline_destroy(device, pipeline, pAllocator);
return result;
}
RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
RADV_FROM_HANDLE(radv_shader_module, module, pCreateInfo->stage.module);
struct radv_pipeline *pipeline;
- bool dump = getenv("RADV_DUMP_SHADERS");
+ VkResult result;
pipeline = vk_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
pCreateInfo->stage.pName,
MESA_SHADER_COMPUTE,
pCreateInfo->stage.pSpecializationInfo,
- pipeline->layout, NULL, dump);
+ pipeline->layout, NULL);
+
+ result = radv_pipeline_scratch_init(device, pipeline);
+ if (result != VK_SUCCESS) {
+ radv_pipeline_destroy(device, pipeline, pAllocator);
+ return result;
+ }
*pPipeline = radv_pipeline_to_handle(pipeline);
- if (device->shader_stats_dump) {
+ if (device->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
radv_dump_pipeline_stats(device, pipeline);
}
return VK_SUCCESS;