#include <fcntl.h>
#include "util/mesa-sha1.h"
+#include "util/os_time.h"
#include "common/gen_l3_config.h"
#include "anv_private.h"
#include "compiler/brw_nir.h"
struct spirv_to_nir_options spirv_options = {
.lower_workgroup_access_to_offsets = true,
.caps = {
+ .derivative_group = true,
+ .descriptor_array_dynamic_indexing = true,
+ .descriptor_array_non_uniform_indexing = true,
+ .descriptor_indexing = true,
.device_group = true,
.draw_parameters = true,
+ .float16 = pdevice->info.gen >= 8,
.float64 = pdevice->info.gen >= 8,
.geometry_streams = true,
.image_write_without_format = true,
+ .int8 = pdevice->info.gen >= 8,
.int16 = pdevice->info.gen >= 8,
.int64 = pdevice->info.gen >= 8,
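+ /* 64-bit atomics are implemented with A64 messages, which is why
+ * softpin is required on top of gen >= 9 (rationale assumed; the
+ * patch itself does not state it).
+ */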
+ .int64_atomics = pdevice->info.gen >= 9 && pdevice->use_softpin,
.min_lod = true,
.multiview = true,
- .physical_storage_buffer_address = pdevice->info.gen >= 8 &&
- pdevice->use_softpin,
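+ /* has_a64_buffer_access is the gen >= 8 && use_softpin check that
+ * was previously open-coded here, folded into one physical-device
+ * flag.
+ */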
+ .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
.post_depth_coverage = pdevice->info.gen >= 9,
+ .runtime_descriptor_array = true,
.shader_viewport_index_layer = true,
.stencil_export = pdevice->info.gen >= 9,
.storage_8bit = pdevice->info.gen >= 8,
.transform_feedback = pdevice->info.gen >= 8,
.variable_pointers = true,
},
- .ubo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
- .ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
- .phys_ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT64, 1),
- .push_const_ptr_type = glsl_uint_type(),
- .shared_ptr_type = glsl_uint_type(),
+ .ubo_addr_format = nir_address_format_32bit_index_offset,
+ .ssbo_addr_format =
+ anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
+ .phys_ssbo_addr_format = nir_address_format_64bit_global,
+ .push_const_addr_format = nir_address_format_logical,
+
+ /* TODO: Consider changing this to an address format where the NULL
+ * pointer equals 0. That might be a better format to play nicely
+ * with certain code / code generators.
+ */
+ .shared_addr_format = nir_address_format_32bit_offset,
};
- nir_function *entry_point =
+
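+ /* spirv_to_nir now returns the nir_shader itself instead of a
+ * nir_function for the entrypoint; the entrypoint is found again
+ * below via func->is_entrypoint and nir_shader_get_entrypoint().
+ */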
+ nir_shader *nir =
spirv_to_nir(spirv, module->size / 4,
spec_entries, num_spec_entries,
stage, entrypoint_name, &spirv_options, nir_options);
- nir_shader *nir = entry_point->shader;
assert(nir->info.stage == stage);
nir_validate_shader(nir, "after spirv_to_nir");
ralloc_steal(mem_ctx, nir);
/* Pick off the single entrypoint that we want */
foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
- if (func != entry_point)
+ if (!func->is_entrypoint)
exec_node_remove(&func->node);
}
assert(exec_list_length(&nir->functions) == 1);
NIR_PASS_V(nir, nir_propagate_invariant);
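+ /* nir_lower_io_to_temporaries shadows shader I/O with temporaries
+ * copied at entry/exit; the (true, false) arguments request this for
+ * outputs but not inputs.
+ */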
NIR_PASS_V(nir, nir_lower_io_to_temporaries,
- entry_point->impl, true, false);
+ nir_shader_get_entrypoint(nir), true, false);
+
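+ /* nir_lower_frexp rewrites the frexp_exp/frexp_sig opcodes as plain
+ * bit manipulation; presumably added because the backend has no
+ * native frexp support.
+ */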
+ NIR_PASS_V(nir, nir_lower_frexp);
/* Vulkan uses the separate-shader linking model */
nir->info.separate_shader = true;
key->nr_color_regions = util_bitcount(key->color_outputs_valid);
- key->replicate_alpha = key->nr_color_regions > 1 &&
- ms_info && ms_info->alphaToCoverageEnable;
+ /* The hardware disables alpha-to-coverage whenever the shader writes
+ * a SampleMask output. To decide whether we must emit workaround code
+ * for that, and to reduce possible shader recompilations, we would
+ * need to know here whether a SampleMask output variable exists.
+ */
+ key->alpha_to_coverage = ms_info && ms_info->alphaToCoverageEnable;
+
+ /* Vulkan doesn't support fixed-function alpha test */
+ key->alpha_test_replicate_alpha = false;
if (ms_info) {
/* We should probably pull this out of the shader, but it's fairly
* harmless to compute it and then let dead-code take care of it.
*/
if (ms_info->rasterizationSamples > 1) {
- key->persample_interp =
+ key->persample_interp = ms_info->sampleShadingEnable &&
(ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
key->multisample_fbo = true;
}
- key->frag_coord_adds_sample_pos = ms_info->sampleShadingEnable;
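+ /* gl_FragCoord only needs the sample-position offset when the shader
+ * actually runs per-sample, so tie this to the persample_interp
+ * computation above rather than to raw sampleShadingEnable.
+ */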
+ key->frag_coord_adds_sample_pos = key->persample_interp;
}
}
struct anv_pipeline_bind_map bind_map;
union brw_any_prog_data prog_data;
+
+ VkPipelineCreationFeedbackEXT feedback;
};
static void
struct anv_pipeline_stage *stage,
struct anv_pipeline_layout *layout)
{
- const struct brw_compiler *compiler =
- pipeline->device->instance->physicalDevice.compiler;
+ const struct anv_physical_device *pdevice =
+ &pipeline->device->instance->physicalDevice;
+ const struct brw_compiler *compiler = pdevice->compiler;
struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
nir_shader *nir = stage->nir;
/* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
if (layout) {
- anv_nir_apply_pipeline_layout(&pipeline->device->instance->physicalDevice,
+ anv_nir_apply_pipeline_layout(pdevice,
pipeline->device->robust_buffer_access,
layout, nir, prog_data,
&stage->bind_map);
- NIR_PASS_V(nir, nir_lower_explicit_io,
- nir_var_mem_ubo | nir_var_mem_ssbo,
+ NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
nir_address_format_32bit_index_offset);
+ NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
+ anv_nir_ssbo_addr_format(pdevice,
+ pipeline->device->robust_buffer_access));
NIR_PASS_V(nir, nir_opt_constant_folding);
+
+ /* We don't support non-uniform UBO access, and non-uniform SSBO
+ * access is handled naturally by falling back to A64 messages, so
+ * only texture and image access need explicit lowering here.
+ */
+ NIR_PASS_V(nir, nir_lower_non_uniform_access,
+ nir_lower_non_uniform_texture_access |
+ nir_lower_non_uniform_image_access);
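+ /* The lowering wraps each such access in a loop that repeatedly takes
+ * the first active invocation's handle and performs the access for
+ * every invocation sharing it (the usual "waterfall" scheme; detail
+ * assumed rather than spelled out in this patch).
+ */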
}
if (nir->info.stage != MESA_SHADER_COMPUTE)
continue;
const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
- /* Unused or out-of-bounds */
- if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid & (1 << rt)))
+ /* Out-of-bounds */
+ if (rt >= MAX_RTS)
continue;
const unsigned array_len =
glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
assert(rt + array_len <= max_rt);
+ /* Unused */
+ if (!(stage->key.wm.color_outputs_valid & BITFIELD_RANGE(rt, array_len))) {
+ /* If this is the RT at location 0 and we have alpha to coverage
+ * enabled, we will have to create a null RT for it, so mark it as
+ * used.
+ */
+ if (rt > 0 || !stage->key.wm.alpha_to_coverage)
+ continue;
+ }
+
for (unsigned i = 0; i < array_len; i++)
rt_used[rt + i] = true;
}
continue;
rt_to_bindings[i] = num_rts;
- rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) {
- .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
- .binding = 0,
- .index = i,
- };
+
+ if (stage->key.wm.color_outputs_valid & (1 << i)) {
+ rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) {
+ .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
+ .binding = 0,
+ .index = i,
+ };
+ } else {
+ /* Set up a null render target; index UINT32_MAX is the sentinel the
+ * binding-table code treats as "emit a null surface state" (handling
+ * assumed, it is not part of this hunk).
+ */
+ rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) {
+ .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
+ .binding = 0,
+ .index = UINT32_MAX,
+ };
+ }
+
num_rts++;
}
continue;
const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
- if (rt >= MAX_RTS ||
- !(stage->key.wm.color_outputs_valid & (1 << rt))) {
- /* Unused or out-of-bounds, throw it away */
+
+ if (rt >= MAX_RTS || !rt_used[rt]) {
+ /* Unused or out-of-bounds, throw it away. Note that rt_used already
+ * keeps the first RT alive when alpha to coverage is enabled, so that
+ * case never lands here.
+ */
deleted_output = true;
var->data.mode = nir_var_function_temp;
exec_node_remove(&var->node);
struct anv_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *info)
{
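+ /* VK_EXT_pipeline_creation_feedback: track wall-clock duration for
+ * the pipeline as a whole and for each stage, plus whether results
+ * came from the application's pipeline cache.
+ */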
+ VkPipelineCreationFeedbackEXT pipeline_feedback = {
+ .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
+ };
+ int64_t pipeline_start = os_time_get_nano();
+
const struct brw_compiler *compiler =
pipeline->device->instance->physicalDevice.compiler;
struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {};
pipeline->active_stages |= sinfo->stage;
+ int64_t stage_start = os_time_get_nano();
+
stages[stage].stage = stage;
stages[stage].module = anv_shader_module_from_handle(sinfo->module);
stages[stage].entrypoint = sinfo->pName;
default:
unreachable("Invalid graphics shader stage");
}
+
+ stages[stage].feedback.duration += os_time_get_nano() - stage_start;
+ stages[stage].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
}
if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
unsigned found = 0;
+ unsigned cache_hits = 0;
for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
if (!stages[s].entrypoint)
continue;
+ int64_t stage_start = os_time_get_nano();
+
stages[s].cache_key.stage = s;
memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
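+ /* cache_hit reports whether the kernel came specifically from the
+ * application-supplied cache, which is what the application-cache-hit
+ * feedback bit below is allowed to report.
+ */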
+ bool cache_hit;
struct anv_shader_bin *bin =
anv_device_search_for_kernel(pipeline->device, cache,
&stages[s].cache_key,
- sizeof(stages[s].cache_key));
+ sizeof(stages[s].cache_key), &cache_hit);
if (bin) {
found++;
pipeline->shaders[s] = bin;
}
+
+ if (cache_hit) {
+ cache_hits++;
+ stages[s].feedback.flags |=
+ VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
+ }
+ stages[s].feedback.duration += os_time_get_nano() - stage_start;
}
if (found == __builtin_popcount(pipeline->active_stages)) {
+ if (cache_hits == found) {
+ pipeline_feedback.flags |=
+ VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
+ }
/* We found all our shaders in the cache. We're done. */
goto done;
} else if (found > 0) {
* cache again as part of the compilation process.
*/
for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
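+ /* Also reset any feedback accumulated for the partial cache hits;
+ * every stage is recompiled from scratch below.
+ */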
+ stages[s].feedback.flags = 0;
if (pipeline->shaders[s]) {
anv_shader_bin_unref(pipeline->device, pipeline->shaders[s]);
pipeline->shaders[s] = NULL;
if (!stages[s].entrypoint)
continue;
+ int64_t stage_start = os_time_get_nano();
+
assert(stages[s].stage == s);
assert(pipeline->shaders[s] == NULL);
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail;
}
+
+ stages[s].feedback.duration += os_time_get_nano() - stage_start;
}
/* Walk backwards to link */
if (!stages[s].entrypoint)
continue;
+ int64_t stage_start = os_time_get_nano();
+
void *stage_ctx = ralloc_context(NULL);
nir_xfb_info *xfb_info = NULL;
pipeline->shaders[s] = bin;
ralloc_free(stage_ctx);
+ stages[s].feedback.duration += os_time_get_nano() - stage_start;
+
prev_stage = &stages[s];
}
pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT;
}
+ pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
+
+ const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
+ vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
+ if (create_feedback) {
+ *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
+
+ assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
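+ /* Per-stage feedback entries are written in the application's
+ * pStages order, not in gl_shader_stage order.
+ */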
+ for (uint32_t i = 0; i < info->stageCount; i++) {
+ gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
+ create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
+ }
+ }
+
return VK_SUCCESS;
fail:
const char *entrypoint,
const VkSpecializationInfo *spec_info)
{
+ VkPipelineCreationFeedbackEXT pipeline_feedback = {
+ .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
+ };
+ int64_t pipeline_start = os_time_get_nano();
+
const struct brw_compiler *compiler =
pipeline->device->instance->physicalDevice.compiler;
.spec_info = spec_info,
.cache_key = {
.stage = MESA_SHADER_COMPUTE,
- }
+ },
+ .feedback = {
+ .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
+ },
};
anv_pipeline_hash_shader(stage.module,
stage.entrypoint,
ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
+ bool cache_hit;
bin = anv_device_search_for_kernel(pipeline->device, cache, &stage.cache_key,
- sizeof(stage.cache_key));
+ sizeof(stage.cache_key), &cache_hit);
if (bin == NULL) {
+ int64_t stage_start = os_time_get_nano();
+
stage.bind_map = (struct anv_pipeline_bind_map) {
.surface_to_descriptor = stage.surface_to_descriptor,
.sampler_to_descriptor = stage.sampler_to_descriptor
}
ralloc_free(mem_ctx);
+
+ stage.feedback.duration = os_time_get_nano() - stage_start;
+ }
+
+ if (cache_hit) {
+ stage.feedback.flags |=
+ VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
+ pipeline_feedback.flags |=
+ VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
+ }
+ pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
+
+ const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
+ vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
+ if (create_feedback) {
+ *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
+
+ assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
+ create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
}
pipeline->active_stages = VK_SHADER_STAGE_COMPUTE_BIT;