col_format |= cf << (4 * i);
}
+ if (!col_format && blend->need_src_alpha & (1 << 0)) {
+ /* When a subpass doesn't have any color attachments, write the
+ * alpha channel of MRT0 when alpha coverage is enabled because
+ * the depth attachment needs it.
+ */
+ col_format |= V_028714_SPI_SHADER_32_AR;
+ }
+
/* If the i-th target format is set, all previous target formats must
* be non-zero to avoid hangs.
*/
if (vkms && vkms->alphaToCoverageEnable) {
blend.db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1);
+ blend.need_src_alpha |= 0x1;
}
blend.cb_target_mask = 0;
}
}
-static unsigned si_map_swizzle(unsigned swizzle)
-{
- switch (swizzle) {
- case VK_SWIZZLE_Y:
- return V_008F0C_SQ_SEL_Y;
- case VK_SWIZZLE_Z:
- return V_008F0C_SQ_SEL_Z;
- case VK_SWIZZLE_W:
- return V_008F0C_SQ_SEL_W;
- case VK_SWIZZLE_0:
- return V_008F0C_SQ_SEL_0;
- case VK_SWIZZLE_1:
- return V_008F0C_SQ_SEL_1;
- default: /* VK_SWIZZLE_X */
- return V_008F0C_SQ_SEL_X;
- }
-}
-
-
static unsigned radv_dynamic_state_mask(VkDynamicState state)
{
switch(state) {
const VkPipelineDiscardRectangleStateCreateInfoEXT *discard_rectangle_info =
vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT);
- if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
+ if (needed_states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
dynamic->discard_rectangle.count = discard_rectangle_info->discardRectangleCount;
- typed_memcpy(dynamic->discard_rectangle.rectangles,
- discard_rectangle_info->pDiscardRectangles,
- discard_rectangle_info->discardRectangleCount);
+ if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
+ typed_memcpy(dynamic->discard_rectangle.rectangles,
+ discard_rectangle_info->pDiscardRectangles,
+ discard_rectangle_info->discardRectangleCount);
+ }
}
pipeline->dynamic_state.mask = states;
unsigned num_se = device->physical_device->rad_info.max_se;
unsigned wave_size = 64;
unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */
- /* On SI-CI, the value comes from VGT_GS_VERTEX_REUSE = 16.
- * On VI+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
+ /* On GFX6-GFX7, the value comes from VGT_GS_VERTEX_REUSE = 16.
+ * On GFX8+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
*/
unsigned gs_vertex_reuse =
- (device->physical_device->rad_info.chip_class >= VI ? 32 : 16) * num_se;
+ (device->physical_device->rad_info.chip_class >= GFX8 ? 32 : 16) * num_se;
unsigned alignment = 256 * num_se;
/* The maximum size is 63.999 MB per SE. */
unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
esgs_ring_size = align(esgs_ring_size, alignment);
gsvs_ring_size = align(gsvs_ring_size, alignment);
- if (pipeline->device->physical_device->rad_info.chip_class <= VI)
+ if (pipeline->device->physical_device->rad_info.chip_class <= GFX8)
pipeline->graphics.esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
pipeline->graphics.gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
lds_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.lds_size;
- if (pipeline->device->physical_device->rad_info.chip_class >= CIK) {
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) {
assert(lds_size <= 65536);
lds_size = align(lds_size, 512) / 512;
} else {
}
}
+/* Find the stride of the vertex buffer binding numbered attrib_binding.
+ *
+ * Walks the binding descriptions of input_state in array order and returns
+ * the stride of the first description whose binding number equals
+ * attrib_binding. Returns 0 when no description matches.
+ */
+static uint32_t
+radv_get_attrib_stride(const VkPipelineVertexInputStateCreateInfo *input_state,
+		       uint32_t attrib_binding)
+{
+	const uint32_t binding_count = input_state->vertexBindingDescriptionCount;
+	uint32_t stride = 0;
+
+	for (uint32_t idx = 0; idx < binding_count; idx++) {
+		const VkVertexInputBindingDescription *candidate =
+			input_state->pVertexBindingDescriptions + idx;
+
+		if (candidate->binding != attrib_binding)
+			continue;
+
+		/* First match wins. */
+		stride = candidate->stride;
+		break;
+	}
+
+	return stride;
+}
static struct radv_pipeline_key
radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline,
}
for (unsigned i = 0; i < input_state->vertexAttributeDescriptionCount; ++i) {
- unsigned location = input_state->pVertexAttributeDescriptions[i].location;
- unsigned binding = input_state->pVertexAttributeDescriptions[i].binding;
+ const VkVertexInputAttributeDescription *desc =
+ &input_state->pVertexAttributeDescriptions[i];
+ const struct vk_format_description *format_desc;
+ unsigned location = desc->location;
+ unsigned binding = desc->binding;
+ unsigned num_format, data_format;
+ int first_non_void;
+
if (binding_input_rate & (1u << binding)) {
key.instance_rate_inputs |= 1u << location;
key.instance_rate_divisors[location] = instance_rate_divisors[binding];
}
- if (pipeline->device->physical_device->rad_info.chip_class <= VI &&
+ format_desc = vk_format_description(desc->format);
+ first_non_void = vk_format_get_first_non_void_channel(desc->format);
+
+ num_format = radv_translate_buffer_numformat(format_desc, first_non_void);
+ data_format = radv_translate_buffer_dataformat(format_desc, first_non_void);
+
+ key.vertex_attribute_formats[location] = data_format | (num_format << 4);
+ key.vertex_attribute_bindings[location] = desc->binding;
+ key.vertex_attribute_offsets[location] = desc->offset;
+ key.vertex_attribute_strides[location] = radv_get_attrib_stride(input_state, desc->binding);
+
+ if (pipeline->device->physical_device->rad_info.chip_class <= GFX8 &&
pipeline->device->physical_device->rad_info.family != CHIP_STONEY) {
VkFormat format = input_state->pVertexAttributeDescriptions[i].format;
uint64_t adjust;
}
key.vertex_alpha_adjust |= adjust << (2 * location);
}
+
+ switch (desc->format) {
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ case VK_FORMAT_B8G8R8A8_SNORM:
+ case VK_FORMAT_B8G8R8A8_USCALED:
+ case VK_FORMAT_B8G8R8A8_SSCALED:
+ case VK_FORMAT_B8G8R8A8_UINT:
+ case VK_FORMAT_B8G8R8A8_SINT:
+ case VK_FORMAT_B8G8R8A8_SRGB:
+ case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
+ case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
+ case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
+ case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
+ case VK_FORMAT_A2R10G10B10_UINT_PACK32:
+ case VK_FORMAT_A2R10G10B10_SINT_PACK32:
+ key.vertex_post_shuffle |= 1 << location;
+ break;
+ default:
+ break;
+ }
}
if (pCreateInfo->pTessellationState)
}
key.col_format = blend->spi_shader_col_format;
- if (pipeline->device->physical_device->rad_info.chip_class < VI)
+ if (pipeline->device->physical_device->rad_info.chip_class < GFX8)
radv_pipeline_compute_get_int_clamp(pCreateInfo, &key.is_int8, &key.is_int10);
return key;
{
keys[MESA_SHADER_VERTEX].vs.instance_rate_inputs = key->instance_rate_inputs;
keys[MESA_SHADER_VERTEX].vs.alpha_adjust = key->vertex_alpha_adjust;
- for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; ++i)
+ keys[MESA_SHADER_VERTEX].vs.post_shuffle = key->vertex_post_shuffle;
+ for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; ++i) {
keys[MESA_SHADER_VERTEX].vs.instance_rate_divisors[i] = key->instance_rate_divisors[i];
+ keys[MESA_SHADER_VERTEX].vs.vertex_attribute_formats[i] = key->vertex_attribute_formats[i];
+ keys[MESA_SHADER_VERTEX].vs.vertex_attribute_bindings[i] = key->vertex_attribute_bindings[i];
+ keys[MESA_SHADER_VERTEX].vs.vertex_attribute_offsets[i] = key->vertex_attribute_offsets[i];
+ keys[MESA_SHADER_VERTEX].vs.vertex_attribute_strides[i] = key->vertex_attribute_strides[i];
+ }
if (nir[MESA_SHADER_TESS_CTRL]) {
keys[MESA_SHADER_VERTEX].vs.as_ls = true;
tes_info->tess.point_mode |= tcs_info->tess.point_mode;
}
+/* Reset every creation-feedback record referenced by ext.
+ *
+ * Does nothing when ext is NULL. Otherwise clears flags and duration of the
+ * whole-pipeline record (if one is supplied) and of each of the
+ * pipelineStageCreationFeedbackCount per-stage records.
+ */
+static
+void radv_init_feedback(const VkPipelineCreationFeedbackCreateInfoEXT *ext)
+{
+	if (!ext)
+		return;
+
+	VkPipelineCreationFeedbackEXT *whole_pipeline = ext->pPipelineCreationFeedback;
+	if (whole_pipeline) {
+		whole_pipeline->flags = 0;
+		whole_pipeline->duration = 0;
+	}
+
+	for (unsigned stage_idx = 0;
+	     stage_idx < ext->pipelineStageCreationFeedbackCount;
+	     ++stage_idx) {
+		VkPipelineCreationFeedbackEXT *stage_record =
+			&ext->pPipelineStageCreationFeedbacks[stage_idx];
+
+		stage_record->flags = 0;
+		stage_record->duration = 0;
+	}
+}
+
+/* Open a timed section on a creation-feedback record.
+ *
+ * A NULL feedback is ignored. Otherwise the current time is subtracted from
+ * feedback->duration and flags is overwritten with the VALID bit only.
+ * radv_stop_feedback() adds the current time back, so each start/stop pair
+ * presumably contributes its elapsed time to duration (assumes
+ * radv_get_current_time() does not go backwards - TODO confirm).
+ */
+static
+void radv_start_feedback(VkPipelineCreationFeedbackEXT *feedback)
+{
+	if (feedback) {
+		feedback->duration = feedback->duration - radv_get_current_time();
+		feedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
+	}
+}
+
+/* Close a timed section on a creation-feedback record.
+ *
+ * A NULL feedback is ignored. Otherwise the current time is added to
+ * feedback->duration and flags is overwritten: the VALID bit is always set,
+ * and the APPLICATION_PIPELINE_CACHE_HIT bit is set as well when cache_hit
+ * is true.
+ */
+static
+void radv_stop_feedback(VkPipelineCreationFeedbackEXT *feedback, bool cache_hit)
+{
+	if (feedback) {
+		feedback->duration = feedback->duration + radv_get_current_time();
+
+		/* Rebuild the flags from scratch; earlier bits are discarded. */
+		feedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
+		if (cache_hit)
+			feedback->flags |= VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
+	}
+}
+
static
void radv_create_shaders(struct radv_pipeline *pipeline,
struct radv_device *device,
struct radv_pipeline_cache *cache,
const struct radv_pipeline_key *key,
const VkPipelineShaderStageCreateInfo **pStages,
- const VkPipelineCreateFlags flags)
+ const VkPipelineCreateFlags flags,
+ VkPipelineCreationFeedbackEXT *pipeline_feedback,
+ VkPipelineCreationFeedbackEXT **stage_feedbacks)
{
struct radv_shader_module fs_m = {0};
struct radv_shader_module *modules[MESA_SHADER_STAGES] = { 0, };
struct radv_shader_variant_key keys[MESA_SHADER_STAGES] = {{{{0}}}};
unsigned char hash[20], gs_copy_hash[20];
+ radv_start_feedback(pipeline_feedback);
+
for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
if (pStages[i]) {
modules[i] = radv_shader_module_from_handle(pStages[i]->module);
memcpy(gs_copy_hash, hash, 20);
gs_copy_hash[0] ^= 1;
+ bool found_in_application_cache = true;
if (modules[MESA_SHADER_GEOMETRY]) {
struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0};
- radv_create_shader_variants_from_pipeline_cache(device, cache, gs_copy_hash, variants);
+ radv_create_shader_variants_from_pipeline_cache(device, cache, gs_copy_hash, variants,
+ &found_in_application_cache);
pipeline->gs_copy_shader = variants[MESA_SHADER_GEOMETRY];
}
- if (radv_create_shader_variants_from_pipeline_cache(device, cache, hash, pipeline->shaders) &&
+ if (radv_create_shader_variants_from_pipeline_cache(device, cache, hash, pipeline->shaders,
+ &found_in_application_cache) &&
(!modules[MESA_SHADER_GEOMETRY] || pipeline->gs_copy_shader)) {
+ radv_stop_feedback(pipeline_feedback, found_in_application_cache);
return;
}
if (!modules[i])
continue;
+ radv_start_feedback(stage_feedbacks[i]);
+
nir[i] = radv_shader_compile_to_nir(device, modules[i],
stage ? stage->pName : "main", i,
stage ? stage->pSpecializationInfo : NULL,
- flags);
+ flags, pipeline->layout);
/* We don't want to alter meta shaders IR directly so clone it
* first.
if (nir[i]->info.name) {
nir[i] = nir_shader_clone(NULL, nir[i]);
}
+
+ radv_stop_feedback(stage_feedbacks[i], false);
}
if (nir[MESA_SHADER_TESS_CTRL]) {
for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
if (nir[i]) {
NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
+ NIR_PASS_V(nir[i], nir_lower_non_uniform_access,
+ nir_lower_non_uniform_ubo_access |
+ nir_lower_non_uniform_ssbo_access |
+ nir_lower_non_uniform_texture_access |
+ nir_lower_non_uniform_image_access);
}
if (radv_can_dump_shader(device, modules[i], false))
if (nir[MESA_SHADER_FRAGMENT]) {
if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
+ radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]);
+
pipeline->shaders[MESA_SHADER_FRAGMENT] =
radv_shader_variant_create(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1,
pipeline->layout, keys + MESA_SHADER_FRAGMENT,
&codes[MESA_SHADER_FRAGMENT], &code_sizes[MESA_SHADER_FRAGMENT]);
+
+ radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false);
}
/* TODO: These are no longer used as keys we should refactor this */
struct nir_shader *combined_nir[] = {nir[MESA_SHADER_VERTEX], nir[MESA_SHADER_TESS_CTRL]};
struct radv_shader_variant_key key = keys[MESA_SHADER_TESS_CTRL];
key.tcs.vs_key = keys[MESA_SHADER_VERTEX].vs;
+
+ radv_start_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL]);
+
pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_create(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2,
pipeline->layout,
&key, &codes[MESA_SHADER_TESS_CTRL],
&code_sizes[MESA_SHADER_TESS_CTRL]);
+
+ radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false);
}
modules[MESA_SHADER_VERTEX] = NULL;
keys[MESA_SHADER_TESS_EVAL].tes.num_patches = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_patches;
gl_shader_stage pre_stage = modules[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
if (!pipeline->shaders[MESA_SHADER_GEOMETRY]) {
struct nir_shader *combined_nir[] = {nir[pre_stage], nir[MESA_SHADER_GEOMETRY]};
+
+ radv_start_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY]);
+
pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_create(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2,
pipeline->layout,
&keys[pre_stage] , &codes[MESA_SHADER_GEOMETRY],
&code_sizes[MESA_SHADER_GEOMETRY]);
+
+ radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false);
}
modules[pre_stage] = NULL;
}
keys[MESA_SHADER_TESS_EVAL].tes.num_patches = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_patches;
keys[MESA_SHADER_TESS_EVAL].tes.tcs_num_outputs = util_last_bit64(pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.info.tcs.outputs_written);
}
+
+ radv_start_feedback(stage_feedbacks[i]);
+
pipeline->shaders[i] = radv_shader_variant_create(device, modules[i], &nir[i], 1,
pipeline->layout,
keys + i, &codes[i],
&code_sizes[i]);
+
+ radv_stop_feedback(stage_feedbacks[i], false);
}
}
if (fs_m.nir)
ralloc_free(fs_m.nir);
+
+ radv_stop_feedback(pipeline_feedback, false);
}
static uint32_t
db_render_override |= S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
- if (pipeline->device->enabled_extensions.EXT_depth_range_unrestricted &&
- !pCreateInfo->pRasterizationState->depthClampEnable &&
+ if (!pCreateInfo->pRasterizationState->depthClampEnable &&
ps->info.info.ps.writes_z) {
/* From VK_EXT_depth_range_unrestricted spec:
*
const VkConservativeRasterizationModeEXT mode =
radv_get_conservative_raster_mode(vkraster);
uint32_t pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1);
+ bool depth_clip_disable = vkraster->depthClampEnable;
+
+ const VkPipelineRasterizationDepthClipStateCreateInfoEXT *depth_clip_state =
+ vk_find_struct_const(vkraster->pNext, PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
+ if (depth_clip_state) {
+ depth_clip_disable = !depth_clip_state->depthClipEnable;
+ }
radeon_set_context_reg(ctx_cs, R_028810_PA_CL_CLIP_CNTL,
S_028810_DX_CLIP_SPACE_DEF(1) | // vulkan uses DX conventions.
- S_028810_ZCLIP_NEAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) |
- S_028810_ZCLIP_FAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) |
+ S_028810_ZCLIP_NEAR_DISABLE(depth_clip_disable ? 1 : 0) |
+ S_028810_ZCLIP_FAR_DISABLE(depth_clip_disable ? 1 : 0) |
S_028810_DX_RASTERIZATION_KILL(vkraster->rasterizerDiscardEnable ? 1 : 0) |
S_028810_DX_LINEAR_ATTR_CLIP_ENA(1));
* if no sample lies on the pixel boundary (-8 sample offset). It's
* currently always TRUE because the driver doesn't support 16 samples.
*/
- bool exclusion = pipeline->device->physical_device->rad_info.chip_class >= CIK;
+ bool exclusion = pipeline->device->physical_device->rad_info.chip_class >= GFX7;
radeon_set_context_reg(ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL,
S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) |
S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
cull_dist_mask << 8 |
clip_dist_mask);
- if (pipeline->device->physical_device->rad_info.chip_class <= VI)
+ if (pipeline->device->physical_device->rad_info.chip_class <= GFX8)
radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF,
outinfo->writes_viewport_index);
}
radeon_emit(cs, S_00B524_MEM_BASE(va >> 40));
rsrc2 |= S_00B52C_LDS_SIZE(tess->lds_size);
- if (pipeline->device->physical_device->rad_info.chip_class == CIK &&
+ if (pipeline->device->physical_device->rad_info.chip_class == GFX7 &&
pipeline->device->physical_device->rad_info.family != CHIP_HAWAII)
radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2);
radeon_set_context_reg(ctx_cs, R_028B6C_VGT_TF_PARAM,
tess->tf_param);
- if (pipeline->device->physical_device->rad_info.chip_class >= CIK)
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX7)
radeon_set_context_reg_idx(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, 2,
tess->ls_hs_config);
else
radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, pipeline->gs_copy_shader);
}
-static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade)
+static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade, bool float16)
{
uint32_t ps_input_cntl;
if (offset <= AC_EXP_PARAM_OFFSET_31) {
ps_input_cntl = S_028644_OFFSET(offset);
if (flat_shade)
ps_input_cntl |= S_028644_FLAT_SHADE(1);
+ if (float16) {
+ ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) |
+ S_028644_ATTR0_VALID(1);
+ }
} else {
/* The input is a DEFAULT_VAL constant. */
assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
if (ps->info.info.ps.prim_id_input) {
unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
++ps_offset;
}
}
ps->info.info.needs_multiview_view_index) {
unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
if (vs_offset != AC_EXP_PARAM_UNDEFINED)
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
else
- ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true);
+ ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false);
++ps_offset;
}
vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
++ps_offset;
}
vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
if (vs_offset != AC_EXP_PARAM_UNDEFINED &&
ps->info.info.ps.num_input_clips_culls > 4) {
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
++ps_offset;
}
}
for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
unsigned vs_offset;
bool flat_shade;
+ bool float16;
if (!(ps->info.fs.input_mask & (1u << i)))
continue;
}
flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
+ float16 = !!(ps->info.fs.float16_shaded_mask & (1u << ps_offset));
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade);
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, float16);
++ps_offset;
}
const struct radv_pipeline *pipeline,
const struct radv_shader_variant *ps)
{
- const struct radv_multisample_state *ms = &pipeline->graphics.ms;
unsigned z_order;
if (ps->info.fs.early_fragment_test || !ps->info.info.ps.writes_memory)
z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
bool disable_rbplus = device->physical_device->has_rbplus &&
!device->physical_device->rbplus_allowed;
- /* Do not enable the gl_SampleMask fragment shader output if MSAA is
- * disabled.
+ /* It shouldn't be needed to export gl_SampleMask when MSAA is disabled
+ * but this appears to break Project Cars (DXVK). See
+ * https://bugs.freedesktop.org/show_bug.cgi?id=109401
*/
- bool mask_export_enable = ms->num_samples > 1 &&
- ps->info.info.ps.writes_sample_mask;
+ bool mask_export_enable = ps->info.info.ps.writes_sample_mask;
return S_02880C_Z_EXPORT_ENABLE(ps->info.info.ps.writes_z) |
S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.info.ps.writes_stencil) |
radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, radv_compute_vgt_shader_stages_en(pipeline));
- if (pipeline->device->physical_device->rad_info.chip_class >= CIK) {
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) {
radeon_set_uconfig_reg_idx(cs, R_030908_VGT_PRIMITIVE_TYPE, 1, prim);
} else {
radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim);
/* GS requirement. */
ia_multi_vgt_param.partial_es_wave = false;
- if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= VI)
+ if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= GFX8)
if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pipeline->device->gs_table_depth - 3)
ia_multi_vgt_param.partial_es_wave = true;
ia_multi_vgt_param.wd_switch_on_eop = false;
- if (device->physical_device->rad_info.chip_class >= CIK) {
+ if (device->physical_device->rad_info.chip_class >= GFX7) {
/* WD_SWITCH_ON_EOP has no effect on GPUs with less than
* 4 shader engines. Set 1 to pass the assertion below.
* The other cases are hardware requirements. */
/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
if (device->has_distributed_tess) {
if (radv_pipeline_has_gs(pipeline)) {
- if (device->physical_device->rad_info.chip_class <= VI)
+ if (device->physical_device->rad_info.chip_class <= GFX8)
ia_multi_vgt_param.partial_es_wave = true;
} else {
ia_multi_vgt_param.partial_vs_wave = true;
ia_multi_vgt_param.base =
S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) |
/* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
- S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == VI ? 2 : 0) |
+ S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == GFX8 ? 2 : 0) |
S_030960_EN_INST_OPT_BASIC(device->physical_device->rad_info.chip_class >= GFX9) |
S_030960_EN_INST_OPT_ADV(device->physical_device->rad_info.chip_class >= GFX9);
&vi_info->pVertexAttributeDescriptions[i];
unsigned loc = desc->location;
const struct vk_format_description *format_desc;
- int first_non_void;
- uint32_t num_format, data_format;
- format_desc = vk_format_description(desc->format);
- first_non_void = vk_format_get_first_non_void_channel(desc->format);
- num_format = radv_translate_buffer_numformat(format_desc, first_non_void);
- data_format = radv_translate_buffer_dataformat(format_desc, first_non_void);
+ format_desc = vk_format_description(desc->format);
- velems->rsrc_word3[loc] = S_008F0C_DST_SEL_X(si_map_swizzle(format_desc->swizzle[0])) |
- S_008F0C_DST_SEL_Y(si_map_swizzle(format_desc->swizzle[1])) |
- S_008F0C_DST_SEL_Z(si_map_swizzle(format_desc->swizzle[2])) |
- S_008F0C_DST_SEL_W(si_map_swizzle(format_desc->swizzle[3])) |
- S_008F0C_NUM_FORMAT(num_format) |
- S_008F0C_DATA_FORMAT(data_format);
velems->format_size[loc] = format_desc->block.bits / 8;
- velems->offset[loc] = desc->offset;
- velems->binding[loc] = desc->binding;
- velems->count = MAX2(velems->count, loc + 1);
}
for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
&vi_info->pVertexBindingDescriptions[i];
pipeline->binding_stride[desc->binding] = desc->stride;
+ pipeline->num_vertex_bindings =
+ MAX2(pipeline->num_vertex_bindings, desc->binding + 1);
}
}
struct radv_blend_state blend = radv_pipeline_init_blend_state(pipeline, pCreateInfo, extra);
+ const VkPipelineCreationFeedbackCreateInfoEXT *creation_feedback =
+ vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
+ radv_init_feedback(creation_feedback);
+
+ VkPipelineCreationFeedbackEXT *pipeline_feedback = creation_feedback ? creation_feedback->pPipelineCreationFeedback : NULL;
+
const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
+ VkPipelineCreationFeedbackEXT *stage_feedbacks[MESA_SHADER_STAGES] = { 0 };
for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
gl_shader_stage stage = ffs(pCreateInfo->pStages[i].stage) - 1;
pStages[stage] = &pCreateInfo->pStages[i];
+ if(creation_feedback)
+ stage_feedbacks[stage] = &creation_feedback->pPipelineStageCreationFeedbacks[i];
}
struct radv_pipeline_key key = radv_generate_graphics_pipeline_key(pipeline, pCreateInfo, &blend, has_view_index);
- radv_create_shaders(pipeline, device, cache, &key, pStages, pCreateInfo->flags);
+ radv_create_shaders(pipeline, device, cache, &key, pStages, pCreateInfo->flags, pipeline_feedback, stage_feedbacks);
pipeline->graphics.spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
radv_pipeline_init_multisample_state(pipeline, &blend, pCreateInfo);
compute_resource_limits =
S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0);
- if (device->physical_device->rad_info.chip_class >= CIK) {
+ if (device->physical_device->rad_info.chip_class >= GFX7) {
unsigned num_cu_per_se =
device->physical_device->rad_info.num_good_compute_units /
device->physical_device->rad_info.max_se;
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
+ VkPipelineCreationFeedbackEXT *stage_feedbacks[MESA_SHADER_STAGES] = { 0 };
struct radv_pipeline *pipeline;
VkResult result;
pipeline->layout = radv_pipeline_layout_from_handle(pCreateInfo->layout);
assert(pipeline->layout);
+ const VkPipelineCreationFeedbackCreateInfoEXT *creation_feedback =
+ vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
+ radv_init_feedback(creation_feedback);
+
+ VkPipelineCreationFeedbackEXT *pipeline_feedback = creation_feedback ? creation_feedback->pPipelineCreationFeedback : NULL;
+ if (creation_feedback)
+ stage_feedbacks[MESA_SHADER_COMPUTE] = &creation_feedback->pPipelineStageCreationFeedbacks[0];
+
pStages[MESA_SHADER_COMPUTE] = &pCreateInfo->stage;
- radv_create_shaders(pipeline, device, cache, &(struct radv_pipeline_key) {0}, pStages, pCreateInfo->flags);
+ radv_create_shaders(pipeline, device, cache, &(struct radv_pipeline_key) {0}, pStages, pCreateInfo->flags, pipeline_feedback, stage_feedbacks);
pipeline->user_data_0[MESA_SHADER_COMPUTE] = radv_pipeline_stage_to_user_data_0(pipeline, MESA_SHADER_COMPUTE, device->physical_device->rad_info.chip_class);
pipeline->need_indirect_descriptor_sets |= pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets;