radv: add a workaround for Monster Hunter World and LLVM 7&8
[mesa.git] / src / amd / vulkan / radv_pipeline.c
index fb6c61cf3f05f42bb28da4c183269577d0387022..c89a6f139ba003d8a86665a01a489424961e99a2 100644 (file)
@@ -524,6 +524,14 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
                col_format |= cf << (4 * i);
        }
 
+       if (!col_format && blend->need_src_alpha & (1 << 0)) {
+               /* When a subpass doesn't have any color attachments, write the
+                * alpha channel of MRT0 when alpha-to-coverage is enabled,
+                * because the depth attachment needs it.
+                */
+               col_format |= V_028714_SPI_SHADER_32_AR;
+       }
+
        /* If the i-th target format is set, all previous target formats must
         * be non-zero to avoid hangs.
         */
@@ -689,6 +697,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
 
        if (vkms && vkms->alphaToCoverageEnable) {
                blend.db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1);
+               blend.need_src_alpha |= 0x1;
        }
 
        blend.cb_target_mask = 0;
@@ -1235,25 +1244,6 @@ si_conv_prim_to_gs_out(enum VkPrimitiveTopology topology)
        }
 }
 
-static unsigned si_map_swizzle(unsigned swizzle)
-{
-       switch (swizzle) {
-       case VK_SWIZZLE_Y:
-               return V_008F0C_SQ_SEL_Y;
-       case VK_SWIZZLE_Z:
-               return V_008F0C_SQ_SEL_Z;
-       case VK_SWIZZLE_W:
-               return V_008F0C_SQ_SEL_W;
-       case VK_SWIZZLE_0:
-               return V_008F0C_SQ_SEL_0;
-       case VK_SWIZZLE_1:
-               return V_008F0C_SQ_SEL_1;
-       default: /* VK_SWIZZLE_X */
-               return V_008F0C_SQ_SEL_X;
-       }
-}
-
-
 static unsigned radv_dynamic_state_mask(VkDynamicState state)
 {
        switch(state) {
@@ -1427,11 +1417,13 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 
        const  VkPipelineDiscardRectangleStateCreateInfoEXT *discard_rectangle_info =
                        vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT);
-       if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
+       if (needed_states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
                dynamic->discard_rectangle.count = discard_rectangle_info->discardRectangleCount;
-               typed_memcpy(dynamic->discard_rectangle.rectangles,
-                            discard_rectangle_info->pDiscardRectangles,
-                            discard_rectangle_info->discardRectangleCount);
+               if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
+                       typed_memcpy(dynamic->discard_rectangle.rectangles,
+                                    discard_rectangle_info->pDiscardRectangles,
+                                    discard_rectangle_info->discardRectangleCount);
+               }
        }
 
        pipeline->dynamic_state.mask = states;
@@ -1566,11 +1558,11 @@ calculate_gs_ring_sizes(struct radv_pipeline *pipeline, const struct radv_gs_sta
        unsigned num_se = device->physical_device->rad_info.max_se;
        unsigned wave_size = 64;
        unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */
-       /* On SI-CI, the value comes from VGT_GS_VERTEX_REUSE = 16.
-        * On VI+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
+       /* On GFX6-GFX7, the value comes from VGT_GS_VERTEX_REUSE = 16.
+        * On GFX8+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
         */
        unsigned gs_vertex_reuse =
-               (device->physical_device->rad_info.chip_class >= VI ? 32 : 16) * num_se;
+               (device->physical_device->rad_info.chip_class >= GFX8 ? 32 : 16) * num_se;
        unsigned alignment = 256 * num_se;
        /* The maximum size is 63.999 MB per SE. */
        unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
@@ -1589,7 +1581,7 @@ calculate_gs_ring_sizes(struct radv_pipeline *pipeline, const struct radv_gs_sta
        esgs_ring_size = align(esgs_ring_size, alignment);
        gsvs_ring_size = align(gsvs_ring_size, alignment);
 
-       if (pipeline->device->physical_device->rad_info.chip_class <= VI)
+       if (pipeline->device->physical_device->rad_info.chip_class <= GFX8)
                pipeline->graphics.esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
 
        pipeline->graphics.gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
@@ -1651,7 +1643,7 @@ calculate_tess_state(struct radv_pipeline *pipeline,
 
        lds_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.lds_size;
 
-       if (pipeline->device->physical_device->rad_info.chip_class >= CIK) {
+       if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) {
                assert(lds_size <= 65536);
                lds_size = align(lds_size, 512) / 512;
        } else {
@@ -1837,6 +1829,20 @@ radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders)
        }
 }
 
+/* Return the stride of the first vertex binding description whose binding
+ * number equals attrib_binding, or 0 when no description matches. */
+static uint32_t
+radv_get_attrib_stride(const VkPipelineVertexInputStateCreateInfo *input_state,
+                      uint32_t attrib_binding)
+{
+       const uint32_t binding_count = input_state->vertexBindingDescriptionCount;
+       const VkVertexInputBindingDescription *bindings = input_state->pVertexBindingDescriptions;
+       for (uint32_t idx = 0; idx != binding_count; ++idx) {
+               if (bindings[idx].binding == attrib_binding)
+                       return bindings[idx].stride;
+       }
+       return 0;
+}
 
 static struct radv_pipeline_key
 radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline,
@@ -1874,14 +1880,31 @@ radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline,
        }
 
        for (unsigned i = 0; i < input_state->vertexAttributeDescriptionCount; ++i) {
-               unsigned location = input_state->pVertexAttributeDescriptions[i].location;
-               unsigned binding = input_state->pVertexAttributeDescriptions[i].binding;
+               const VkVertexInputAttributeDescription *desc =
+                       &input_state->pVertexAttributeDescriptions[i];
+               const struct vk_format_description *format_desc;
+               unsigned location = desc->location;
+               unsigned binding = desc->binding;
+               unsigned num_format, data_format;
+               int first_non_void;
+
                if (binding_input_rate & (1u << binding)) {
                        key.instance_rate_inputs |= 1u << location;
                        key.instance_rate_divisors[location] = instance_rate_divisors[binding];
                }
 
-               if (pipeline->device->physical_device->rad_info.chip_class <= VI &&
+               format_desc = vk_format_description(desc->format);
+               first_non_void = vk_format_get_first_non_void_channel(desc->format);
+
+               num_format = radv_translate_buffer_numformat(format_desc, first_non_void);
+               data_format = radv_translate_buffer_dataformat(format_desc, first_non_void);
+
+               key.vertex_attribute_formats[location] = data_format | (num_format << 4);
+               key.vertex_attribute_bindings[location] = desc->binding;
+               key.vertex_attribute_offsets[location] = desc->offset;
+               key.vertex_attribute_strides[location] = radv_get_attrib_stride(input_state, desc->binding);
+
+               if (pipeline->device->physical_device->rad_info.chip_class <= GFX8 &&
                    pipeline->device->physical_device->rad_info.family != CHIP_STONEY) {
                        VkFormat format = input_state->pVertexAttributeDescriptions[i].format;
                        uint64_t adjust;
@@ -1904,6 +1927,26 @@ radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline,
                        }
                        key.vertex_alpha_adjust |= adjust << (2 * location);
                }
+
+               switch (desc->format) {
+               case VK_FORMAT_B8G8R8A8_UNORM:
+               case VK_FORMAT_B8G8R8A8_SNORM:
+               case VK_FORMAT_B8G8R8A8_USCALED:
+               case VK_FORMAT_B8G8R8A8_SSCALED:
+               case VK_FORMAT_B8G8R8A8_UINT:
+               case VK_FORMAT_B8G8R8A8_SINT:
+               case VK_FORMAT_B8G8R8A8_SRGB:
+               case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
+               case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
+               case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
+               case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
+               case VK_FORMAT_A2R10G10B10_UINT_PACK32:
+               case VK_FORMAT_A2R10G10B10_SINT_PACK32:
+                       key.vertex_post_shuffle |= 1 << location;
+                       break;
+               default:
+                       break;
+               }
        }
 
        if (pCreateInfo->pTessellationState)
@@ -1919,7 +1962,7 @@ radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline,
        }
 
        key.col_format = blend->spi_shader_col_format;
-       if (pipeline->device->physical_device->rad_info.chip_class < VI)
+       if (pipeline->device->physical_device->rad_info.chip_class < GFX8)
                radv_pipeline_compute_get_int_clamp(pCreateInfo, &key.is_int8, &key.is_int10);
 
        return key;
@@ -1932,8 +1975,14 @@ radv_fill_shader_keys(struct radv_shader_variant_key *keys,
 {
        keys[MESA_SHADER_VERTEX].vs.instance_rate_inputs = key->instance_rate_inputs;
        keys[MESA_SHADER_VERTEX].vs.alpha_adjust = key->vertex_alpha_adjust;
-       for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; ++i)
+       keys[MESA_SHADER_VERTEX].vs.post_shuffle = key->vertex_post_shuffle;
+       for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; ++i) {
                keys[MESA_SHADER_VERTEX].vs.instance_rate_divisors[i] = key->instance_rate_divisors[i];
+               keys[MESA_SHADER_VERTEX].vs.vertex_attribute_formats[i] = key->vertex_attribute_formats[i];
+               keys[MESA_SHADER_VERTEX].vs.vertex_attribute_bindings[i] = key->vertex_attribute_bindings[i];
+               keys[MESA_SHADER_VERTEX].vs.vertex_attribute_offsets[i] = key->vertex_attribute_offsets[i];
+               keys[MESA_SHADER_VERTEX].vs.vertex_attribute_strides[i] = key->vertex_attribute_strides[i];
+       }
 
        if (nir[MESA_SHADER_TESS_CTRL]) {
                keys[MESA_SHADER_VERTEX].vs.as_ls = true;
@@ -2000,13 +2049,53 @@ merge_tess_info(struct shader_info *tes_info,
        tes_info->tess.point_mode |= tcs_info->tess.point_mode;
 }
 
+/* Zero the flags and duration of every feedback struct reachable from ext:
+ * the pipeline-wide one (if set) and each per-stage entry. NULL ext is a no-op. */
+static
+void radv_init_feedback(const VkPipelineCreationFeedbackCreateInfoEXT *ext)
+{
+       if (ext == NULL)
+               return;
+       if (ext->pPipelineCreationFeedback != NULL) {
+               ext->pPipelineCreationFeedback->duration = 0;
+               ext->pPipelineCreationFeedback->flags = 0;
+       }
+       for (unsigned stage = ext->pipelineStageCreationFeedbackCount; stage-- > 0;) {
+               ext->pPipelineStageCreationFeedbacks[stage].duration = 0;
+               ext->pPipelineStageCreationFeedbacks[stage].flags = 0;
+       }
+}
+
+static
+void radv_start_feedback(VkPipelineCreationFeedbackEXT *feedback)
+{
+       /* Pre-subtract the start time; radv_stop_feedback() adds the end time. */
+       if (feedback != NULL) {
+               feedback->duration -= radv_get_current_time();
+               feedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
+       }
+}
+
+static
+void radv_stop_feedback(VkPipelineCreationFeedbackEXT *feedback, bool cache_hit)
+{
+       /* Closes the interval opened by radv_start_feedback(); flags are overwritten. */
+       if (feedback != NULL) {
+               feedback->duration += radv_get_current_time();
+               feedback->flags = (cache_hit ? VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT : 0) |
+                                 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
+       }
+}
+
 static
 void radv_create_shaders(struct radv_pipeline *pipeline,
                          struct radv_device *device,
                          struct radv_pipeline_cache *cache,
                          const struct radv_pipeline_key *key,
                          const VkPipelineShaderStageCreateInfo **pStages,
-                         const VkPipelineCreateFlags flags)
+                         const VkPipelineCreateFlags flags,
+                         VkPipelineCreationFeedbackEXT *pipeline_feedback,
+                         VkPipelineCreationFeedbackEXT **stage_feedbacks)
 {
        struct radv_shader_module fs_m = {0};
        struct radv_shader_module *modules[MESA_SHADER_STAGES] = { 0, };
@@ -2016,6 +2105,8 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
        struct radv_shader_variant_key keys[MESA_SHADER_STAGES] = {{{{0}}}};
        unsigned char hash[20], gs_copy_hash[20];
 
+       radv_start_feedback(pipeline_feedback);
+
        for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
                if (pStages[i]) {
                        modules[i] = radv_shader_module_from_handle(pStages[i]->module);
@@ -2032,14 +2123,18 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
        memcpy(gs_copy_hash, hash, 20);
        gs_copy_hash[0] ^= 1;
 
+       bool found_in_application_cache = true;
        if (modules[MESA_SHADER_GEOMETRY]) {
                struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0};
-               radv_create_shader_variants_from_pipeline_cache(device, cache, gs_copy_hash, variants);
+               radv_create_shader_variants_from_pipeline_cache(device, cache, gs_copy_hash, variants,
+                                                               &found_in_application_cache);
                pipeline->gs_copy_shader = variants[MESA_SHADER_GEOMETRY];
        }
 
-       if (radv_create_shader_variants_from_pipeline_cache(device, cache, hash, pipeline->shaders) &&
+       if (radv_create_shader_variants_from_pipeline_cache(device, cache, hash, pipeline->shaders,
+                                                           &found_in_application_cache) &&
            (!modules[MESA_SHADER_GEOMETRY] || pipeline->gs_copy_shader)) {
+               radv_stop_feedback(pipeline_feedback, found_in_application_cache);
                return;
        }
 
@@ -2057,10 +2152,12 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                if (!modules[i])
                        continue;
 
+               radv_start_feedback(stage_feedbacks[i]);
+
                nir[i] = radv_shader_compile_to_nir(device, modules[i],
                                                    stage ? stage->pName : "main", i,
                                                    stage ? stage->pSpecializationInfo : NULL,
-                                                   flags);
+                                                   flags, pipeline->layout);
 
                /* We don't want to alter meta shaders IR directly so clone it
                 * first.
@@ -2068,6 +2165,8 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                if (nir[i]->info.name) {
                        nir[i] = nir_shader_clone(NULL, nir[i]);
                }
+
+               radv_stop_feedback(stage_feedbacks[i], false);
        }
 
        if (nir[MESA_SHADER_TESS_CTRL]) {
@@ -2081,6 +2180,11 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
        for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
                if (nir[i]) {
                        NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
+                       NIR_PASS_V(nir[i], nir_lower_non_uniform_access,
+                                          nir_lower_non_uniform_ubo_access |
+                                          nir_lower_non_uniform_ssbo_access |
+                                          nir_lower_non_uniform_texture_access |
+                                          nir_lower_non_uniform_image_access);
                }
 
                if (radv_can_dump_shader(device, modules[i], false))
@@ -2091,10 +2195,14 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
 
        if (nir[MESA_SHADER_FRAGMENT]) {
                if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
+                       radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]);
+
                        pipeline->shaders[MESA_SHADER_FRAGMENT] =
                               radv_shader_variant_create(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1,
                                                          pipeline->layout, keys + MESA_SHADER_FRAGMENT,
                                                          &codes[MESA_SHADER_FRAGMENT], &code_sizes[MESA_SHADER_FRAGMENT]);
+
+                       radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false);
                }
 
                /* TODO: These are no longer used as keys we should refactor this */
@@ -2113,10 +2221,15 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                        struct nir_shader *combined_nir[] = {nir[MESA_SHADER_VERTEX], nir[MESA_SHADER_TESS_CTRL]};
                        struct radv_shader_variant_key key = keys[MESA_SHADER_TESS_CTRL];
                        key.tcs.vs_key = keys[MESA_SHADER_VERTEX].vs;
+
+                       radv_start_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL]);
+
                        pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_create(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2,
                                                                                              pipeline->layout,
                                                                                              &key, &codes[MESA_SHADER_TESS_CTRL],
                                                                                              &code_sizes[MESA_SHADER_TESS_CTRL]);
+
+                       radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false);
                }
                modules[MESA_SHADER_VERTEX] = NULL;
                keys[MESA_SHADER_TESS_EVAL].tes.num_patches = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_patches;
@@ -2127,10 +2240,15 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                gl_shader_stage pre_stage = modules[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
                if (!pipeline->shaders[MESA_SHADER_GEOMETRY]) {
                        struct nir_shader *combined_nir[] = {nir[pre_stage], nir[MESA_SHADER_GEOMETRY]};
+
+                       radv_start_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY]);
+
                        pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_create(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2,
                                                                                             pipeline->layout,
                                                                                             &keys[pre_stage] , &codes[MESA_SHADER_GEOMETRY],
                                                                                     &code_sizes[MESA_SHADER_GEOMETRY]);
+
+                       radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false);
                }
                modules[pre_stage] = NULL;
        }
@@ -2144,10 +2262,15 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                                keys[MESA_SHADER_TESS_EVAL].tes.num_patches = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_patches;
                                keys[MESA_SHADER_TESS_EVAL].tes.tcs_num_outputs = util_last_bit64(pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.info.tcs.outputs_written);
                        }
+
+                       radv_start_feedback(stage_feedbacks[i]);
+
                        pipeline->shaders[i] = radv_shader_variant_create(device, modules[i], &nir[i], 1,
                                                                          pipeline->layout,
                                                                          keys + i, &codes[i],
                                                                          &code_sizes[i]);
+
+                       radv_stop_feedback(stage_feedbacks[i], false);
                }
        }
 
@@ -2197,6 +2320,8 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
 
        if (fs_m.nir)
                ralloc_free(fs_m.nir);
+
+       radv_stop_feedback(pipeline_feedback, false);
 }
 
 static uint32_t
@@ -2632,8 +2757,7 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
        db_render_override |= S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
                              S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
 
-       if (pipeline->device->enabled_extensions.EXT_depth_range_unrestricted &&
-           !pCreateInfo->pRasterizationState->depthClampEnable &&
+       if (!pCreateInfo->pRasterizationState->depthClampEnable &&
            ps->info.info.ps.writes_z) {
                /* From VK_EXT_depth_range_unrestricted spec:
                 *
@@ -2702,11 +2826,18 @@ radv_pipeline_generate_raster_state(struct radeon_cmdbuf *ctx_cs,
        const VkConservativeRasterizationModeEXT mode =
                radv_get_conservative_raster_mode(vkraster);
        uint32_t pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1);
+       bool depth_clip_disable = vkraster->depthClampEnable;
+
+       const VkPipelineRasterizationDepthClipStateCreateInfoEXT *depth_clip_state =
+               vk_find_struct_const(vkraster->pNext, PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
+       if (depth_clip_state) {
+               depth_clip_disable = !depth_clip_state->depthClipEnable;
+       }
 
        radeon_set_context_reg(ctx_cs, R_028810_PA_CL_CLIP_CNTL,
                               S_028810_DX_CLIP_SPACE_DEF(1) | // vulkan uses DX conventions.
-                              S_028810_ZCLIP_NEAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) |
-                              S_028810_ZCLIP_FAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) |
+                              S_028810_ZCLIP_NEAR_DISABLE(depth_clip_disable ? 1 : 0) |
+                              S_028810_ZCLIP_FAR_DISABLE(depth_clip_disable ? 1 : 0) |
                               S_028810_DX_RASTERIZATION_KILL(vkraster->rasterizerDiscardEnable ? 1 : 0) |
                               S_028810_DX_LINEAR_ATTR_CLIP_ENA(1));
 
@@ -2787,7 +2918,7 @@ radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *ctx_cs,
         * if no sample lies on the pixel boundary (-8 sample offset). It's
         * currently always TRUE because the driver doesn't support 16 samples.
         */
-       bool exclusion = pipeline->device->physical_device->rad_info.chip_class >= CIK;
+       bool exclusion = pipeline->device->physical_device->rad_info.chip_class >= GFX7;
        radeon_set_context_reg(ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL,
                               S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) |
                               S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
@@ -2872,7 +3003,7 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs,
                               cull_dist_mask << 8 |
                               clip_dist_mask);
 
-       if (pipeline->device->physical_device->rad_info.chip_class <= VI)
+       if (pipeline->device->physical_device->rad_info.chip_class <= GFX8)
                radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF,
                                       outinfo->writes_viewport_index);
 }
@@ -2905,7 +3036,7 @@ radv_pipeline_generate_hw_ls(struct radeon_cmdbuf *cs,
        radeon_emit(cs, S_00B524_MEM_BASE(va >> 40));
 
        rsrc2 |= S_00B52C_LDS_SIZE(tess->lds_size);
-       if (pipeline->device->physical_device->rad_info.chip_class == CIK &&
+       if (pipeline->device->physical_device->rad_info.chip_class == GFX7 &&
            pipeline->device->physical_device->rad_info.family != CHIP_HAWAII)
                radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2);
 
@@ -2987,7 +3118,7 @@ radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs,
        radeon_set_context_reg(ctx_cs, R_028B6C_VGT_TF_PARAM,
                               tess->tf_param);
 
-       if (pipeline->device->physical_device->rad_info.chip_class >= CIK)
+       if (pipeline->device->physical_device->rad_info.chip_class >= GFX7)
                radeon_set_context_reg_idx(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, 2,
                                           tess->ls_hs_config);
        else
@@ -3070,13 +3201,17 @@ radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs,
        radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, pipeline->gs_copy_shader);
 }
 
-static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade)
+static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade, bool float16)
 {
        uint32_t ps_input_cntl;
        if (offset <= AC_EXP_PARAM_OFFSET_31) {
                ps_input_cntl = S_028644_OFFSET(offset);
                if (flat_shade)
                        ps_input_cntl |= S_028644_FLAT_SHADE(1);
+               if (float16) {
+                       ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) |
+                                        S_028644_ATTR0_VALID(1);
+               }
        } else {
                /* The input is a DEFAULT_VAL constant. */
                assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
@@ -3101,7 +3236,7 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
        if (ps->info.info.ps.prim_id_input) {
                unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
                if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
-                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
                        ++ps_offset;
                }
        }
@@ -3111,9 +3246,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
            ps->info.info.needs_multiview_view_index) {
                unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
                if (vs_offset != AC_EXP_PARAM_UNDEFINED)
-                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
                else
-                       ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true);
+                       ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false);
                ++ps_offset;
        }
 
@@ -3129,14 +3264,14 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
 
                vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
                if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
-                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
+                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
                        ++ps_offset;
                }
 
                vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
                if (vs_offset != AC_EXP_PARAM_UNDEFINED &&
                    ps->info.info.ps.num_input_clips_culls > 4) {
-                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
+                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
                        ++ps_offset;
                }
        }
@@ -3144,6 +3279,7 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
        for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
                unsigned vs_offset;
                bool flat_shade;
+               bool float16;
                if (!(ps->info.fs.input_mask & (1u << i)))
                        continue;
 
@@ -3155,8 +3291,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
                }
 
                flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
+               float16 = !!(ps->info.fs.float16_shaded_mask & (1u << ps_offset));
 
-               ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade);
+               ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, float16);
                ++ps_offset;
        }
 
@@ -3173,7 +3310,6 @@ radv_compute_db_shader_control(const struct radv_device *device,
                               const struct radv_pipeline *pipeline,
                                const struct radv_shader_variant *ps)
 {
-       const struct radv_multisample_state *ms = &pipeline->graphics.ms;
        unsigned z_order;
        if (ps->info.fs.early_fragment_test || !ps->info.info.ps.writes_memory)
                z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
@@ -3183,11 +3319,11 @@ radv_compute_db_shader_control(const struct radv_device *device,
        bool disable_rbplus = device->physical_device->has_rbplus &&
                              !device->physical_device->rbplus_allowed;
 
-       /* Do not enable the gl_SampleMask fragment shader output if MSAA is
-        * disabled.
+       /* Exporting gl_SampleMask shouldn't be needed when MSAA is disabled,
+        * but skipping the export appears to break Project Cars (DXVK). See
+        * https://bugs.freedesktop.org/show_bug.cgi?id=109401
         */
-       bool mask_export_enable = ms->num_samples > 1 &&
-                                 ps->info.info.ps.writes_sample_mask;
+       bool mask_export_enable = ps->info.info.ps.writes_sample_mask;
 
        return  S_02880C_Z_EXPORT_ENABLE(ps->info.info.ps.writes_z) |
                S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.info.ps.writes_stencil) |
@@ -3354,7 +3490,7 @@ radv_pipeline_generate_pm4(struct radv_pipeline *pipeline,
 
        radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, radv_compute_vgt_shader_stages_en(pipeline));
 
-       if (pipeline->device->physical_device->rad_info.chip_class >= CIK) {
+       if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) {
                radeon_set_uconfig_reg_idx(cs, R_030908_VGT_PRIMITIVE_TYPE, 1, prim);
        } else {
                radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim);
@@ -3386,12 +3522,12 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline,
 
        /* GS requirement. */
        ia_multi_vgt_param.partial_es_wave = false;
-       if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= VI)
+       if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= GFX8)
                if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pipeline->device->gs_table_depth - 3)
                        ia_multi_vgt_param.partial_es_wave = true;
 
        ia_multi_vgt_param.wd_switch_on_eop = false;
-       if (device->physical_device->rad_info.chip_class >= CIK) {
+       if (device->physical_device->rad_info.chip_class >= GFX7) {
                /* WD_SWITCH_ON_EOP has no effect on GPUs with less than
                 * 4 shader engines. Set 1 to pass the assertion below.
                 * The other cases are hardware requirements. */
@@ -3431,7 +3567,7 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline,
                /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
                if (device->has_distributed_tess) {
                        if (radv_pipeline_has_gs(pipeline)) {
-                               if (device->physical_device->rad_info.chip_class <= VI)
+                               if (device->physical_device->rad_info.chip_class <= GFX8)
                                        ia_multi_vgt_param.partial_es_wave = true;
                        } else {
                                ia_multi_vgt_param.partial_vs_wave = true;
@@ -3473,7 +3609,7 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline,
        ia_multi_vgt_param.base =
                S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) |
                /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
-               S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == VI ? 2 : 0) |
+               S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == GFX8 ? 2 : 0) |
                S_030960_EN_INST_OPT_BASIC(device->physical_device->rad_info.chip_class >= GFX9) |
                S_030960_EN_INST_OPT_ADV(device->physical_device->rad_info.chip_class >= GFX9);
 
@@ -3494,24 +3630,10 @@ radv_compute_vertex_input_state(struct radv_pipeline *pipeline,
                        &vi_info->pVertexAttributeDescriptions[i];
                unsigned loc = desc->location;
                const struct vk_format_description *format_desc;
-               int first_non_void;
-               uint32_t num_format, data_format;
-               format_desc = vk_format_description(desc->format);
-               first_non_void = vk_format_get_first_non_void_channel(desc->format);
 
-               num_format = radv_translate_buffer_numformat(format_desc, first_non_void);
-               data_format = radv_translate_buffer_dataformat(format_desc, first_non_void);
+               format_desc = vk_format_description(desc->format);
 
-               velems->rsrc_word3[loc] = S_008F0C_DST_SEL_X(si_map_swizzle(format_desc->swizzle[0])) |
-                       S_008F0C_DST_SEL_Y(si_map_swizzle(format_desc->swizzle[1])) |
-                       S_008F0C_DST_SEL_Z(si_map_swizzle(format_desc->swizzle[2])) |
-                       S_008F0C_DST_SEL_W(si_map_swizzle(format_desc->swizzle[3])) |
-                       S_008F0C_NUM_FORMAT(num_format) |
-                       S_008F0C_DATA_FORMAT(data_format);
                velems->format_size[loc] = format_desc->block.bits / 8;
-               velems->offset[loc] = desc->offset;
-               velems->binding[loc] = desc->binding;
-               velems->count = MAX2(velems->count, loc + 1);
        }
 
        for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
@@ -3519,6 +3641,8 @@ radv_compute_vertex_input_state(struct radv_pipeline *pipeline,
                        &vi_info->pVertexBindingDescriptions[i];
 
                pipeline->binding_stride[desc->binding] = desc->stride;
+               pipeline->num_vertex_bindings =
+                       MAX2(pipeline->num_vertex_bindings, desc->binding + 1);
        }
 }
 
@@ -3559,14 +3683,23 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 
        struct radv_blend_state blend = radv_pipeline_init_blend_state(pipeline, pCreateInfo, extra);
 
+       const VkPipelineCreationFeedbackCreateInfoEXT *creation_feedback =
+               vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
+       radv_init_feedback(creation_feedback);
+
+       VkPipelineCreationFeedbackEXT *pipeline_feedback = creation_feedback ? creation_feedback->pPipelineCreationFeedback : NULL;
+
        const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
+       VkPipelineCreationFeedbackEXT *stage_feedbacks[MESA_SHADER_STAGES] = { 0 };
        for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
                gl_shader_stage stage = ffs(pCreateInfo->pStages[i].stage) - 1;
                pStages[stage] = &pCreateInfo->pStages[i];
+               if(creation_feedback)
+                       stage_feedbacks[stage] = &creation_feedback->pPipelineStageCreationFeedbacks[i];
        }
 
        struct radv_pipeline_key key = radv_generate_graphics_pipeline_key(pipeline, pCreateInfo, &blend, has_view_index);
-       radv_create_shaders(pipeline, device, cache, &key, pStages, pCreateInfo->flags);
+       radv_create_shaders(pipeline, device, cache, &key, pStages, pCreateInfo->flags, pipeline_feedback, stage_feedbacks);
 
        pipeline->graphics.spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
        radv_pipeline_init_multisample_state(pipeline, &blend, pCreateInfo);
@@ -3752,7 +3885,7 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline)
        compute_resource_limits =
                S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0);
 
-       if (device->physical_device->rad_info.chip_class >= CIK) {
+       if (device->physical_device->rad_info.chip_class >= GFX7) {
                unsigned num_cu_per_se =
                        device->physical_device->rad_info.num_good_compute_units /
                        device->physical_device->rad_info.max_se;
@@ -3789,6 +3922,7 @@ static VkResult radv_compute_pipeline_create(
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
        const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
+       VkPipelineCreationFeedbackEXT *stage_feedbacks[MESA_SHADER_STAGES] = { 0 };
        struct radv_pipeline *pipeline;
        VkResult result;
 
@@ -3801,8 +3935,16 @@ static VkResult radv_compute_pipeline_create(
        pipeline->layout = radv_pipeline_layout_from_handle(pCreateInfo->layout);
        assert(pipeline->layout);
 
+       const VkPipelineCreationFeedbackCreateInfoEXT *creation_feedback =
+               vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
+       radv_init_feedback(creation_feedback);
+
+       VkPipelineCreationFeedbackEXT *pipeline_feedback = creation_feedback ? creation_feedback->pPipelineCreationFeedback : NULL;
+       if (creation_feedback)
+               stage_feedbacks[MESA_SHADER_COMPUTE] = &creation_feedback->pPipelineStageCreationFeedbacks[0];
+
        pStages[MESA_SHADER_COMPUTE] = &pCreateInfo->stage;
-       radv_create_shaders(pipeline, device, cache, &(struct radv_pipeline_key) {0}, pStages, pCreateInfo->flags);
+       radv_create_shaders(pipeline, device, cache, &(struct radv_pipeline_key) {0}, pStages, pCreateInfo->flags, pipeline_feedback, stage_feedbacks);
 
        pipeline->user_data_0[MESA_SHADER_COMPUTE] = radv_pipeline_stage_to_user_data_0(pipeline, MESA_SHADER_COMPUTE, device->physical_device->rad_info.chip_class);
        pipeline->need_indirect_descriptor_sets |= pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets;