X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_shader.c;h=37fdbfe33f53ef317a900e8c75ea51028066a3fc;hb=2dd8dfd9137ac561aac3c453c1c7ad6683bd17b4;hp=bd045a0b92fb3382d2e343f339565ec92820535c;hpb=71ffa00fc66110a4062dc935d3548ebd0eb12f9e;p=mesa.git diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index bd045a0b92f..37fdbfe33f5 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -40,7 +40,6 @@ #include #include "sid.h" -#include "gfx9d.h" #include "ac_binary.h" #include "ac_llvm_util.h" #include "ac_nir_to_llvm.h" @@ -53,6 +52,7 @@ static const struct nir_shader_compiler_options nir_options = { .vertex_id_zero_based = true, .lower_scmp = true, + .lower_flrp16 = true, .lower_flrp32 = true, .lower_flrp64 = true, .lower_device_index_to_zero = true, @@ -123,6 +123,10 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, bool allow_copies) { bool progress; + unsigned lower_flrp = + (shader->options->lower_flrp16 ? 16 : 0) | + (shader->options->lower_flrp32 ? 32 : 0) | + (shader->options->lower_flrp64 ? 64 : 0); do { progress = false; @@ -145,7 +149,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, NIR_PASS(progress, shader, nir_opt_copy_prop_vars); NIR_PASS(progress, shader, nir_opt_dead_write_vars); - NIR_PASS_V(shader, nir_lower_alu_to_scalar); + NIR_PASS_V(shader, nir_lower_alu_to_scalar, NULL); NIR_PASS_V(shader, nir_lower_phis_to_scalar); NIR_PASS(progress, shader, nir_copy_prop); @@ -157,12 +161,33 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, NIR_PASS(progress, shader, nir_opt_remove_phis); NIR_PASS(progress, shader, nir_opt_dce); } - NIR_PASS(progress, shader, nir_opt_if); + NIR_PASS(progress, shader, nir_opt_if, true); NIR_PASS(progress, shader, nir_opt_dead_cf); NIR_PASS(progress, shader, nir_opt_cse); NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true); - NIR_PASS(progress, shader, nir_opt_algebraic); NIR_PASS(progress, shader, nir_opt_constant_folding); + NIR_PASS(progress, shader, nir_opt_algebraic); + + if (lower_flrp != 0) { + bool lower_flrp_progress = false; + NIR_PASS(lower_flrp_progress, + shader, + nir_lower_flrp, + lower_flrp, + false /* always_precise */, + shader->options->lower_ffma); + if (lower_flrp_progress) { + NIR_PASS(progress, shader, + nir_opt_constant_folding); + progress = true; + } + + /* Nothing should rematerialize any flrps, so we only + * need to do this lowering once. + */ + lower_flrp = 0; + } + NIR_PASS(progress, shader, nir_opt_undef); NIR_PASS(progress, shader, nir_opt_conditional_discard); if (shader->options->max_unroll_iterations) { @@ -180,10 +205,10 @@ radv_shader_compile_to_nir(struct radv_device *device, const char *entrypoint_name, gl_shader_stage stage, const VkSpecializationInfo *spec_info, - const VkPipelineCreateFlags flags) + const VkPipelineCreateFlags flags, + const struct radv_pipeline_layout *layout) { nir_shader *nir; - nir_function *entry_point; if (module->nir) { /* Some things such as our meta clear/blit code will give us a NIR * shader directly. In that case, we just ignore the SPIR-V entirely @@ -193,8 +218,6 @@ radv_shader_compile_to_nir(struct radv_device *device, nir_validate_shader(nir, "in internal shader"); assert(exec_list_length(&nir->functions) == 1); - struct exec_node *node = exec_list_get_head(&nir->functions); - entry_point = exec_node_data(nir_function, node, node); } else { uint32_t *spirv = (uint32_t *) module->data; assert(module->size % 4 == 0); @@ -222,21 +245,28 @@ radv_shader_compile_to_nir(struct radv_device *device, const struct spirv_to_nir_options spirv_options = { .lower_ubo_ssbo_access_to_offsets = true, .caps = { + .derivative_group = true, .descriptor_array_dynamic_indexing = true, + .descriptor_array_non_uniform_indexing = true, + .descriptor_indexing = true, .device_group = true, .draw_parameters = true, + .float16 = true, .float64 = true, .gcn_shader = true, .geometry_streams = true, .image_read_without_format = true, .image_write_without_format = true, + .int8 = true, .int16 = true, .int64 = true, + .int64_atomics = true, .multiview = true, .physical_storage_buffer_address = true, .runtime_descriptor_array = true, .shader_viewport_index_layer = true, .stencil_export = true, + .storage_8bit = true, .storage_16bit = true, .storage_image_ms = true, .subgroup_arithmetic = true, @@ -250,17 +280,16 @@ radv_shader_compile_to_nir(struct radv_device *device, .trinary_minmax = true, .variable_pointers = true, }, - .ubo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2), - .ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2), - .phys_ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT64, 1), - .push_const_ptr_type = glsl_uint_type(), - .shared_ptr_type = glsl_uint_type(), + .ubo_addr_format = nir_address_format_32bit_index_offset, + .ssbo_addr_format = nir_address_format_32bit_index_offset, + .phys_ssbo_addr_format = nir_address_format_64bit_global, + .push_const_addr_format = nir_address_format_logical, + .shared_addr_format = nir_address_format_32bit_offset, }; - entry_point = spirv_to_nir(spirv, module->size / 4, - spec_entries, num_spec_entries, - stage, entrypoint_name, - &spirv_options, &nir_options); - nir = entry_point->shader; + nir = spirv_to_nir(spirv, module->size / 4, + spec_entries, num_spec_entries, + stage, entrypoint_name, + &spirv_options, &nir_options); assert(nir->info.stage == stage); nir_validate_shader(nir, "after spirv_to_nir"); @@ -277,11 +306,12 @@ radv_shader_compile_to_nir(struct radv_device *device, /* Pick off the single entrypoint that we want */ foreach_list_typed_safe(nir_function, func, node, &nir->functions) { - if (func != entry_point) + if (func->is_entrypoint) + func->name = ralloc_strdup(func, "main"); + else exec_node_remove(&func->node); } assert(exec_list_length(&nir->functions) == 1); - entry_point->name = ralloc_strdup(entry_point, "main"); /* Make sure we lower constant initializers on output variables so that * nir_remove_dead_variables below sees the corresponding stores @@ -304,15 +334,17 @@ radv_shader_compile_to_nir(struct radv_device *device, NIR_PASS_V(nir, nir_lower_system_values); NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays); + NIR_PASS_V(nir, radv_nir_lower_ycbcr_textures, layout); } /* Vulkan uses the separate-shader linking model */ nir->info.separate_shader = true; - nir_shader_gather_info(nir, entry_point->impl); + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); static const nir_lower_tex_options tex_options = { .lower_txp = ~0, + .lower_tg4_offsets = true, }; nir_lower_tex(nir, &tex_options); @@ -613,6 +645,8 @@ shader_variant_create(struct radv_device *device, tm_options |= AC_TM_SISCHED; if (options->check_ir) tm_options |= AC_TM_CHECK_IR; + if (device->instance->debug_flags & RADV_DEBUG_NO_LOAD_STORE_OPT) + tm_options |= AC_TM_NO_LOAD_STORE_OPT; thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM); radv_init_llvm_once(); @@ -737,7 +771,7 @@ generate_shader_stats(struct radv_device *device, struct _mesa_string_buffer *buf) { enum chip_class chip_class = device->physical_device->rad_info.chip_class; - unsigned lds_increment = chip_class >= CIK ? 512 : 256; + unsigned lds_increment = chip_class >= GFX7 ? 512 : 256; struct ac_shader_config *conf; unsigned max_simd_waves; unsigned lds_per_wave = 0; @@ -839,7 +873,7 @@ radv_GetShaderInfoAMD(VkDevice _device, if (!pInfo) { *pInfoSize = sizeof(VkShaderStatisticsInfoAMD); } else { - unsigned lds_multiplier = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256; + unsigned lds_multiplier = device->physical_device->rad_info.chip_class >= GFX7 ? 512 : 256; struct ac_shader_config *conf = &variant->config; VkShaderStatisticsInfoAMD statistics = {};