X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_shader.c;h=3c3f761ca8966775814b4263c663248de1e7facf;hp=c802abb0e081837e4048b22c9e5bd214078a60c3;hb=456557a837ea005763283b6cbd172fe3b9c7e8f4;hpb=9cf55b022dfa43f8fe3163edeb87a1c25ebf5a16 diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index c802abb0e08..3c3f761ca89 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -40,7 +40,6 @@ #include #include "sid.h" -#include "gfx9d.h" #include "ac_binary.h" #include "ac_llvm_util.h" #include "ac_nir_to_llvm.h" @@ -59,6 +58,8 @@ static const struct nir_shader_compiler_options nir_options = { .lower_device_index_to_zero = true, .lower_fsat = true, .lower_fdiv = true, + .lower_bitfield_insert_to_bitfield_select = true, + .lower_bitfield_extract = true, .lower_sub = true, .lower_pack_snorm_2x16 = true, .lower_pack_snorm_4x8 = true, @@ -73,6 +74,7 @@ static const struct nir_shader_compiler_options nir_options = { .lower_ffma = true, .lower_fpow = true, .lower_mul_2x32_64 = true, + .lower_rotate = true, .max_unroll_iterations = 32 }; @@ -124,6 +126,10 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, bool allow_copies) { bool progress; + unsigned lower_flrp = + (shader->options->lower_flrp16 ? 16 : 0) | + (shader->options->lower_flrp32 ? 32 : 0) | + (shader->options->lower_flrp64 ? 64 : 0); do { progress = false; @@ -146,7 +152,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, NIR_PASS(progress, shader, nir_opt_copy_prop_vars); NIR_PASS(progress, shader, nir_opt_dead_write_vars); - NIR_PASS_V(shader, nir_lower_alu_to_scalar); + NIR_PASS_V(shader, nir_lower_alu_to_scalar, NULL); NIR_PASS_V(shader, nir_lower_phis_to_scalar); NIR_PASS(progress, shader, nir_copy_prop); @@ -162,8 +168,29 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, NIR_PASS(progress, shader, nir_opt_dead_cf); NIR_PASS(progress, shader, nir_opt_cse); NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true); - NIR_PASS(progress, shader, nir_opt_algebraic); NIR_PASS(progress, shader, nir_opt_constant_folding); + NIR_PASS(progress, shader, nir_opt_algebraic); + + if (lower_flrp != 0) { + bool lower_flrp_progress = false; + NIR_PASS(lower_flrp_progress, + shader, + nir_lower_flrp, + lower_flrp, + false /* always_precise */, + shader->options->lower_ffma); + if (lower_flrp_progress) { + NIR_PASS(progress, shader, + nir_opt_constant_folding); + progress = true; + } + + /* Nothing should rematerialize any flrps, so we only + * need to do this lowering once. + */ + lower_flrp = 0; + } + NIR_PASS(progress, shader, nir_opt_undef); NIR_PASS(progress, shader, nir_opt_conditional_discard); if (shader->options->max_unroll_iterations) { @@ -181,10 +208,10 @@ radv_shader_compile_to_nir(struct radv_device *device, const char *entrypoint_name, gl_shader_stage stage, const VkSpecializationInfo *spec_info, - const VkPipelineCreateFlags flags) + const VkPipelineCreateFlags flags, + const struct radv_pipeline_layout *layout) { nir_shader *nir; - nir_function *entry_point; if (module->nir) { /* Some things such as our meta clear/blit code will give us a NIR * shader directly. In that case, we just ignore the SPIR-V entirely @@ -194,8 +221,6 @@ radv_shader_compile_to_nir(struct radv_device *device, nir_validate_shader(nir, "in internal shader"); assert(exec_list_length(&nir->functions) == 1); - struct exec_node *node = exec_list_get_head(&nir->functions); - entry_point = exec_node_data(nir_function, node, node); } else { uint32_t *spirv = (uint32_t *) module->data; assert(module->size % 4 == 0); @@ -223,12 +248,17 @@ radv_shader_compile_to_nir(struct radv_device *device, const struct spirv_to_nir_options spirv_options = { .lower_ubo_ssbo_access_to_offsets = true, .caps = { + .amd_gcn_shader = true, + .amd_shader_ballot = device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT, + .amd_trinary_minmax = true, + .derivative_group = true, .descriptor_array_dynamic_indexing = true, + .descriptor_array_non_uniform_indexing = true, + .descriptor_indexing = true, .device_group = true, .draw_parameters = true, .float16 = true, .float64 = true, - .gcn_shader = true, .geometry_streams = true, .image_read_without_format = true, .image_write_without_format = true, @@ -252,20 +282,18 @@ radv_shader_compile_to_nir(struct radv_device *device, .subgroup_vote = true, .tessellation = true, .transform_feedback = true, - .trinary_minmax = true, .variable_pointers = true, }, - .ubo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2), - .ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2), - .phys_ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT64, 1), - .push_const_ptr_type = glsl_uint_type(), - .shared_ptr_type = glsl_uint_type(), + .ubo_addr_format = nir_address_format_32bit_index_offset, + .ssbo_addr_format = nir_address_format_32bit_index_offset, + .phys_ssbo_addr_format = nir_address_format_64bit_global, + .push_const_addr_format = nir_address_format_logical, + .shared_addr_format = nir_address_format_32bit_offset, }; - entry_point = spirv_to_nir(spirv, module->size / 4, - spec_entries, num_spec_entries, - stage, entrypoint_name, - &spirv_options, &nir_options); - nir = entry_point->shader; + nir = spirv_to_nir(spirv, module->size / 4, + spec_entries, num_spec_entries, + stage, entrypoint_name, + &spirv_options, &nir_options); assert(nir->info.stage == stage); nir_validate_shader(nir, "after spirv_to_nir"); @@ -282,11 +310,12 @@ radv_shader_compile_to_nir(struct radv_device *device, /* Pick off the single entrypoint that we want */ foreach_list_typed_safe(nir_function, func, node, &nir->functions) { - if (func != entry_point) + if (func->is_entrypoint) + func->name = ralloc_strdup(func, "main"); + else exec_node_remove(&func->node); } assert(exec_list_length(&nir->functions) == 1); - entry_point->name = ralloc_strdup(entry_point, "main"); /* Make sure we lower constant initializers on output variables so that * nir_remove_dead_variables below sees the corresponding stores @@ -309,12 +338,13 @@ radv_shader_compile_to_nir(struct radv_device *device, NIR_PASS_V(nir, nir_lower_system_values); NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays); + NIR_PASS_V(nir, radv_nir_lower_ycbcr_textures, layout); } /* Vulkan uses the separate-shader linking model */ nir->info.separate_shader = true; - nir_shader_gather_info(nir, entry_point->impl); + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); static const nir_lower_tex_options tex_options = { .lower_txp = ~0, @@ -442,6 +472,7 @@ radv_get_shader_binary_size(struct ac_shader_binary *binary) static void radv_fill_shader_variant(struct radv_device *device, struct radv_shader_variant *variant, + struct radv_nir_compiler_options *options, struct ac_shader_binary *binary, gl_shader_stage stage) { @@ -466,21 +497,54 @@ radv_fill_shader_variant(struct radv_device *device, switch (stage) { case MESA_SHADER_TESS_EVAL: - vgpr_comp_cnt = 3; + if (options->key.tes.as_es) { + assert(device->physical_device->rad_info.chip_class <= GFX8); + vgpr_comp_cnt = info->uses_prim_id ? 3 : 2; + } else { + bool enable_prim_id = options->key.tes.export_prim_id || info->uses_prim_id; + vgpr_comp_cnt = enable_prim_id ? 3 : 2; + } variant->rsrc2 |= S_00B12C_OC_LDS_EN(1); break; case MESA_SHADER_TESS_CTRL: if (device->physical_device->rad_info.chip_class >= GFX9) { - vgpr_comp_cnt = variant->info.vs.vgpr_comp_cnt; + /* We need at least 2 components for LS. + * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID). + * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded. + */ + vgpr_comp_cnt = info->vs.needs_instance_id ? 2 : 1; } else { variant->rsrc2 |= S_00B12C_OC_LDS_EN(1); } break; case MESA_SHADER_VERTEX: - case MESA_SHADER_GEOMETRY: - vgpr_comp_cnt = variant->info.vs.vgpr_comp_cnt; + if (variant->info.vs.as_ls) { + assert(device->physical_device->rad_info.chip_class <= GFX8); + /* We need at least 2 components for LS. + * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID). + * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded. + */ + vgpr_comp_cnt = info->vs.needs_instance_id ? 2 : 1; + } else if (variant->info.vs.as_es) { + assert(device->physical_device->rad_info.chip_class <= GFX8); + /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */ + vgpr_comp_cnt = info->vs.needs_instance_id ? 1 : 0; + } else { + /* VGPR0-3: (VertexID, InstanceID / StepRate0, PrimID, InstanceID) + * If PrimID is disabled. InstanceID / StepRate1 is loaded instead. + * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded. + */ + if (options->key.vs.export_prim_id) { + vgpr_comp_cnt = 2; + } else if (info->vs.needs_instance_id) { + vgpr_comp_cnt = 1; + } else { + vgpr_comp_cnt = 0; + } + } break; case MESA_SHADER_FRAGMENT: + case MESA_SHADER_GEOMETRY: break; case MESA_SHADER_COMPUTE: variant->rsrc2 |= @@ -503,9 +567,10 @@ radv_fill_shader_variant(struct radv_device *device, unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt; if (es_type == MESA_SHADER_VERTEX) { - es_vgpr_comp_cnt = variant->info.vs.vgpr_comp_cnt; + /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */ + es_vgpr_comp_cnt = info->vs.needs_instance_id ? 1 : 0; } else if (es_type == MESA_SHADER_TESS_EVAL) { - es_vgpr_comp_cnt = 3; + es_vgpr_comp_cnt = info->uses_prim_id ? 3 : 2; } else { unreachable("invalid shader ES type"); } @@ -619,6 +684,8 @@ shader_variant_create(struct radv_device *device, tm_options |= AC_TM_SISCHED; if (options->check_ir) tm_options |= AC_TM_CHECK_IR; + if (device->instance->debug_flags & RADV_DEBUG_NO_LOAD_STORE_OPT) + tm_options |= AC_TM_NO_LOAD_STORE_OPT; thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM); radv_init_llvm_once(); @@ -638,7 +705,7 @@ shader_variant_create(struct radv_device *device, radv_destroy_llvm_compiler(&ac_llvm, thread_compiler); - radv_fill_shader_variant(device, variant, &binary, stage); + radv_fill_shader_variant(device, variant, options, &binary, stage); if (code_out) { *code_out = binary.code; @@ -743,7 +810,7 @@ generate_shader_stats(struct radv_device *device, struct _mesa_string_buffer *buf) { enum chip_class chip_class = device->physical_device->rad_info.chip_class; - unsigned lds_increment = chip_class >= CIK ? 512 : 256; + unsigned lds_increment = chip_class >= GFX7 ? 512 : 256; struct ac_shader_config *conf; unsigned max_simd_waves; unsigned lds_per_wave = 0; @@ -845,7 +912,7 @@ radv_GetShaderInfoAMD(VkDevice _device, if (!pInfo) { *pInfoSize = sizeof(VkShaderStatisticsInfoAMD); } else { - unsigned lds_multiplier = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256; + unsigned lds_multiplier = device->physical_device->rad_info.chip_class >= GFX7 ? 512 : 256; struct ac_shader_config *conf = &variant->config; VkShaderStatisticsInfoAMD statistics = {};