X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_shader.c;h=b462393b11803d215bf9ffd94aae7e9ae178f806;hp=16253979b0862ec1bd28dd84af2c0354a9a362aa;hb=8301a43f272df7aa8c28e4143be1549bbef74e42;hpb=e901b901cb61519514271ffc76a8c784c5e37d2a diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 16253979b08..b462393b118 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -47,8 +47,6 @@ #include "aco_interface.h" -#include "util/string_buffer.h" - static const struct nir_shader_compiler_options nir_options_llvm = { .vertex_id_zero_based = true, .lower_scmp = true, @@ -278,13 +276,13 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, } NIR_PASS(progress, shader, nir_opt_undef); + NIR_PASS(progress, shader, nir_opt_shrink_vectors); if (shader->options->max_unroll_iterations) { NIR_PASS(progress, shader, nir_opt_loop_unroll, 0); } } while (progress && !optimize_conservatively); NIR_PASS(progress, shader, nir_opt_conditional_discard); - NIR_PASS(progress, shader, nir_opt_shrink_vectors); NIR_PASS(progress, shader, nir_opt_move, nir_move_load_ubo); } @@ -540,6 +538,8 @@ radv_shader_compile_to_nir(struct radv_device *device, NIR_PASS_V(nir, nir_propagate_invariant); NIR_PASS_V(nir, nir_lower_system_values); + NIR_PASS_V(nir, nir_lower_compute_system_values, NULL); + NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays); if (device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE) @@ -803,13 +803,15 @@ radv_get_shader_binary_size(size_t code_size) return code_size + DEBUGGER_NUM_MARKERS * 4; } -static void radv_postprocess_config(const struct radv_physical_device *pdevice, +static void radv_postprocess_config(const struct radv_device *device, const struct ac_shader_config *config_in, const struct radv_shader_info *info, gl_shader_stage stage, struct ac_shader_config *config_out) { + const struct radv_physical_device *pdevice = device->physical_device; 
bool scratch_enabled = config_in->scratch_bytes_per_wave > 0; + bool trap_enabled = !!device->trap_handler_shader; unsigned vgpr_comp_cnt = 0; unsigned num_input_vgprs = info->num_input_vgprs; @@ -825,6 +827,7 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice, assert((pdevice->rad_info.chip_class >= GFX10 && num_shared_vgprs % 8 == 0) || (pdevice->rad_info.chip_class < GFX10 && num_shared_vgprs == 0)); unsigned num_shared_vgpr_blocks = num_shared_vgprs / 8; + unsigned excp_en = 0; *config_out = *config_in; config_out->num_vgprs = num_vgprs; @@ -832,7 +835,15 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice, config_out->num_shared_vgprs = num_shared_vgprs; config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) | - S_00B12C_SCRATCH_EN(scratch_enabled); + S_00B12C_SCRATCH_EN(scratch_enabled) | + S_00B12C_TRAP_PRESENT(trap_enabled); + + if (trap_enabled) { + /* Configure the shader exceptions like memory violation, etc. + * TODO: Enable (and validate) more exceptions. + */ + excp_en = 1 << 8; /* mem_viol */ + } if (!pdevice->use_ngg_streamout) { config_out->rsrc2 |= S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) | @@ -858,18 +869,21 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice, case MESA_SHADER_TESS_EVAL: if (info->is_ngg) { config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10); - config_out->rsrc2 |= S_00B22C_OC_LDS_EN(1); + config_out->rsrc2 |= S_00B22C_OC_LDS_EN(1) | + S_00B22C_EXCP_EN(excp_en); } else if (info->tes.as_es) { assert(pdevice->rad_info.chip_class <= GFX8); vgpr_comp_cnt = info->uses_prim_id ? 3 : 2; - config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1); + config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) | + S_00B12C_EXCP_EN(excp_en); } else { bool enable_prim_id = info->tes.export_prim_id || info->uses_prim_id; vgpr_comp_cnt = enable_prim_id ? 
3 : 2; config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10); - config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1); + config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) | + S_00B12C_EXCP_EN(excp_en); } config_out->rsrc2 |= S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks); break; @@ -881,13 +895,16 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice, */ if (pdevice->rad_info.chip_class >= GFX10) { vgpr_comp_cnt = info->vs.needs_instance_id ? 3 : 1; - config_out->rsrc2 |= S_00B42C_LDS_SIZE_GFX10(info->tcs.num_lds_blocks); + config_out->rsrc2 |= S_00B42C_LDS_SIZE_GFX10(info->tcs.num_lds_blocks) | + S_00B42C_EXCP_EN_GFX10(excp_en); } else { vgpr_comp_cnt = info->vs.needs_instance_id ? 2 : 1; - config_out->rsrc2 |= S_00B42C_LDS_SIZE_GFX9(info->tcs.num_lds_blocks); + config_out->rsrc2 |= S_00B42C_LDS_SIZE_GFX9(info->tcs.num_lds_blocks) | + S_00B42C_EXCP_EN_GFX9(excp_en); } } else { - config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1); + config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) | + S_00B12C_EXCP_EN(excp_en); } config_out->rsrc1 |= S_00B428_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) | S_00B848_WGP_MODE(pdevice->rad_info.chip_class >= GFX10); @@ -924,16 +941,20 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice, config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10); } - config_out->rsrc2 |= S_00B12C_SHARED_VGPR_CNT(num_shared_vgpr_blocks); + config_out->rsrc2 |= S_00B12C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | + S_00B12C_EXCP_EN(excp_en); break; case MESA_SHADER_FRAGMENT: config_out->rsrc1 |= S_00B028_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10); - config_out->rsrc2 |= S_00B02C_SHARED_VGPR_CNT(num_shared_vgpr_blocks); + config_out->rsrc2 |= S_00B02C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | + S_00B02C_TRAP_PRESENT(1) | + S_00B02C_EXCP_EN(excp_en); break; case MESA_SHADER_GEOMETRY: config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) | 
S_00B848_WGP_MODE(pdevice->rad_info.chip_class >= GFX10); - config_out->rsrc2 |= S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks); + config_out->rsrc2 |= S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | + S_00B22C_EXCP_EN(excp_en); break; case MESA_SHADER_COMPUTE: config_out->rsrc1 |= S_00B848_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) | @@ -945,7 +966,8 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice, S_00B84C_TIDIG_COMP_CNT(info->cs.uses_thread_id[2] ? 2 : info->cs.uses_thread_id[1] ? 1 : 0) | S_00B84C_TG_SIZE_EN(info->cs.uses_local_invocation_idx) | - S_00B84C_LDS_SIZE(config_in->lds_size); + S_00B84C_LDS_SIZE(config_in->lds_size) | + S_00B84C_EXCP_EN(excp_en); config_out->rsrc3 |= S_00B8A0_SHARED_VGPR_CNT(num_shared_vgpr_blocks); break; @@ -1106,7 +1128,7 @@ radv_shader_variant_create(struct radv_device *device, } variant->info = binary->info; - radv_postprocess_config(device->physical_device, &config, &binary->info, + radv_postprocess_config(device, &config, &binary->info, binary->stage, &variant->config); void *dest_ptr = radv_alloc_shader_memory(device, variant); @@ -1200,6 +1222,7 @@ shader_variant_compile(struct radv_device *device, struct radv_shader_info *info, struct radv_nir_compiler_options *options, bool gs_copy_shader, + bool trap_handler_shader, bool keep_shader_info, bool keep_statistic_info, struct radv_shader_binary **binary_out) @@ -1232,6 +1255,8 @@ shader_variant_compile(struct radv_device *device, args.options = options; args.shader_info = info; args.is_gs_copy_shader = gs_copy_shader; + args.is_trap_handler_shader = trap_handler_shader; + radv_declare_shader_args(&args, gs_copy_shader ? 
MESA_SHADER_VERTEX : shaders[shader_count - 1]->info.stage, @@ -1269,7 +1294,7 @@ shader_variant_compile(struct radv_device *device, if (keep_shader_info) { variant->nir_string = radv_dump_nir_shaders(shaders, shader_count); - if (!gs_copy_shader && !module->nir) { + if (!gs_copy_shader && !trap_handler_shader && !module->nir) { variant->spirv = malloc(module->size); if (!variant->spirv) { free(variant); @@ -1312,7 +1337,8 @@ radv_shader_variant_compile(struct radv_device *device, options.robust_buffer_access = device->robust_buffer_access; return shader_variant_compile(device, module, shaders, shader_count, stage, info, - &options, false, keep_shader_info, keep_statistic_info, binary_out); + &options, false, false, + keep_shader_info, keep_statistic_info, binary_out); } struct radv_shader_variant * @@ -1330,7 +1356,33 @@ radv_create_gs_copy_shader(struct radv_device *device, options.key.has_multiview_view_index = multiview; return shader_variant_compile(device, NULL, &shader, 1, stage, - info, &options, true, keep_shader_info, keep_statistic_info, binary_out); + info, &options, true, false, + keep_shader_info, keep_statistic_info, binary_out); +} + /* Compile the trap handler shader variant: creates a compute-stage NIR shader named "meta_trap_handler" (no instructions are added to it here) and hands it to shader_variant_compile() with trap_handler_shader set. The temporary NIR shader and the binary written to 'binary' are freed before returning; the compiled radv_shader_variant is returned to the caller. NOTE(review): presumably the compiler backend emits the actual handler code when is_trap_handler_shader is set -- confirm. */ +struct radv_shader_variant * +radv_create_trap_handler_shader(struct radv_device *device) +{ + struct radv_nir_compiler_options options = {0}; + struct radv_shader_variant *shader = NULL; + struct radv_shader_binary *binary = NULL; + struct radv_shader_info info = {0}; + + nir_builder b; + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_trap_handler"); + + options.explicit_scratch_args = true; + info.wave_size = 64; + + /* Trailing booleans, in order: gs_copy_shader=false, trap_handler_shader=true, keep_shader_info=true, keep_statistic_info=false. */ shader = shader_variant_compile(device, NULL, &b.shader, 1, + MESA_SHADER_COMPUTE, &info, &options, + false, true, true, false, &binary); + + ralloc_free(b.shader); + free(binary); + + return shader; } void @@ -1453,66 +1505,6 @@ radv_get_max_waves(struct radv_device *device, return max_simd_waves; } -static void 
-generate_shader_stats(struct radv_device *device, - struct radv_shader_variant *variant, - gl_shader_stage stage, - struct _mesa_string_buffer *buf) -{ - struct ac_shader_config *conf = &variant->config; - unsigned max_simd_waves = radv_get_max_waves(device, variant, stage); - - if (stage == MESA_SHADER_FRAGMENT) { - _mesa_string_buffer_printf(buf, "*** SHADER CONFIG ***\n" - "SPI_PS_INPUT_ADDR = 0x%04x\n" - "SPI_PS_INPUT_ENA = 0x%04x\n", - conf->spi_ps_input_addr, conf->spi_ps_input_ena); - } - - _mesa_string_buffer_printf(buf, "*** SHADER STATS ***\n" - "SGPRS: %d\n" - "VGPRS: %d\n" - "Spilled SGPRs: %d\n" - "Spilled VGPRs: %d\n" - "PrivMem VGPRS: %d\n" - "Code Size: %d bytes\n" - "LDS: %d blocks\n" - "Scratch: %d bytes per wave\n" - "Max Waves: %d\n", - conf->num_sgprs, conf->num_vgprs, - conf->spilled_sgprs, conf->spilled_vgprs, - variant->info.private_mem_vgprs, variant->exec_size, - conf->lds_size, conf->scratch_bytes_per_wave, - max_simd_waves); - - if (variant->statistics) { - _mesa_string_buffer_printf(buf, "*** COMPILER STATS ***\n"); - for (unsigned i = 0; i < variant->statistics->count; i++) { - struct radv_compiler_statistic_info *info = &variant->statistics->infos[i]; - uint32_t value = variant->statistics->values[i]; - _mesa_string_buffer_printf(buf, "%s: %lu\n", info->name, value); - } - } - - _mesa_string_buffer_printf(buf, "********************\n\n\n"); -} - -void -radv_shader_dump_stats(struct radv_device *device, - struct radv_shader_variant *variant, - gl_shader_stage stage, - FILE *file) -{ - struct _mesa_string_buffer *buf = _mesa_string_buffer_create(NULL, 256); - - generate_shader_stats(device, variant, stage, buf); - - fprintf(file, "\n%s:\n", radv_get_shader_name(&variant->info, stage)); - fprintf(file, "%s", buf->buf); - - _mesa_string_buffer_destroy(buf); -} - VkResult radv_GetShaderInfoAMD(VkDevice _device, VkPipeline _pipeline, @@ -1525,7 +1517,6 @@ radv_GetShaderInfoAMD(VkDevice _device, RADV_FROM_HANDLE(radv_pipeline, pipeline, 
_pipeline); gl_shader_stage stage = vk_to_mesa_shader_stage(shaderStage); struct radv_shader_variant *variant = pipeline->shaders[stage]; - struct _mesa_string_buffer *buf; VkResult result = VK_SUCCESS; /* Spec doesn't indicate what to do if the stage is invalid, so just @@ -1577,16 +1568,19 @@ radv_GetShaderInfoAMD(VkDevice _device, } break; - case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD: - buf = _mesa_string_buffer_create(NULL, 1024); + case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD: { + char *out; + size_t outsize; + FILE *memf = open_memstream(&out, &outsize); - _mesa_string_buffer_printf(buf, "%s:\n", radv_get_shader_name(&variant->info, stage)); - _mesa_string_buffer_printf(buf, "%s\n\n", variant->ir_string); - _mesa_string_buffer_printf(buf, "%s\n\n", variant->disasm_string); - generate_shader_stats(device, variant, stage, buf); + fprintf(memf, "%s:\n", radv_get_shader_name(&variant->info, stage)); + fprintf(memf, "%s\n\n", variant->ir_string); + fprintf(memf, "%s\n\n", variant->disasm_string); + radv_dump_shader_stats(device, pipeline, stage, memf); + fclose(memf); /* Need to include the null terminator. */ - size_t length = buf->length + 1; + size_t length = outsize + 1; if (!pInfo) { *pInfoSize = length; @@ -1594,14 +1588,15 @@ radv_GetShaderInfoAMD(VkDevice _device, size_t size = *pInfoSize; *pInfoSize = length; - memcpy(pInfo, buf->buf, MIN2(size, length)); + memcpy(pInfo, out, MIN2(size, length)); if (size < length) result = VK_INCOMPLETE; } - _mesa_string_buffer_destroy(buf); + free(out); break; + } default: /* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented for now. 
*/ result = VK_ERROR_FEATURE_NOT_PRESENT;
@@ -1610,3 +1605,124 @@ radv_GetShaderInfoAMD(VkDevice _device,
 
 	return result;
 }
+
+/**
+ * Print the executable statistics of one pipeline shader stage to a stream.
+ *
+ * Queries the pipeline's executables through
+ * radv_GetPipelineExecutablePropertiesKHR() and, for every executable whose
+ * stage mask contains \p stage, fetches its statistics with
+ * radv_GetPipelineExecutableStatisticsKHR() and writes them to \p output as
+ * "name: value" lines between a "*** SHADER STATS ***" header and a row of
+ * asterisks.
+ *
+ * \param device   Device whose handle is passed to the query entry points.
+ * \param pipeline Pipeline to inspect; pipeline->shaders[stage] is
+ *                 dereferenced to obtain the shader name.
+ * \param stage    Shader stage whose executables are reported.
+ * \param output   Stream that receives the text; it is not closed here.
+ *
+ * \return VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY if a temporary array
+ *         cannot be allocated, or the first error returned by one of the
+ *         query entry points.
+ */
+VkResult
+radv_dump_shader_stats(struct radv_device *device,
+		       struct radv_pipeline *pipeline,
+		       gl_shader_stage stage, FILE *output)
+{
+	struct radv_shader_variant *shader = pipeline->shaders[stage];
+	VkPipelineExecutablePropertiesKHR *props = NULL;
+	uint32_t prop_count = 0;
+	VkResult result;
+
+	VkPipelineInfoKHR pipeline_info = {0};
+	pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR;
+	pipeline_info.pipeline = radv_pipeline_to_handle(pipeline);
+
+	/* First call only retrieves the number of executables. */
+	result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device),
+							 &pipeline_info,
+							 &prop_count, NULL);
+	if (result != VK_SUCCESS)
+		return result;
+
+	props = calloc(prop_count, sizeof(*props));
+	if (!props)
+		return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+	result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device),
+							 &pipeline_info,
+							 &prop_count, props);
+	if (result != VK_SUCCESS)
+		goto fail;
+
+	for (unsigned i = 0; i < prop_count; i++) {
+		if (!(props[i].stages & mesa_to_vk_shader_stage(stage)))
+			continue;
+
+		VkPipelineExecutableStatisticKHR *stats = NULL;
+		uint32_t stat_count = 0;
+
+		VkPipelineExecutableInfoKHR exec_info = {0};
+		exec_info.sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR;
+		exec_info.pipeline = radv_pipeline_to_handle(pipeline);
+		exec_info.executableIndex = i;
+
+		/* Errors below are stored in the function-level 'result' so
+		 * that the 'fail' path reports them to the caller.
+		 */
+		result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device),
+								 &exec_info,
+								 &stat_count, NULL);
+		if (result != VK_SUCCESS)
+			goto fail;
+
+		stats = calloc(stat_count, sizeof(*stats));
+		if (!stats) {
+			result = VK_ERROR_OUT_OF_HOST_MEMORY;
+			goto fail;
+		}
+
+		result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device),
+								 &exec_info,
+								 &stat_count, stats);
+		if (result != VK_SUCCESS) {
+			free(stats);
+			goto fail;
+		}
+
+		fprintf(output, "\n%s:\n",
+			radv_get_shader_name(&shader->info, stage));
+		fprintf(output, "*** SHADER STATS ***\n");
+
+		for (unsigned j = 0; j < stat_count; j++) {
+			fprintf(output, "%s: ", stats[j].name);
+			switch (stats[j].format) {
+			case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR:
+				fprintf(output, "%s", stats[j].value.b32 == VK_TRUE ? "true" : "false");
+				break;
+			case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
+				fprintf(output, "%"PRIi64, stats[j].value.i64);
+				break;
+			case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
+				fprintf(output, "%"PRIu64, stats[j].value.u64);
+				break;
+			case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
+				fprintf(output, "%f", stats[j].value.f64);
+				break;
+			default:
+				unreachable("Invalid pipeline statistic format");
+			}
+			fprintf(output, "\n");
+		}
+
+		fprintf(output, "********************\n\n\n");
+
+		free(stats);
+	}
+
+fail:
+	free(props);
+	return result;
+}