From 29c1f67e9f166da4393493d213ee06498aecac51 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Sat, 28 Jan 2017 23:51:19 +0100 Subject: [PATCH] radv/ac: Add compiler support for spilling. Based on code written by Dave Airlie. Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Dave Airlie --- src/amd/common/ac_binary.c | 30 +++++++++++++++++++----------- src/amd/common/ac_binary.h | 4 +++- src/amd/common/ac_llvm_util.c | 4 ++-- src/amd/common/ac_llvm_util.h | 2 +- src/amd/common/ac_nir_to_llvm.c | 16 +++++++++++----- src/amd/common/ac_nir_to_llvm.h | 6 ++++-- src/amd/vulkan/radv_pipeline.c | 3 ++- 7 files changed, 42 insertions(+), 23 deletions(-) diff --git a/src/amd/common/ac_binary.c b/src/amd/common/ac_binary.c index 01cf000d9be..9c66a821c21 100644 --- a/src/amd/common/ac_binary.c +++ b/src/amd/common/ac_binary.c @@ -212,23 +212,28 @@ static const char *scratch_rsrc_dword1_symbol = void ac_shader_binary_read_config(struct ac_shader_binary *binary, struct ac_shader_config *conf, - unsigned symbol_offset) + unsigned symbol_offset, + bool supports_spill) { unsigned i; const unsigned char *config = ac_shader_binary_config_start(binary, symbol_offset); bool really_needs_scratch = false; - + uint32_t wavesize = 0; /* LLVM adds SGPR spills to the scratch size. * Find out if we really need the scratch buffer. 
*/ - for (i = 0; i < binary->reloc_count; i++) { - const struct ac_shader_reloc *reloc = &binary->relocs[i]; + if (supports_spill) { + really_needs_scratch = true; + } else { + for (i = 0; i < binary->reloc_count; i++) { + const struct ac_shader_reloc *reloc = &binary->relocs[i]; - if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) || - !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) { - really_needs_scratch = true; - break; + if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) || + !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) { + really_needs_scratch = true; + break; + } } } @@ -259,9 +264,7 @@ void ac_shader_binary_read_config(struct ac_shader_binary *binary, case R_0286E8_SPI_TMPRING_SIZE: case R_00B860_COMPUTE_TMPRING_SIZE: /* WAVESIZE is in units of 256 dwords. */ - if (really_needs_scratch) - conf->scratch_bytes_per_wave = - G_00B860_WAVESIZE(value) * 256 * 4; + wavesize = value; break; case SPILLED_SGPRS: conf->spilled_sgprs = value; @@ -285,4 +288,9 @@ void ac_shader_binary_read_config(struct ac_shader_binary *binary, if (!conf->spi_ps_input_addr) conf->spi_ps_input_addr = conf->spi_ps_input_ena; } + + if (really_needs_scratch) { + /* sgprs spills aren't spilling */ + conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(wavesize) * 256 * 4; + } } diff --git a/src/amd/common/ac_binary.h b/src/amd/common/ac_binary.h index 282f33d22b9..06fd855f948 100644 --- a/src/amd/common/ac_binary.h +++ b/src/amd/common/ac_binary.h @@ -27,6 +27,7 @@ #pragma once #include <stdint.h> +#include <stdbool.h> struct ac_shader_reloc { char name[32]; @@ -85,4 +86,5 @@ void ac_elf_read(const char *elf_data, unsigned elf_size, void ac_shader_binary_read_config(struct ac_shader_binary *binary, struct ac_shader_config *conf, - unsigned symbol_offset); + unsigned symbol_offset, + bool supports_spill); diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c index 7317db76baa..f3cab921ba1 100644 --- a/src/amd/common/ac_llvm_util.c +++ b/src/amd/common/ac_llvm_util.c @@ -126,11 +126,11 
@@ static const char *ac_get_llvm_processor_name(enum radeon_family family) } } -LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family) +LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill) { assert(family >= CHIP_TAHITI); - const char *triple = "amdgcn--"; + const char *triple = supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--"; LLVMTargetRef target = ac_get_llvm_target(triple); LLVMTargetMachineRef tm = LLVMCreateTargetMachine( target, diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h index 2d301c93575..c07f67ab8b1 100644 --- a/src/amd/common/ac_llvm_util.h +++ b/src/amd/common/ac_llvm_util.h @@ -56,7 +56,7 @@ struct ac_llvm_context { LLVMValueRef fpmath_md_2p5_ulp; }; -LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family); +LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill); void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes); bool ac_is_sgpr_param(LLVMValueRef param); diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index e83c7a2e488..dedea656892 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -458,10 +458,10 @@ static void create_function(struct nir_to_llvm_context *ctx) arg_idx, array_params_mask, sgpr_count, ctx->options->unsafe_math); set_llvm_calling_convention(ctx->main_function, ctx->stage); - ctx->shader_info->num_input_sgprs = 0; ctx->shader_info->num_input_vgprs = 0; + ctx->shader_info->num_user_sgprs = ctx->options->supports_spill ? 
2 : 0; for (i = 0; i < user_sgpr_count; i++) ctx->shader_info->num_user_sgprs += llvm_get_type_size(arg_types[i]) / 4; @@ -475,6 +475,12 @@ static void create_function(struct nir_to_llvm_context *ctx) arg_idx = 0; user_sgpr_idx = 0; + + if (ctx->options->supports_spill) { + set_userdata_location_shader(ctx, AC_UD_SCRATCH, user_sgpr_idx, 2); + user_sgpr_idx += 2; + } + for (unsigned i = 0; i < num_sets; ++i) { if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) { set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2); @@ -4432,7 +4438,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, memset(shader_info, 0, sizeof(*shader_info)); - LLVMSetTarget(ctx.module, "amdgcn--"); + LLVMSetTarget(ctx.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--"); setup_types(&ctx); ctx.builder = LLVMCreateBuilderInContext(ctx.context); @@ -4566,7 +4572,7 @@ static void ac_compile_llvm_module(LLVMTargetMachineRef tm, struct ac_shader_config *config, struct ac_shader_variant_info *shader_info, gl_shader_stage stage, - bool dump_shader) + bool dump_shader, bool supports_spill) { if (dump_shader) ac_dump_module(llvm_module); @@ -4580,7 +4586,7 @@ static void ac_compile_llvm_module(LLVMTargetMachineRef tm, if (dump_shader) fprintf(stderr, "disasm:\n%s\n", binary->disasm_string); - ac_shader_binary_read_config(binary, config, 0); + ac_shader_binary_read_config(binary, config, 0, supports_spill); LLVMContextRef ctx = LLVMGetModuleContext(llvm_module); LLVMDisposeModule(llvm_module); @@ -4640,7 +4646,7 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm, LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, shader_info, options); - ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir->stage, dump_shader); + ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir->stage, dump_shader, options->supports_spill); switch (nir->stage) { case 
MESA_SHADER_COMPUTE: for (int i = 0; i < 3; ++i) diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h index a57558e38ff..9d66f940b52 100644 --- a/src/amd/common/ac_nir_to_llvm.h +++ b/src/amd/common/ac_nir_to_llvm.h @@ -52,6 +52,7 @@ struct ac_nir_compiler_options { struct radv_pipeline_layout *layout; union ac_shader_variant_key key; bool unsafe_math; + bool supports_spill; enum radeon_family family; enum chip_class chip_class; }; @@ -64,8 +65,9 @@ struct ac_userdata_info { }; enum ac_ud_index { - AC_UD_PUSH_CONSTANTS = 0, - AC_UD_SHADER_START = 1, + AC_UD_SCRATCH = 0, + AC_UD_PUSH_CONSTANTS = 1, + AC_UD_SHADER_START = 2, AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START, AC_UD_VS_BASE_VERTEX_START_INSTANCE, AC_UD_VS_MAX_UD, diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 360b5196551..4d88ed77f93 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -424,7 +424,8 @@ static struct radv_shader_variant *radv_shader_variant_create(struct radv_device options.unsafe_math = !!(device->debug_flags & RADV_DEBUG_UNSAFE_MATH); options.family = chip_family; options.chip_class = device->physical_device->rad_info.chip_class; - tm = ac_create_target_machine(chip_family); + options.supports_spill = false; + tm = ac_create_target_machine(chip_family, false); ac_compile_nir_shader(tm, &binary, &variant->config, &variant->info, shader, &options, dump); LLVMDisposeTargetMachine(tm); -- 2.30.2