radv/ac: Add compiler support for spilling.
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Sat, 28 Jan 2017 22:51:19 +0000 (23:51 +0100)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Mon, 30 Jan 2017 01:07:12 +0000 (02:07 +0100)
Based on code written by Dave Airlie.

Signed-off-by: Bas Nieuwenhuizen <basni@google.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
src/amd/common/ac_binary.c
src/amd/common/ac_binary.h
src/amd/common/ac_llvm_util.c
src/amd/common/ac_llvm_util.h
src/amd/common/ac_nir_to_llvm.c
src/amd/common/ac_nir_to_llvm.h
src/amd/vulkan/radv_pipeline.c

index 01cf000d9bea64140016940457e771cd0edd106c..9c66a821c216280f0b2259854e6602488e0f9946 100644 (file)
@@ -212,23 +212,28 @@ static const char *scratch_rsrc_dword1_symbol =
 
 void ac_shader_binary_read_config(struct ac_shader_binary *binary,
                                  struct ac_shader_config *conf,
-                                 unsigned symbol_offset)
+                                 unsigned symbol_offset,
+                                 bool supports_spill)
 {
        unsigned i;
        const unsigned char *config =
                ac_shader_binary_config_start(binary, symbol_offset);
        bool really_needs_scratch = false;
-
+       uint32_t wavesize = 0;
        /* LLVM adds SGPR spills to the scratch size.
         * Find out if we really need the scratch buffer.
         */
-       for (i = 0; i < binary->reloc_count; i++) {
-               const struct ac_shader_reloc *reloc = &binary->relocs[i];
+       if (supports_spill) {
+               really_needs_scratch = true;
+       } else {
+               for (i = 0; i < binary->reloc_count; i++) {
+                       const struct ac_shader_reloc *reloc = &binary->relocs[i];
 
-               if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
-                   !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
-                       really_needs_scratch = true;
-                       break;
+                       if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
+                           !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
+                               really_needs_scratch = true;
+                               break;
+                       }
                }
        }
 
@@ -259,9 +264,7 @@ void ac_shader_binary_read_config(struct ac_shader_binary *binary,
                case R_0286E8_SPI_TMPRING_SIZE:
                case R_00B860_COMPUTE_TMPRING_SIZE:
                        /* WAVESIZE is in units of 256 dwords. */
-                       if (really_needs_scratch)
-                               conf->scratch_bytes_per_wave =
-                                       G_00B860_WAVESIZE(value) * 256 * 4;
+                       wavesize = value;
                        break;
                case SPILLED_SGPRS:
                        conf->spilled_sgprs = value;
@@ -285,4 +288,9 @@ void ac_shader_binary_read_config(struct ac_shader_binary *binary,
                if (!conf->spi_ps_input_addr)
                        conf->spi_ps_input_addr = conf->spi_ps_input_ena;
        }
+
+       if (really_needs_scratch) {
+               /* sgprs spills aren't spilling */
+               conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(wavesize) * 256 * 4;
+       }
 }
index 282f33d22b972fb8ee763d25f37edabfceef4083..06fd855f948f7da5ca5067338a111ee42553d4fc 100644 (file)
@@ -27,6 +27,7 @@
 #pragma once
 
 #include <stdint.h>
+#include <stdbool.h>
 
 struct ac_shader_reloc {
        char name[32];
@@ -85,4 +86,5 @@ void ac_elf_read(const char *elf_data, unsigned elf_size,
 
 void ac_shader_binary_read_config(struct ac_shader_binary *binary,
                                  struct ac_shader_config *conf,
-                                 unsigned symbol_offset);
+                                 unsigned symbol_offset,
+                                 bool supports_spill);
index 7317db76baaa73bf2d2801d4e41fbe8ccc6bad74..f3cab921ba17621b57eb614e5bf69ad23bb9149f 100644 (file)
@@ -126,11 +126,11 @@ static const char *ac_get_llvm_processor_name(enum radeon_family family)
        }
 }
 
-LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family)
+LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill)
 {
        assert(family >= CHIP_TAHITI);
 
-       const char *triple = "amdgcn--";
+       const char *triple = supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--";
        LLVMTargetRef target = ac_get_llvm_target(triple);
        LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
                                     target,
index 2d301c935754b433885bb429e714bf46683bcf34..c07f67ab8b18b7c1ad252129a76281ed4d866b5b 100644 (file)
@@ -56,7 +56,7 @@ struct ac_llvm_context {
        LLVMValueRef fpmath_md_2p5_ulp;
 };
 
-LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family);
+LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill);
 
 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
 bool ac_is_sgpr_param(LLVMValueRef param);
index e83c7a2e4887dc0c0a1ea5b309549e51d842a060..dedea6568921f0f6720933321b531d77581bae5a 100644 (file)
@@ -458,10 +458,10 @@ static void create_function(struct nir_to_llvm_context *ctx)
            arg_idx, array_params_mask, sgpr_count, ctx->options->unsafe_math);
        set_llvm_calling_convention(ctx->main_function, ctx->stage);
 
-
        ctx->shader_info->num_input_sgprs = 0;
        ctx->shader_info->num_input_vgprs = 0;
 
+       ctx->shader_info->num_user_sgprs = ctx->options->supports_spill ? 2 : 0;
        for (i = 0; i < user_sgpr_count; i++)
                ctx->shader_info->num_user_sgprs += llvm_get_type_size(arg_types[i]) / 4;
 
@@ -475,6 +475,12 @@ static void create_function(struct nir_to_llvm_context *ctx)
 
        arg_idx = 0;
        user_sgpr_idx = 0;
+
+       if (ctx->options->supports_spill) {
+               set_userdata_location_shader(ctx, AC_UD_SCRATCH, user_sgpr_idx, 2);
+               user_sgpr_idx += 2;
+       }
+
        for (unsigned i = 0; i < num_sets; ++i) {
                if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
                        set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2);
@@ -4432,7 +4438,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
 
        memset(shader_info, 0, sizeof(*shader_info));
 
-       LLVMSetTarget(ctx.module, "amdgcn--");
+       LLVMSetTarget(ctx.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--");
        setup_types(&ctx);
 
        ctx.builder = LLVMCreateBuilderInContext(ctx.context);
@@ -4566,7 +4572,7 @@ static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
                                   struct ac_shader_config *config,
                                   struct ac_shader_variant_info *shader_info,
                                   gl_shader_stage stage,
-                                  bool dump_shader)
+                                  bool dump_shader, bool supports_spill)
 {
        if (dump_shader)
                ac_dump_module(llvm_module);
@@ -4580,7 +4586,7 @@ static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
        if (dump_shader)
                fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
 
-       ac_shader_binary_read_config(binary, config, 0);
+       ac_shader_binary_read_config(binary, config, 0, supports_spill);
 
        LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
        LLVMDisposeModule(llvm_module);
@@ -4640,7 +4646,7 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm,
        LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, shader_info,
                                                             options);
 
-       ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir->stage, dump_shader);
+       ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir->stage, dump_shader, options->supports_spill);
        switch (nir->stage) {
        case MESA_SHADER_COMPUTE:
                for (int i = 0; i < 3; ++i)
index a57558e38ff90c271064f28cad1496c9876de7c2..9d66f940b52550b42ab51636137ed555d3d7284e 100644 (file)
@@ -52,6 +52,7 @@ struct ac_nir_compiler_options {
        struct radv_pipeline_layout *layout;
        union ac_shader_variant_key key;
        bool unsafe_math;
+       bool supports_spill;
        enum radeon_family family;
        enum chip_class chip_class;
 };
@@ -64,8 +65,9 @@ struct ac_userdata_info {
 };
 
 enum ac_ud_index {
-       AC_UD_PUSH_CONSTANTS = 0,
-       AC_UD_SHADER_START = 1,
+       AC_UD_SCRATCH = 0,
+       AC_UD_PUSH_CONSTANTS = 1,
+       AC_UD_SHADER_START = 2,
        AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
        AC_UD_VS_BASE_VERTEX_START_INSTANCE,
        AC_UD_VS_MAX_UD,
index 360b5196551b6c77b51085f0ae254e7c93c2373d..4d88ed77f93fc8278576dd995391bf06e9b092bb 100644 (file)
@@ -424,7 +424,8 @@ static struct radv_shader_variant *radv_shader_variant_create(struct radv_device
        options.unsafe_math = !!(device->debug_flags & RADV_DEBUG_UNSAFE_MATH);
        options.family = chip_family;
        options.chip_class = device->physical_device->rad_info.chip_class;
-       tm = ac_create_target_machine(chip_family);
+       options.supports_spill = false;
+       tm = ac_create_target_machine(chip_family, false);
        ac_compile_nir_shader(tm, &binary, &variant->config,
                              &variant->info, shader, &options, dump);
        LLVMDisposeTargetMachine(tm);