radv/ac: Add compiler support for spilling.

author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Sat, 28 Jan 2017 22:51:19 +0000 (23:51 +0100)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Mon, 30 Jan 2017 01:07:12 +0000 (02:07 +0100)
diff --git a/src/amd/common/ac_binary.c b/src/amd/common/ac_binary.c

index 01cf000d9bea64140016940457e771cd0edd106c..9c66a821c216280f0b2259854e6602488e0f9946 100644 (file)
--- a/src/amd/common/ac_binary.c
+++ b/src/amd/common/ac_binary.c
@@ -212,23 +212,28 @@ static const char *scratch_rsrc_dword1_symbol =
  
  void ac_shader_binary_read_config(struct ac_shader_binary *binary,
                                   struct ac_shader_config *conf,
-                                 unsigned symbol_offset)
+                                 unsigned symbol_offset,
+                                 bool supports_spill)
  {
         unsigned i;
         const unsigned char *config =
                 ac_shader_binary_config_start(binary, symbol_offset);
         bool really_needs_scratch = false;
-
+       uint32_t wavesize = 0;
         /* LLVM adds SGPR spills to the scratch size.
          * Find out if we really need the scratch buffer.
          */
-       for (i = 0; i < binary->reloc_count; i++) {
-               const struct ac_shader_reloc *reloc = &binary->relocs[i];
+       if (supports_spill) {
+               really_needs_scratch = true;
+       } else {
+               for (i = 0; i < binary->reloc_count; i++) {
+                       const struct ac_shader_reloc *reloc = &binary->relocs[i];
  
-               if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
-                   !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
-                       really_needs_scratch = true;
-                       break;
+                       if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
+                           !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
+                               really_needs_scratch = true;
+                               break;
+                       }
                 }
         }
  
@@ -259,9 +264,7 @@ void ac_shader_binary_read_config(struct ac_shader_binary *binary,
                 case R_0286E8_SPI_TMPRING_SIZE:
                 case R_00B860_COMPUTE_TMPRING_SIZE:
                         /* WAVESIZE is in units of 256 dwords. */
-                       if (really_needs_scratch)
-                               conf->scratch_bytes_per_wave =
-                                       G_00B860_WAVESIZE(value) * 256 * 4;
+                       wavesize = value;
                         break;
                 case SPILLED_SGPRS:
                         conf->spilled_sgprs = value;
@@ -285,4 +288,9 @@ void ac_shader_binary_read_config(struct ac_shader_binary *binary,
                 if (!conf->spi_ps_input_addr)
                         conf->spi_ps_input_addr = conf->spi_ps_input_ena;
         }
+
+       if (really_needs_scratch) {
+               /* SGPR spills alone do not count as scratch spilling. */
+               conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(wavesize) * 256 * 4;
+       }
  }
diff --git a/src/amd/common/ac_binary.h b/src/amd/common/ac_binary.h

index 282f33d22b972fb8ee763d25f37edabfceef4083..06fd855f948f7da5ca5067338a111ee42553d4fc 100644 (file)
--- a/src/amd/common/ac_binary.h
+++ b/src/amd/common/ac_binary.h
@@ -27,6 +27,7 @@
  #pragma once
  
  #include <stdint.h>
+#include <stdbool.h>
  
  struct ac_shader_reloc {
         char name[32];
@@ -85,4 +86,5 @@ void ac_elf_read(const char *elf_data, unsigned elf_size,
  
  void ac_shader_binary_read_config(struct ac_shader_binary *binary,
                                   struct ac_shader_config *conf,
-                                 unsigned symbol_offset);
+                                 unsigned symbol_offset,
+                                 bool supports_spill);
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c

index 7317db76baaa73bf2d2801d4e41fbe8ccc6bad74..f3cab921ba17621b57eb614e5bf69ad23bb9149f 100644 (file)
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -126,11 +126,11 @@ static const char *ac_get_llvm_processor_name(enum radeon_family family)
         }
  }
  
-LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family)
+LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill)
  {
         assert(family >= CHIP_TAHITI);
  
-       const char *triple = "amdgcn--";
+       const char *triple = supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--";
         LLVMTargetRef target = ac_get_llvm_target(triple);
         LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
                                      target,
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h

index 2d301c935754b433885bb429e714bf46683bcf34..c07f67ab8b18b7c1ad252129a76281ed4d866b5b 100644 (file)
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -56,7 +56,7 @@ struct ac_llvm_context {
         LLVMValueRef fpmath_md_2p5_ulp;
  };
  
-LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family);
+LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill);
  
  void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
  bool ac_is_sgpr_param(LLVMValueRef param);
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c

index e83c7a2e4887dc0c0a1ea5b309549e51d842a060..dedea6568921f0f6720933321b531d77581bae5a 100644 (file)
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -458,10 +458,10 @@ static void create_function(struct nir_to_llvm_context *ctx)
             arg_idx, array_params_mask, sgpr_count, ctx->options->unsafe_math);
         set_llvm_calling_convention(ctx->main_function, ctx->stage);
  
-
         ctx->shader_info->num_input_sgprs = 0;
         ctx->shader_info->num_input_vgprs = 0;
  
+       ctx->shader_info->num_user_sgprs = ctx->options->supports_spill ? 2 : 0;
         for (i = 0; i < user_sgpr_count; i++)
                 ctx->shader_info->num_user_sgprs += llvm_get_type_size(arg_types[i]) / 4;
  
@@ -475,6 +475,12 @@ static void create_function(struct nir_to_llvm_context *ctx)
  
         arg_idx = 0;
         user_sgpr_idx = 0;
+
+       if (ctx->options->supports_spill) {
+               set_userdata_location_shader(ctx, AC_UD_SCRATCH, user_sgpr_idx, 2);
+               user_sgpr_idx += 2;
+       }
+
         for (unsigned i = 0; i < num_sets; ++i) {
                 if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
                         set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2);
@@ -4432,7 +4438,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
  
         memset(shader_info, 0, sizeof(*shader_info));
  
-       LLVMSetTarget(ctx.module, "amdgcn--");
+       LLVMSetTarget(ctx.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--");
         setup_types(&ctx);
  
         ctx.builder = LLVMCreateBuilderInContext(ctx.context);
@@ -4566,7 +4572,7 @@ static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
                                    struct ac_shader_config *config,
                                    struct ac_shader_variant_info *shader_info,
                                    gl_shader_stage stage,
-                                  bool dump_shader)
+                                  bool dump_shader, bool supports_spill)
  {
         if (dump_shader)
                 ac_dump_module(llvm_module);
@@ -4580,7 +4586,7 @@ static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
         if (dump_shader)
                 fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
  
-       ac_shader_binary_read_config(binary, config, 0);
+       ac_shader_binary_read_config(binary, config, 0, supports_spill);
  
         LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
         LLVMDisposeModule(llvm_module);
@@ -4640,7 +4646,7 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm,
         LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, shader_info,
                                                              options);
  
-       ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir->stage, dump_shader);
+       ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir->stage, dump_shader, options->supports_spill);
         switch (nir->stage) {
         case MESA_SHADER_COMPUTE:
                 for (int i = 0; i < 3; ++i)
diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h

index a57558e38ff90c271064f28cad1496c9876de7c2..9d66f940b52550b42ab51636137ed555d3d7284e 100644 (file)
--- a/src/amd/common/ac_nir_to_llvm.h
+++ b/src/amd/common/ac_nir_to_llvm.h
@@ -52,6 +52,7 @@ struct ac_nir_compiler_options {
         struct radv_pipeline_layout *layout;
         union ac_shader_variant_key key;
         bool unsafe_math;
+       bool supports_spill;
         enum radeon_family family;
         enum chip_class chip_class;
  };
@@ -64,8 +65,9 @@ struct ac_userdata_info {
  };
  
  enum ac_ud_index {
-       AC_UD_PUSH_CONSTANTS = 0,
-       AC_UD_SHADER_START = 1,
+       AC_UD_SCRATCH = 0,
+       AC_UD_PUSH_CONSTANTS = 1,
+       AC_UD_SHADER_START = 2,
         AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
         AC_UD_VS_BASE_VERTEX_START_INSTANCE,
         AC_UD_VS_MAX_UD,
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c

index 360b5196551b6c77b51085f0ae254e7c93c2373d..4d88ed77f93fc8278576dd995391bf06e9b092bb 100644 (file)
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -424,7 +424,8 @@ static struct radv_shader_variant *radv_shader_variant_create(struct radv_device
         options.unsafe_math = !!(device->debug_flags & RADV_DEBUG_UNSAFE_MATH);
         options.family = chip_family;
         options.chip_class = device->physical_device->rad_info.chip_class;
-       tm = ac_create_target_machine(chip_family);
+       options.supports_spill = false;
+       tm = ac_create_target_machine(chip_family, false);
         ac_compile_nir_shader(tm, &binary, &variant->config,
                               &variant->info, shader, &options, dump);
         LLVMDisposeTargetMachine(tm);
author	Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
	Sat, 28 Jan 2017 22:51:19 +0000 (23:51 +0100)
committer	Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
	Mon, 30 Jan 2017 01:07:12 +0000 (02:07 +0100)
src/amd/common/ac_binary.c		patch \| blob \| history
src/amd/common/ac_binary.h		patch \| blob \| history
src/amd/common/ac_llvm_util.c		patch \| blob \| history
src/amd/common/ac_llvm_util.h		patch \| blob \| history
src/amd/common/ac_nir_to_llvm.c		patch \| blob \| history
src/amd/common/ac_nir_to_llvm.h		patch \| blob \| history
src/amd/vulkan/radv_pipeline.c		patch \| blob \| history