radeonsi: reload PS inputs with direct indexing at each use (v2)

author Marek Olšák <marek.olsak@amd.com>
Tue, 13 Sep 2016 15:33:23 +0000 (17:33 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Wed, 14 Sep 2016 10:33:00 +0000 (12:33 +0200)
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h

index da5b7f58e154e28224b26bb55617734d30828455..f508d3230cc94e2c646317db309b208b3f4a3bdf 100644 (file)
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -30,7 +30,9 @@
  #include <llvm-c/Core.h>
  #include "gallivm/lp_bld_init.h"
  #include "gallivm/lp_bld_tgsi.h"
+#include "tgsi/tgsi_parse.h"
  
+#define RADEON_LLVM_MAX_INPUT_SLOTS 32
  #define RADEON_LLVM_MAX_INPUTS 32 * 4
  #define RADEON_LLVM_MAX_OUTPUTS 32 * 4
  
@@ -62,7 +64,8 @@ struct radeon_llvm_context {
           */
         void (*load_input)(struct radeon_llvm_context *,
                            unsigned input_index,
-                          const struct tgsi_full_declaration *decl);
+                          const struct tgsi_full_declaration *decl,
+                          LLVMValueRef out[4]);
  
         void (*load_system_value)(struct radeon_llvm_context *,
                                   unsigned index,
@@ -75,6 +78,7 @@ struct radeon_llvm_context {
           * values will be in the form of a target intrinsic that will inform the
           * backend how to load the actual inputs to the shader. 
           */
+       struct tgsi_full_declaration input_decls[RADEON_LLVM_MAX_INPUT_SLOTS];
         LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS];
         LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS];
  
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c

index 4643e6d0ce79671ca385a6e797b2bb2c4c76275d..4fa43cd23424cca26b6986bff0a8edf8ee51f98e 100644 (file)
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -446,14 +446,29 @@ LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
                 }
         }
  
-       case TGSI_FILE_INPUT:
-               result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
+       case TGSI_FILE_INPUT: {
+               unsigned index = reg->Register.Index;
+               LLVMValueRef input[4];
+
+               /* I don't think doing this for vertex shaders is beneficial.
+                * For those, we want to make sure the VMEM loads are executed
+                * only once. Fragment shaders don't care much, because
+                * v_interp instructions are much cheaper than VMEM loads.
+                */
+               if (ctx->soa.bld_base.info->processor == PIPE_SHADER_FRAGMENT)
+                       ctx->load_input(ctx, index, &ctx->input_decls[index], input);
+               else
+                       memcpy(input, &ctx->inputs[index * 4], sizeof(input));
+
+               result = input[swizzle];
+
                 if (tgsi_type_is_64bit(type)) {
                         ptr = result;
-                       ptr2 = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle + 1)];
+                       ptr2 = input[swizzle + 1];
                         return radeon_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
                 }
                 break;
+       }
  
         case TGSI_FILE_TEMPORARY:
                 if (reg->Register.Index >= ctx->temps_count)
@@ -626,8 +641,13 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
         {
                 unsigned idx;
                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
-                       if (ctx->load_input)
-                               ctx->load_input(ctx, idx, decl);
+                       if (ctx->load_input) {
+                               ctx->input_decls[idx] = *decl;
+
+                               if (bld_base->info->processor != PIPE_SHADER_FRAGMENT)
+                                       ctx->load_input(ctx, idx, decl,
+                                                       &ctx->inputs[idx * 4]);
+                       }
                 }
         }
         break;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c

index b034837d252accee2fee6adbb3c4dc4591cb8b6a..3ccff7ae9e0d426d81317cbd2c1e8df42c8f43b8 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -440,7 +440,8 @@ static LLVMValueRef get_instance_index_for_fetch(
  static void declare_input_vs(
         struct radeon_llvm_context *radeon_bld,
         unsigned input_index,
-       const struct tgsi_full_declaration *decl)
+       const struct tgsi_full_declaration *decl,
+       LLVMValueRef out[4])
  {
         struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
         struct gallivm_state *gallivm = base->gallivm;
@@ -498,11 +499,8 @@ static void declare_input_vs(
         /* Break up the vec4 into individual components */
         for (chan = 0; chan < 4; chan++) {
                 LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
-               /* XXX: Use a helper function for this.  There is one in
-                * tgsi_llvm.c. */
-               ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] =
-                               LLVMBuildExtractElement(gallivm->builder,
-                               input, llvm_chan, "");
+               out[chan] = LLVMBuildExtractElement(gallivm->builder,
+                                                   input, llvm_chan, "");
         }
  }
  
@@ -1463,7 +1461,8 @@ static LLVMValueRef get_interp_param(struct si_shader_context *ctx,
  static void declare_input_fs(
         struct radeon_llvm_context *radeon_bld,
         unsigned input_index,
-       const struct tgsi_full_declaration *decl)
+       const struct tgsi_full_declaration *decl,
+       LLVMValueRef out[4])
  {
         struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
         struct si_shader_context *ctx =
@@ -1482,14 +1481,10 @@ static void declare_input_fs(
                 unsigned offset = SI_PARAM_POS_FIXED_PT + 1 +
                                   (i ? util_bitcount(colors_read & 0xf) : 0);
  
-               radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
-                       mask & 0x1 ? LLVMGetParam(main_fn, offset++) : base->undef;
-               radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
-                       mask & 0x2 ? LLVMGetParam(main_fn, offset++) : base->undef;
-               radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
-                       mask & 0x4 ? LLVMGetParam(main_fn, offset++) : base->undef;
-               radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
-                       mask & 0x8 ? LLVMGetParam(main_fn, offset++) : base->undef;
+               out[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : base->undef;
+               out[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : base->undef;
+               out[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : base->undef;
+               out[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : base->undef;
                 return;
         }
  
@@ -1513,7 +1508,7 @@ static void declare_input_fs(
                         shader->selector->info.colors_read, interp_param,
                         LLVMGetParam(main_fn, SI_PARAM_PRIM_MASK),
                         LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE),
-                       &radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)]);
+                       &out[0]);
  }
  
  static LLVMValueRef get_sample_id(struct radeon_llvm_context *radeon_bld)
author	Marek Olšák <marek.olsak@amd.com>
	Tue, 13 Sep 2016 15:33:23 +0000 (17:33 +0200)
committer	Marek Olšák <marek.olsak@amd.com>
	Wed, 14 Sep 2016 10:33:00 +0000 (12:33 +0200)
src/gallium/drivers/radeon/radeon_llvm.h		patch | blob | history
src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c		patch | blob | history
src/gallium/drivers/radeonsi/si_shader.c		patch | blob | history