LLVMValueRef *ssa_defs;
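+ /* Byte-addressed scratch area backing load/store_scratch; allocated in
+ * setup_scratch(). */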
+ LLVMValueRef scratch;
+
struct hash_table *defs;
struct hash_table *phis;
struct hash_table *vars;
case nir_intrinsic_mbcnt_amd:
result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0]));
break;
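+ /* src[0] holds a byte offset into the scratch area; GEP into the i8
+ * alloca created by setup_scratch() and bitcast the pointer to the
+ * destination's component/vector type before loading.
+ */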
+ case nir_intrinsic_load_scratch: {
+ LLVMValueRef offset = get_src(ctx, instr->src[0]);
+ LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
+ offset);
+ LLVMTypeRef comp_type =
+ LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
+ LLVMTypeRef vec_type =
+ instr->dest.ssa.num_components == 1 ? comp_type :
+ LLVMVectorType(comp_type, instr->dest.ssa.num_components);
+ unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
+ LLVMPointerType(vec_type, addr_space), "");
+ result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
+ break;
+ }
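+ /* Mirror of load_scratch: src[1] is the byte offset and src[0] the
+ * value to store. The full source vector is written; the intrinsic's
+ * write mask is assumed to be all-ones here.
+ */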
+ case nir_intrinsic_store_scratch: {
+ LLVMValueRef offset = get_src(ctx, instr->src[1]);
+ LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
+ offset);
+ LLVMTypeRef comp_type =
+ LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
+ LLVMTypeRef vec_type =
+ instr->src[0].ssa->num_components == 1 ? comp_type :
+ LLVMVectorType(comp_type, instr->src[0].ssa->num_components);
+ unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
+ LLVMPointerType(vec_type, addr_space), "");
+ LLVMBuildStore(ctx->ac.builder, get_src(ctx, instr->src[0]), ptr);
+ break;
+ }
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);
}
}
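+/* Scratch is modelled as a single i8 array alloca sized by the shader's
+ * scratch_size (filled in by nir_lower_vars_to_scratch); load/store_scratch
+ * intrinsics then index into it with byte offsets.
+ */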
+static void
+setup_scratch(struct ac_nir_context *ctx,
+ struct nir_shader *shader)
+{
+ if (shader->scratch_size == 0)
+ return;
+
+ ctx->scratch = ac_build_alloca_undef(&ctx->ac,
+ LLVMArrayType(ctx->ac.i8, shader->scratch_size),
+ "scratch");
+}
+
static void
setup_shared(struct ac_nir_context *ctx,
struct nir_shader *nir)
ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
setup_locals(&ctx, func);
+ setup_scratch(&ctx, nir);
if (gl_shader_stage_is_compute(nir->info.stage))
setup_shared(&ctx, nir);
void
ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
{
+ /* Lower large variables to scratch first so that we won't bloat the
+ * shader by generating large if-ladders for them. We later lower
+ * scratch to allocas, assuming LLVM won't generate VGPR indexing.
+ */
+ NIR_PASS_V(nir, nir_lower_vars_to_scratch,
+ nir_var_function_temp,
+ 256,
+ glsl_get_natural_size_align_bytes);
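+ /* The 256 above is the size threshold in bytes: only sufficiently large
+ * temporaries are moved to scratch, smaller ones keep going through the
+ * if-ladder lowering below.
+ */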
+
/* While it would be nice not to have this flag, we are constrained
* by the reality that LLVM 9.0 has buggy VGPR indexing on GFX9.
*/