ac/nir: Lower large indirect variables to scratch

[mesa.git] / src / amd / common / ac_nir_to_llvm.c
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c

index 70fcaaf7bf340c762dc0a3d5631d491f620fdef9..5e25e838f8f3f78c5d7a1590768f95bde4d0a828 100644 (file)
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -42,6 +42,8 @@ struct ac_nir_context {
  
         LLVMValueRef *ssa_defs;
  
+       LLVMValueRef scratch;
+
         struct hash_table *defs;
         struct hash_table *phis;
         struct hash_table *vars;
@@ -1644,14 +1646,17 @@ static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx,
                                            LLVMValueRef compare,
                                            LLVMValueRef exchange)
  {
-       LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
+       LLVMBasicBlockRef start_block = NULL, then_block = NULL;
+       if (ctx->abi->robust_buffer_access) {
+               LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
  
-       LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
-       LLVMBasicBlockRef start_block = LLVMGetInsertBlock(ctx->ac.builder);
+               LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
+               start_block = LLVMGetInsertBlock(ctx->ac.builder);
  
-       ac_build_ifcc(&ctx->ac, cond, -1);
+               ac_build_ifcc(&ctx->ac, cond, -1);
  
-       LLVMBasicBlockRef then_block = LLVMGetInsertBlock(ctx->ac.builder);
+               then_block = LLVMGetInsertBlock(ctx->ac.builder);
+       }
  
         LLVMValueRef ptr_parts[2] = {
                 ac_llvm_extract_elem(&ctx->ac, descriptor, 0),
@@ -1673,20 +1678,24 @@ static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx,
         LLVMValueRef result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, compare, exchange, "singlethread-one-as");
         result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
  
-       ac_build_endif(&ctx->ac, -1);
+       if (ctx->abi->robust_buffer_access) {
+               ac_build_endif(&ctx->ac, -1);
  
-       LLVMBasicBlockRef incoming_blocks[2] = {
-               start_block,
-               then_block,
-       };
+               LLVMBasicBlockRef incoming_blocks[2] = {
+                       start_block,
+                       then_block,
+               };
  
-       LLVMValueRef incoming_values[2] = {
-               LLVMConstInt(ctx->ac.i64, 0, 0),
-               result,
-       };
-       LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
-       LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
-       return ret;
+               LLVMValueRef incoming_values[2] = {
+                       LLVMConstInt(ctx->ac.i64, 0, 0),
+                       result,
+               };
+               LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
+               LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
+               return ret;
+       } else {
+               return result;
+       }
  }
  
  static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
@@ -3566,6 +3575,36 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
         case nir_intrinsic_mbcnt_amd:
                 result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0]));
                 break;
+       case nir_intrinsic_load_scratch: {
+               LLVMValueRef offset = get_src(ctx, instr->src[0]);
+               LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
+                                                offset);
+               LLVMTypeRef comp_type =
+                       LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
+               LLVMTypeRef vec_type =
+                       instr->dest.ssa.num_components == 1 ? comp_type :
+                       LLVMVectorType(comp_type, instr->dest.ssa.num_components);
+               unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+               ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
+                                      LLVMPointerType(vec_type, addr_space), "");
+               result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
+               break;
+       }
+       case nir_intrinsic_store_scratch: {
+               LLVMValueRef offset = get_src(ctx, instr->src[1]);
+               LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
+                                                offset);
+               LLVMTypeRef comp_type =
+                       LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
+               LLVMTypeRef vec_type =
+                       instr->src[0].ssa->num_components == 1 ? comp_type :
+                       LLVMVectorType(comp_type, instr->src[0].ssa->num_components);
+               unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+               ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
+                                      LLVMPointerType(vec_type, addr_space), "");
+               LLVMBuildStore(ctx->ac.builder, get_src(ctx, instr->src[0]), ptr);
+               break;
+       }
         default:
                 fprintf(stderr, "Unknown intrinsic: ");
                 nir_print_instr(&instr->instr, stderr);
@@ -4466,6 +4505,18 @@ setup_locals(struct ac_nir_context *ctx,
         }
  }
  
+static void
+setup_scratch(struct ac_nir_context *ctx,
+             struct nir_shader *shader)
+{
+       if (shader->scratch_size == 0)
+               return;
+
+       ctx->scratch = ac_build_alloca_undef(&ctx->ac,
+                                            LLVMArrayType(ctx->ac.i8, shader->scratch_size),
+                                            "scratch");
+}
+
  static void
  setup_shared(struct ac_nir_context *ctx,
              struct nir_shader *nir)
@@ -4511,6 +4562,7 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
         ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
  
         setup_locals(&ctx, func);
+       setup_scratch(&ctx, nir);
  
         if (gl_shader_stage_is_compute(nir->info.stage))
                 setup_shared(&ctx, nir);
@@ -4532,6 +4584,15 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
  void
  ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
  {
+       /* Lower large variables to scratch first so that we won't bloat the
+        * shader by generating large if ladders for them. We later lower
+        * scratch to alloca's, assuming LLVM won't generate VGPR indexing.
+        */
+       NIR_PASS_V(nir, nir_lower_vars_to_scratch,
+                  nir_var_function_temp,
+                  256,
+                  glsl_get_natural_size_align_bytes);
+
         /* While it would be nice not to have this flag, we are constrained
          * by the reality that LLVM 9.0 has buggy VGPR indexing on GFX9.
          */