ac/nir: Lower large indirect variables to scratch
[mesa.git] / src / amd / common / ac_nir_to_llvm.c
index 70fcaaf7bf340c762dc0a3d5631d491f620fdef9..5e25e838f8f3f78c5d7a1590768f95bde4d0a828 100644 (file)
@@ -42,6 +42,8 @@ struct ac_nir_context {
 
        LLVMValueRef *ssa_defs;
 
+       LLVMValueRef scratch;
+
        struct hash_table *defs;
        struct hash_table *phis;
        struct hash_table *vars;
@@ -1644,14 +1646,17 @@ static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx,
                                           LLVMValueRef compare,
                                           LLVMValueRef exchange)
 {
-       LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
+       LLVMBasicBlockRef start_block = NULL, then_block = NULL;
+       if (ctx->abi->robust_buffer_access) {
+               LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
 
-       LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
-       LLVMBasicBlockRef start_block = LLVMGetInsertBlock(ctx->ac.builder);
+               LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
+               start_block = LLVMGetInsertBlock(ctx->ac.builder);
 
-       ac_build_ifcc(&ctx->ac, cond, -1);
+               ac_build_ifcc(&ctx->ac, cond, -1);
 
-       LLVMBasicBlockRef then_block = LLVMGetInsertBlock(ctx->ac.builder);
+               then_block = LLVMGetInsertBlock(ctx->ac.builder);
+       }
 
        LLVMValueRef ptr_parts[2] = {
                ac_llvm_extract_elem(&ctx->ac, descriptor, 0),
@@ -1673,20 +1678,24 @@ static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx,
        LLVMValueRef result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, compare, exchange, "singlethread-one-as");
        result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
 
-       ac_build_endif(&ctx->ac, -1);
+       if (ctx->abi->robust_buffer_access) {
+               ac_build_endif(&ctx->ac, -1);
 
-       LLVMBasicBlockRef incoming_blocks[2] = {
-               start_block,
-               then_block,
-       };
+               LLVMBasicBlockRef incoming_blocks[2] = {
+                       start_block,
+                       then_block,
+               };
 
-       LLVMValueRef incoming_values[2] = {
-               LLVMConstInt(ctx->ac.i64, 0, 0),
-               result,
-       };
-       LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
-       LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
-       return ret;
+               LLVMValueRef incoming_values[2] = {
+                       LLVMConstInt(ctx->ac.i64, 0, 0),
+                       result,
+               };
+               LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
+               LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
+               return ret;
+       } else {
+               return result;
+       }
 }
 
 static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
@@ -3566,6 +3575,36 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
        case nir_intrinsic_mbcnt_amd:
                result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0]));
                break;
+       case nir_intrinsic_load_scratch: {
+               LLVMValueRef offset = get_src(ctx, instr->src[0]);
+               LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
+                                                offset);
+               LLVMTypeRef comp_type =
+                       LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
+               LLVMTypeRef vec_type =
+                       instr->dest.ssa.num_components == 1 ? comp_type :
+                       LLVMVectorType(comp_type, instr->dest.ssa.num_components);
+               unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+               ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
+                                      LLVMPointerType(vec_type, addr_space), "");
+               result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
+               break;
+       }
+       case nir_intrinsic_store_scratch: {
+               LLVMValueRef offset = get_src(ctx, instr->src[1]);
+               LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
+                                                offset);
+               LLVMTypeRef comp_type =
+                       LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
+               LLVMTypeRef vec_type =
+                       instr->src[0].ssa->num_components == 1 ? comp_type :
+                       LLVMVectorType(comp_type, instr->src[0].ssa->num_components);
+               unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+               ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
+                                      LLVMPointerType(vec_type, addr_space), "");
+               LLVMBuildStore(ctx->ac.builder, get_src(ctx, instr->src[0]), ptr);
+               break;
+       }
        default:
                fprintf(stderr, "Unknown intrinsic: ");
                nir_print_instr(&instr->instr, stderr);
@@ -4466,6 +4505,18 @@ setup_locals(struct ac_nir_context *ctx,
        }
 }
 
+/* Back the shader's NIR scratch space with one i8-array alloca, stored in
+ * ctx->scratch. Nothing is allocated when shader->scratch_size is zero. */
+static void
+setup_scratch(struct ac_nir_context *ctx, struct nir_shader *shader)
+{
+       if (shader->scratch_size != 0) {
+               LLVMTypeRef scratch_type =
+                       LLVMArrayType(ctx->ac.i8, shader->scratch_size);
+               ctx->scratch = ac_build_alloca_undef(&ctx->ac, scratch_type, "scratch");
+       }
+}
+
 static void
 setup_shared(struct ac_nir_context *ctx,
             struct nir_shader *nir)
@@ -4511,6 +4562,7 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
        ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
 
        setup_locals(&ctx, func);
+       setup_scratch(&ctx, nir);
 
        if (gl_shader_stage_is_compute(nir->info.stage))
                setup_shared(&ctx, nir);
@@ -4532,6 +4584,15 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
 void
 ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
 {
+       /* Lower large variables to scratch first so that we won't bloat the
+        * shader by generating large if ladders for them. We later lower
+        * scratch to alloca's, assuming LLVM won't generate VGPR indexing.
+        */
+       NIR_PASS_V(nir, nir_lower_vars_to_scratch,
+                  nir_var_function_temp,
+                  256,
+                  glsl_get_natural_size_align_bytes);
+
        /* While it would be nice not to have this flag, we are constrained
         * by the reality that LLVM 9.0 has buggy VGPR indexing on GFX9.
         */