radeonsi: move building llvm.SI.load.const into ac_build_buffer_load
author: Marek Olšák <marek.olsak@amd.com>
Fri, 19 May 2017 13:02:34 +0000 (15:02 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Sun, 28 May 2017 23:52:16 +0000 (01:52 +0200)
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/amd/common/ac_llvm_build.c
src/amd/common/ac_llvm_build.h
src/amd/common/ac_nir_to_llvm.c
src/gallium/drivers/radeonsi/si_shader.c

index 3df9f53ed7a38016231c3bab1091404df7b7a646..237e9291d41571e18d1f5bb5ac1433905a4b4c5f 100644 (file)
@@ -633,14 +633,46 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
                     unsigned inst_offset,
                     unsigned glc,
                     unsigned slc,
-                    bool can_speculate)
+                    bool can_speculate,
+                    bool allow_smem)
 {
+       LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0);
+       if (voffset)
+               offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
+       if (soffset)
+               offset = LLVMBuildAdd(ctx->builder, offset, soffset, "");
+
+       /* TODO: VI and later generations can use SMEM with GLC=1.*/
+       if (allow_smem && !glc && !slc) {
+               assert(vindex == NULL);
+
+               LLVMValueRef result[4];
+
+               for (int i = 0; i < num_channels; i++) {
+                       if (i) {
+                               offset = LLVMBuildAdd(ctx->builder, offset,
+                                                     LLVMConstInt(ctx->i32, 4, 0), "");
+                       }
+                       LLVMValueRef args[2] = {rsrc, offset};
+                       result[i] = ac_build_intrinsic(ctx, "llvm.SI.load.const.v4i32",
+                                                      ctx->f32, args, 2,
+                                                      AC_FUNC_ATTR_READNONE |
+                                                      AC_FUNC_ATTR_LEGACY);
+               }
+               if (num_channels == 1)
+                       return result[0];
+
+               if (num_channels == 3)
+                       result[num_channels++] = LLVMGetUndef(ctx->f32);
+               return ac_build_gather_values(ctx, result, num_channels);
+       }
+
        unsigned func = CLAMP(num_channels, 1, 3) - 1;
 
        LLVMValueRef args[] = {
                LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
                vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
-               LLVMConstInt(ctx->i32, inst_offset, 0),
+               offset,
                LLVMConstInt(ctx->i1, glc, 0),
                LLVMConstInt(ctx->i1, slc, 0)
        };
@@ -650,16 +682,6 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
        const char *type_names[] = {"f32", "v2f32", "v4f32"};
        char name[256];
 
-       if (voffset) {
-               args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset,
-                               "");
-       }
-
-       if (soffset) {
-               args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset,
-                               "");
-       }
-
        snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
                 type_names[func]);
 
index c1b5f3d228e911142a3da3e8847e8e314c1a8d77..ebb78fbd79b849b27ef5928be537dc9973c4dc90 100644 (file)
@@ -143,7 +143,8 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
                     unsigned inst_offset,
                     unsigned glc,
                     unsigned slc,
-                    bool can_speculate);
+                    bool can_speculate,
+                    bool allow_smem);
 
 LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
                                         LLVMValueRef rsrc,
index 8ae0a75fd04c310eadefe771a6a6a0eb61d7b2c7..28ba47d502f7d73e8b6f398d68eb3453ed7866fc 100644 (file)
@@ -2823,7 +2823,7 @@ load_tes_input(struct nir_to_llvm_context *ctx,
                                                     is_compact, vertex_index, indir_index);
 
        result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, instr->num_components, NULL,
-                                     buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true);
+                                     buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false);
        result = trim_vector(ctx, result, instr->num_components);
        result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx, &instr->dest.ssa), "");
        return result;
index 212f558d9bb2244e29df9c58ea118fa7fa3698ac..ddfaa3b724709c68680e8f8f3e57b23d3807f8c4 100644 (file)
@@ -833,14 +833,14 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
 
        if (swizzle == ~0) {
                value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset,
-                                            0, 1, 0, can_speculate);
+                                            0, 1, 0, can_speculate, false);
 
                return LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
        }
 
        if (!tgsi_type_is_64bit(type)) {
                value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset,
-                                            0, 1, 0, can_speculate);
+                                            0, 1, 0, can_speculate, false);
 
                value = LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
                return LLVMBuildExtractElement(gallivm->builder, value,
@@ -848,10 +848,10 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
        }
 
        value = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
-                                 swizzle * 4, 1, 0, can_speculate);
+                                 swizzle * 4, 1, 0, can_speculate, false);
 
        value2 = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
-                                  swizzle * 4 + 4, 1, 0, can_speculate);
+                                  swizzle * 4 + 4, 1, 0, can_speculate, false);
 
        return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
 }
@@ -1154,14 +1154,14 @@ static LLVMValueRef fetch_input_gs(
        soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle) * 256, 0);
 
        value = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, ctx->i32_0,
-                                    vtx_offset, soffset, 0, 1, 0, true);
+                                    vtx_offset, soffset, 0, 1, 0, true, false);
        if (tgsi_type_is_64bit(type)) {
                LLVMValueRef value2;
                soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle + 1) * 256, 0);
 
                value2 = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1,
                                              ctx->i32_0, vtx_offset, soffset,
-                                             0, 1, 0, true);
+                                             0, 1, 0, true, false);
                return si_llvm_emit_fetch_64bit(bld_base, type,
                                                value, value2);
        }
@@ -1389,12 +1389,8 @@ static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
                                      LLVMValueRef resource,
                                      LLVMValueRef offset)
 {
-       LLVMBuilderRef builder = ctx->gallivm.builder;
-       LLVMValueRef args[2] = {resource, offset};
-
-       return lp_build_intrinsic(builder, "llvm.SI.load.const.v4i32", ctx->f32, args, 2,
-                                 LP_FUNC_ATTR_READNONE |
-                                 LP_FUNC_ATTR_LEGACY);
+       return ac_build_buffer_load(&ctx->ac, resource, 1, NULL, offset, NULL,
+                                   0, 0, 0, true, true);
 }
 
 static LLVMValueRef load_sample_position(struct si_shader_context *ctx, LLVMValueRef sample_id)
@@ -5199,7 +5195,8 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
                                        ac_build_buffer_load(&ctx.ac,
                                                             ctx.gsvs_ring[0], 1,
                                                             ctx.i32_0, voffset,
-                                                            soffset, 0, 1, 1, true);
+                                                            soffset, 0, 1, 1,
+                                                            true, false);
                        }
                }