ac: refactor visit_load_buffer
author Rhys Perry <pendingchaos02@gmail.com>
Wed, 5 Dec 2018 13:42:47 +0000 (13:42 +0000)
committer Rhys Perry <pendingchaos02@gmail.com>
Sun, 16 Dec 2018 14:56:10 +0000 (14:56 +0000)
This is so that we can split different types of loads more easily.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/amd/common/ac_llvm_build.c
src/amd/common/ac_nir_to_llvm.c

index 58f72972d253266feca2a7f96fd905399ccfb5fe..336554a9ac869d494acfc9e2af87c222b0bb38d0 100644 (file)
@@ -2701,9 +2701,11 @@ LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
        if (count == num_components)
                return value;
 
-       LLVMValueRef masks[] = {
-           ctx->i32_0, ctx->i32_1,
-           LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
+       LLVMValueRef masks[MAX2(count, 2)];
+       masks[0] = ctx->i32_0;
+       masks[1] = ctx->i32_1;
+       for (unsigned i = 2; i < count; i++)
+               masks[i] = LLVMConstInt(ctx->i32, i, false);
 
        if (count == 1)
                return LLVMBuildExtractElement(ctx->builder, value, masks[0],
index 4294956de13739fde16e4f1b3fa5872202b0b4d3..2f68e0dcf1e617798bcebea573f5784c3d877c23 100644 (file)
@@ -1621,37 +1621,43 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
 static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
                                       const nir_intrinsic_instr *instr)
 {
-       LLVMValueRef results[2];
-       int load_bytes;
        int elem_size_bytes = instr->dest.ssa.bit_size / 8;
        int num_components = instr->num_components;
-       int num_bytes = num_components * elem_size_bytes;
        enum gl_access_qualifier access = nir_intrinsic_access(instr);
        LLVMValueRef glc = ctx->ac.i1false;
 
        if (access & (ACCESS_VOLATILE | ACCESS_COHERENT))
                glc = ctx->ac.i1true;
 
-       for (int i = 0; i < num_bytes; i += load_bytes) {
-               load_bytes = MIN2(num_bytes - i, 16);
-               const char *load_name;
-               LLVMTypeRef data_type;
-               LLVMValueRef offset = get_src(ctx, instr->src[1]);
-               LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i, false);
-               LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
-                                                       get_src(ctx, instr->src[0]), false);
-               LLVMValueRef vindex = ctx->ac.i32_0;
+       LLVMValueRef offset = get_src(ctx, instr->src[1]);
+       LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
+                                               get_src(ctx, instr->src[0]), false);
+       LLVMValueRef vindex = ctx->ac.i32_0;
+
+       LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
+       LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type;
 
-               int idx = i ? 1 : 0;
+       LLVMValueRef results[4];
+       for (int i = 0; i < num_components;) {
+               int num_elems = num_components - i;
+               if (num_elems * elem_size_bytes > 16)
+                       num_elems = 16 / elem_size_bytes;
+               int load_bytes = num_elems * elem_size_bytes;
+
+               LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false);
+
+               LLVMValueRef ret;
                if (load_bytes == 2) {
-                       results[idx] = ac_build_tbuffer_load_short(&ctx->ac,
-                                                                  rsrc,
-                                                                  vindex,
-                                                                  offset,
-                                                                  ctx->ac.i32_0,
-                                                                  immoffset,
-                                                                  glc);
+                       ret = ac_build_tbuffer_load_short(&ctx->ac,
+                                                         rsrc,
+                                                         vindex,
+                                                         offset,
+                                                         ctx->ac.i32_0,
+                                                         immoffset,
+                                                         glc);
                } else {
+                       const char *load_name;
+                       LLVMTypeRef data_type;
                        switch (load_bytes) {
                        case 16:
                        case 12:
@@ -1677,33 +1683,23 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
                                glc,
                                ctx->ac.i1false,
                        };
-                       results[idx] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
-                       unsigned num_elems = ac_get_type_size(data_type) / elem_size_bytes;
-                       LLVMTypeRef resTy = LLVMVectorType(LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size), num_elems);
-                       results[idx] = LLVMBuildBitCast(ctx->ac.builder, results[idx], resTy, "");
+                       ret = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
                }
-       }
 
-       assume(results[0]);
-       LLVMValueRef ret = results[0];
-       if (num_bytes > 16 || num_components == 3) {
-               LLVMValueRef masks[] = {
-                       LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
-                       LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
-               };
+               LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
+               ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, "");
+               ret = ac_trim_vector(&ctx->ac, ret, load_bytes);
 
-               if (num_bytes > 16 && num_components == 3) {
-                       /* we end up with a v2i64 and i64 but shuffle fails on that */
-                       results[1] = ac_build_expand(&ctx->ac, results[1], 1, 2);
-               }
+               LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems);
+               ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, "");
 
-               LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
-               ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0],
-                                            results[num_bytes > 16 ? 1 : 0], swizzle, "");
+               for (unsigned j = 0; j < num_elems; j++) {
+                       results[i + j] = LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), "");
+               }
+               i += num_elems;
        }
 
-       return LLVMBuildBitCast(ctx->ac.builder, ret,
-                               get_def_type(ctx, &instr->dest.ssa), "");
+       return ac_build_gather_values(&ctx->ac, results, num_components);
 }
 
 static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,