radv/ac: Implement Float64 SSBO loads.
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Sun, 8 Jan 2017 18:38:28 +0000 (19:38 +0100)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Wed, 1 Feb 2017 00:09:34 +0000 (01:09 +0100)
Signed-off-by: Bas Nieuwenhuizen <basni@google.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
src/amd/common/ac_nir_to_llvm.c

index c50292e765fd293c267af955a4194ee5597382f3..50ed4d47836f6ab9d110b11055358d6fe97fc0fd 100644 (file)
@@ -2148,35 +2148,58 @@ static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx,
 static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx,
                                       nir_intrinsic_instr *instr)
 {
-       const char *load_name;
-       LLVMTypeRef data_type = ctx->f32;
-       if (instr->num_components == 3)
-               data_type = LLVMVectorType(ctx->f32, 4);
-       else if (instr->num_components > 1)
-               data_type = LLVMVectorType(ctx->f32, instr->num_components);
-
-       if (instr->num_components == 4 || instr->num_components == 3)
-               load_name = "llvm.amdgcn.buffer.load.v4f32";
-       else if (instr->num_components == 2)
-               load_name = "llvm.amdgcn.buffer.load.v2f32";
-       else if (instr->num_components == 1)
-               load_name = "llvm.amdgcn.buffer.load.f32";
-       else
-               abort();
+       LLVMValueRef results[2];
+       int load_components;
+       int num_components = instr->num_components;
+       if (instr->dest.ssa.bit_size == 64)
+               num_components *= 2;
 
-       LLVMValueRef params[] = {
-           get_src(ctx, instr->src[0]),
-           LLVMConstInt(ctx->i32, 0, false),
-           get_src(ctx, instr->src[1]),
-           LLVMConstInt(ctx->i1, 0, false),
-           LLVMConstInt(ctx->i1, 0, false),
-       };
+       for (int i = 0; i < num_components; i += load_components) {
+               load_components = MIN2(num_components - i, 4);
+               const char *load_name;
+               LLVMTypeRef data_type = ctx->f32;
+               LLVMValueRef offset = LLVMConstInt(ctx->i32, i * 4, false);
+               offset = LLVMBuildAdd(ctx->builder, get_src(ctx, instr->src[1]), offset, "");
+
+               if (load_components == 3)
+                       data_type = LLVMVectorType(ctx->f32, 4);
+               else if (load_components > 1)
+                       data_type = LLVMVectorType(ctx->f32, load_components);
+
+               if (load_components >= 3)
+                       load_name = "llvm.amdgcn.buffer.load.v4f32";
+               else if (load_components == 2)
+                       load_name = "llvm.amdgcn.buffer.load.v2f32";
+               else if (load_components == 1)
+                       load_name = "llvm.amdgcn.buffer.load.f32";
+               else
+                       unreachable("unhandled number of components");
 
-       LLVMValueRef ret =
-           ac_emit_llvm_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
+               LLVMValueRef params[] = {
+                       get_src(ctx, instr->src[0]),
+                       LLVMConstInt(ctx->i32, 0, false),
+                       offset,
+                       LLVMConstInt(ctx->i1, 0, false),
+                       LLVMConstInt(ctx->i1, 0, false),
+               };
+
+               results[i] = ac_emit_llvm_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
+
+       }
 
-       if (instr->num_components == 3)
-               ret = trim_vector(ctx, ret, 3);
+       LLVMValueRef ret = results[0];
+       if (num_components > 4 || num_components == 3) {
+               LLVMValueRef masks[] = {
+                       LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
+                       LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false),
+                       LLVMConstInt(ctx->i32, 4, false), LLVMConstInt(ctx->i32, 5, false),
+                       LLVMConstInt(ctx->i32, 6, false), LLVMConstInt(ctx->i32, 7, false)
+               };
+
+               LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
+               ret = LLVMBuildShuffleVector(ctx->builder, results[0],
+                                            results[num_components > 4 ? 1 : 0], swizzle, "");
+       }
 
        return LLVMBuildBitCast(ctx->builder, ret,
                                get_def_type(ctx, &instr->dest.ssa), "");