ac/nir: implement 64-bit packing and unpacking
author Connor Abbott <cwabbott0@gmail.com>
Wed, 7 Jun 2017 21:16:31 +0000 (14:16 -0700)
committer Connor Abbott <cwabbott0@gmail.com>
Mon, 3 Jul 2017 18:58:58 +0000 (11:58 -0700)
We implement the split opcodes, and tell NIR to lower the original ones.
The lowering to LLVM is a little more complicated, but NIR can optimize
the split ones a little better, and some NIR lowering passes that we
might want to use (particularly for doubles) emit the split ones.

This should fix pack/unpackDouble2x32, which appears to have been broken
ever since we enabled the Float64 capability. It will also fix pack/unpackInt2x32
when we enable the Int64 capability.

Fixes: 798ae37c ("radv: Enable Float64 support.")
Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/common/ac_nir_to_llvm.c
src/amd/vulkan/radv_pipeline.c

index 51daa3cff36aa7db90ec2e9d877ee9d5bf0f4047..88f3f44e92ca6482ba573528a7bc3868fbe53ddf 100644 (file)
@@ -1866,6 +1866,37 @@ static void visit_alu(struct nir_to_llvm_context *ctx, const nir_alu_instr *inst
        case nir_op_fddy_coarse:
                result = emit_ddxy(ctx, instr->op, src[0]);
                break;
+
+       case nir_op_unpack_64_2x32_split_x: {
+               assert(instr->src[0].src.ssa->num_components == 1);
+               LLVMValueRef tmp = LLVMBuildBitCast(ctx->builder, src[0],
+                                                   LLVMVectorType(ctx->i32, 2),
+                                                   "");
+               result = LLVMBuildExtractElement(ctx->builder, tmp,
+                                                ctx->i32zero, "");
+               break;
+       }
+
+       case nir_op_unpack_64_2x32_split_y: {
+               assert(instr->src[0].src.ssa->num_components == 1);
+               LLVMValueRef tmp = LLVMBuildBitCast(ctx->builder, src[0],
+                                                   LLVMVectorType(ctx->i32, 2),
+                                                   "");
+               result = LLVMBuildExtractElement(ctx->builder, tmp,
+                                                ctx->i32one, "");
+               break;
+       }
+
+       case nir_op_pack_64_2x32_split: {
+               LLVMValueRef tmp = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
+               tmp = LLVMBuildInsertElement(ctx->builder, tmp,
+                                            src[0], ctx->i32zero, "");
+               tmp = LLVMBuildInsertElement(ctx->builder, tmp,
+                                            src[1], ctx->i32one, "");
+               result = LLVMBuildBitCast(ctx->builder, tmp, ctx->i64, "");
+               break;
+       }
+
        default:
                fprintf(stderr, "Unknown NIR alu instr: ");
                nir_print_instr(&instr->instr, stderr);
index efe641dc0be7f3177ffab65046bffbaab3e31445..d05acd28506ee9021bd44ae071021198d2e200b4 100644 (file)
@@ -151,6 +151,7 @@ radv_optimize_nir(struct nir_shader *shader)
                 progress = false;
 
                 NIR_PASS_V(shader, nir_lower_vars_to_ssa);
+               NIR_PASS_V(shader, nir_lower_64bit_pack);
                 NIR_PASS_V(shader, nir_lower_alu_to_scalar);
                 NIR_PASS_V(shader, nir_lower_phis_to_scalar);