ac/llvm: fix 64-bit fmed3
author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 26 Feb 2020 14:04:38 +0000 (15:04 +0100)
committer: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 27 Feb 2020 07:04:28 +0000 (08:04 +0100)
Lower 64-bit fmed3 to fmin/fmax operations because LLVM doesn't expose an
llvm.amdgcn.fmed3.f64 intrinsic.

Fixes dEQP-VK.spirv_assembly.instruction.amd_trinary_minmax.mid3.f64.*.

Fixes: d6a07732c9c ("ac: use llvm.amdgcn.fmed3 intrinsic for nir_op_fmed3")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3962>

src/amd/llvm/ac_llvm_build.c

index 8d706944a453be6caff08f225ab9f644f2f52615..db7964d6aa9315e1cf2c509b9c84f301672f87c4 100644 (file)
@@ -2723,27 +2723,41 @@ LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
                            LLVMValueRef src1, LLVMValueRef src2,
                            unsigned bitsize)
 {
-       LLVMTypeRef type;
-       char *intr;
+       LLVMValueRef result;
 
-       if (bitsize == 16) {
-               intr = "llvm.amdgcn.fmed3.f16";
-               type = ctx->f16;
-       } else if (bitsize == 32) {
-               intr = "llvm.amdgcn.fmed3.f32";
-               type = ctx->f32;
+       if (bitsize == 64) {
+               /* Lower 64-bit fmed because LLVM doesn't expose an intrinsic. */
+               LLVMValueRef min1, min2, max1;
+
+               min1 = ac_build_fmin(ctx, src0, src1);
+               max1 = ac_build_fmax(ctx, src0, src1);
+               min2 = ac_build_fmin(ctx, max1, src2);
+
+               result = ac_build_fmax(ctx, min2, min1);
        } else {
-               intr = "llvm.amdgcn.fmed3.f64";
-               type = ctx->f64;
+               LLVMTypeRef type;
+               char *intr;
+
+               if (bitsize == 16) {
+                       intr = "llvm.amdgcn.fmed3.f16";
+                       type = ctx->f16;
+               } else {
+                       assert(bitsize == 32);
+                       intr = "llvm.amdgcn.fmed3.f32";
+                       type = ctx->f32;
+               }
+
+               LLVMValueRef params[] = {
+                       src0,
+                       src1,
+                       src2,
+               };
+
+               result = ac_build_intrinsic(ctx, intr, type, params, 3,
+                                           AC_FUNC_ATTR_READNONE);
        }
 
-       LLVMValueRef params[] = {
-               src0,
-               src1,
-               src2,
-       };
-       return ac_build_intrinsic(ctx, intr, type, params, 3,
-                                 AC_FUNC_ATTR_READNONE);
+       return result;
 }
 
 LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,