ac/nir: fix 64-bit division for GL CTS
author Marek Olšák <marek.olsak@amd.com>
Wed, 17 Jun 2020 19:25:28 +0000 (15:25 -0400)
committer Marge Bot <eric+marge@anholt.net>
Tue, 23 Jun 2020 04:46:55 +0000 (04:46 +0000)
This fixes: KHR-GL45.gpu_shader_fp64.builtin.mod_*

Fixes: ba2ec1f3 "ac/nir: use llvm.amdgcn.rcp in ac_build_fdiv()"
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5531>

src/amd/llvm/ac_llvm_build.c
src/amd/llvm/ac_nir_to_llvm.c

index 77681834ffae7c31889110bc5b80118cf90d4e83..6d5d1f7bab27d0ba9fc54830ed4c1b8dd8d24aa6 100644 (file)
@@ -705,6 +705,11 @@ ac_build_fdiv(struct ac_llvm_context *ctx,
        unsigned type_size = ac_get_type_size(LLVMTypeOf(den));
        const char *name;
 
+       /* For doubles, we need precise division to pass GLCTS. */
+       if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL &&
+           type_size == 8)
+               return LLVMBuildFDiv(ctx->builder, num, den, "");
+
        if (type_size == 2)
                name = "llvm.amdgcn.rcp.f16";
        else if (type_size == 4)
index b90a7e3dcf2b58055a2bf2fbcf1090a84a11531e..bd3cc246e5cd7c496f3d0af23a44bf6b17aaf011 100644 (file)
@@ -704,8 +704,15 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
                result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
                break;
        case nir_op_frcp:
-               result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp",
-                                             ac_to_float_type(&ctx->ac, def_type), src[0]);
+               /* For doubles, we need precise division to pass GLCTS. */
+               if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL &&
+                   ac_get_type_size(def_type) == 8) {
+                       result = LLVMBuildFDiv(ctx->ac.builder, ctx->ac.f64_1,
+                                              ac_to_float(&ctx->ac, src[0]), "");
+               } else {
+                       result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp",
+                                                     ac_to_float_type(&ctx->ac, def_type), src[0]);
+               }
                break;
        case nir_op_iand:
                result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");