From d64593e3c464f7c612a8d82eccafd23a08c8c167 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Mon, 26 Aug 2019 16:19:31 -0400
Subject: [PATCH] ac: use fma on gfx10

Acked-by: Pierre-Eric Pelloux-Prayer
---
 src/amd/common/ac_llvm_build.c  | 7 +++++++
 src/amd/common/ac_nir_to_llvm.c | 3 ++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 94ec569ad9f..a1e3ce6d241 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2665,6 +2665,13 @@ LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
 LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
 			   LLVMValueRef s1, LLVMValueRef s2)
 {
+	/* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
+	if (ctx->chip_class >= GFX10) {
+		return ac_build_intrinsic(ctx, "llvm.fma.f32", ctx->f32,
+					  (LLVMValueRef []) {s0, s1, s2}, 3,
+					  AC_FUNC_ATTR_READNONE);
+	}
+
 	return LLVMBuildFAdd(ctx->builder,
 			     LLVMBuildFMul(ctx->builder, s0, s1, ""), s2, "");
 }
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 5ed21b1650b..7294b293ca2 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -811,7 +811,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 		}
 		break;
 	case nir_op_ffma:
-		result = emit_intrin_3f_param(&ctx->ac, "llvm.fmuladd",
+		/* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
+		result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd",
 		                       ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
 		break;
 	case nir_op_ldexp:
-- 
2.30.2