From 2f42d4cacc9a12bc210c279d86833bf210c9c806 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 26 Aug 2019 16:20:44 -0400 Subject: [PATCH] radeonsi/gfx10: use fma for TGSI_OPCODE_FMA Acked-by: Pierre-Eric Pelloux-Prayer --- .../drivers/radeonsi/si_shader_internal.h | 2 +- .../drivers/radeonsi/si_shader_tgsi_alu.c | 17 ++++++++++++++--- .../drivers/radeonsi/si_shader_tgsi_setup.c | 2 +- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index b576d94a63f..98abbdfc693 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -342,7 +342,7 @@ LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx, int rel_index, unsigned num); LLVMValueRef si_get_sample_id(struct si_shader_context *ctx); -void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base); +void si_shader_context_init_alu(struct si_shader_context *ctx); void si_shader_context_init_mem(struct si_shader_context *ctx); LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c index adad3223d99..4be410ec331 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c @@ -23,6 +23,7 @@ */ #include "si_shader_internal.h" +#include "si_pipe.h" #include "ac_llvm_util.h" void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible) @@ -677,8 +678,10 @@ static void dfracexp_emit(const struct lp_build_tgsi_action *action, ctx->ac.i32, &in, 1, 0); } -void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base) +void si_shader_context_init_alu(struct si_shader_context *ctx) { + struct lp_build_tgsi_context *bld_base = &ctx->bld_base; + lp_set_default_actions(bld_base); bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and; @@ -722,8 +725,16 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base) bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.exp2.f32"; bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32"; - bld_base->op_actions[TGSI_OPCODE_FMA].emit = - bld_base->op_actions[TGSI_OPCODE_MAD].emit; + + /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */ + if (ctx->screen->info.chip_class >= GFX10) { + bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem; + bld_base->op_actions[TGSI_OPCODE_FMA].intr_name = "llvm.fma.f32"; + } else { + bld_base->op_actions[TGSI_OPCODE_FMA].emit = + bld_base->op_actions[TGSI_OPCODE_MAD].emit; + } + bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac; bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i; bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u; diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c index 39abd4b18f6..1c6522322ff 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c @@ -1008,7 +1008,7 @@ void si_llvm_context_init(struct si_shader_context *ctx, bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit; bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit; - si_shader_context_init_alu(&ctx->bld_base); + si_shader_context_init_alu(ctx); si_shader_context_init_mem(ctx); ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context); -- 2.30.2