int rel_index, unsigned num);
LLVMValueRef si_get_sample_id(struct si_shader_context *ctx);
-void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base);
+void si_shader_context_init_alu(struct si_shader_context *ctx);
void si_shader_context_init_mem(struct si_shader_context *ctx);
LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
*/
#include "si_shader_internal.h"
+#include "si_pipe.h"
#include "ac_llvm_util.h"
void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible)
ctx->ac.i32, &in, 1, 0);
}
-void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
+void si_shader_context_init_alu(struct si_shader_context *ctx)
{
+ struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
+
lp_set_default_actions(bld_base);
bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.exp2.f32";
bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
- bld_base->op_actions[TGSI_OPCODE_FMA].emit =
- bld_base->op_actions[TGSI_OPCODE_MAD].emit;
+
+ /* FMA is better on GFX10+, because it has FMA units instead of MUL-ADD units. */
+ if (ctx->screen->info.chip_class >= GFX10) {
+ bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_FMA].intr_name = "llvm.fma.f32";
+ } else {
+ bld_base->op_actions[TGSI_OPCODE_FMA].emit =
+ bld_base->op_actions[TGSI_OPCODE_MAD].emit;
+ }
+
bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
- si_shader_context_init_alu(&ctx->bld_base);
+ si_shader_context_init_alu(ctx);
si_shader_context_init_mem(ctx);
ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context);