From 7727fbb7c5d64348994bce6682e681d6181a91e9 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Tue, 13 Aug 2013 18:59:35 +0200 Subject: [PATCH] r600/radeonsi: implement new float comparison instructions MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Also use ordered comparisons for old cmp instructions. Tested-by: Michel Dänzer Reviewed-by: Tom Stellard --- src/gallium/drivers/r600/r600_shader.c | 18 +++---- .../drivers/radeon/radeon_setup_tgsi_llvm.c | 49 +++++++++++++++---- 2 files changed, 48 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 37298ccdcf2..fb766c491cd 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -5743,11 +5743,10 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {105, 0, ALU_OP0_NOP, tgsi_unsupported}, {106, 0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported}, - /* gap */ - {108, 0, ALU_OP0_NOP, tgsi_unsupported}, - {109, 0, ALU_OP0_NOP, tgsi_unsupported}, - {110, 0, ALU_OP0_NOP, tgsi_unsupported}, - {111, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2}, + {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2}, + {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap}, + {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap}, {TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ @@ -5936,11 +5935,10 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {105, 0, ALU_OP0_NOP, tgsi_unsupported}, {106, 0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported}, - /* gap */ - {108, 0, ALU_OP0_NOP, tgsi_unsupported}, - {109, 0, ALU_OP0_NOP, tgsi_unsupported}, - {110, 0, ALU_OP0_NOP, tgsi_unsupported}, - {111, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2}, + {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2}, + {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap}, + {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap}, {TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index 7a47746ce1e..8ff9abd5ed9 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -850,18 +850,16 @@ static void emit_cmp( LLVMRealPredicate pred; LLVMValueRef cond; - /* XXX I'm not sure whether to do unordered or ordered comparisons, - * but llvmpipe uses unordered comparisons, so for consistency we use - * unordered. (The authors of llvmpipe aren't sure about using - * unordered vs ordered comparisons either. + /* Use ordered for everything but NE (which is usual for + * float comparisons) */ switch (emit_data->inst->Instruction.Opcode) { - case TGSI_OPCODE_SGE: pred = LLVMRealUGE; break; - case TGSI_OPCODE_SEQ: pred = LLVMRealUEQ; break; - case TGSI_OPCODE_SLE: pred = LLVMRealULE; break; - case TGSI_OPCODE_SLT: pred = LLVMRealULT; break; + case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break; + case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break; + case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break; + case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break; case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break; - case TGSI_OPCODE_SGT: pred = LLVMRealUGT; break; + case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break; default: assert(!"unknown instruction"); pred = 0; break; } @@ -872,6 +870,35 @@ static void emit_cmp( cond, bld_base->base.one, bld_base->base.zero, ""); } +static void emit_fcmp( + const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMContextRef context = bld_base->base.gallivm->context; + LLVMRealPredicate pred; + + /* Use ordered for everything but NE (which is usual for + * float comparisons) + */ + switch (emit_data->inst->Instruction.Opcode) { + case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break; + case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break; + case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break; + case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break; + default: assert(!"unknown instruction"); pred = 0; break; + } + + LLVMValueRef v = LLVMBuildFCmp(builder, pred, + emit_data->args[0], emit_data->args[1],""); + + v = LLVMBuildSExtOrBitCast(builder, v, + LLVMInt32TypeInContext(context), ""); + + emit_data->output[emit_data->chan] = v; +} + static void emit_not( const struct lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, @@ -1236,6 +1263,10 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction."; bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i; bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u; + bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp; + bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp; + bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp; + bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp; bld_base->op_actions[TGSI_OPCODE_IABS].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs."; bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv; -- 2.30.2