From ff947c6d65830b7be6e9fcbfe666fa7dba6341f6 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Mon, 24 Sep 2012 16:04:26 +0200 Subject: [PATCH] radeon/llvm: improve select_cc lowering to generate CND* more often v2: - Simplify isZero() - Remove an unused function prototype - Clean whitespace trails Reviewed-by: Tom Stellard --- src/gallium/drivers/r600/r600_llvm.c | 15 ++++ .../drivers/radeon/R600ISelLowering.cpp | 89 +++++++++++-------- src/gallium/drivers/radeon/R600ISelLowering.h | 2 + .../drivers/radeon/R600Instructions.td | 38 ++++++-- 4 files changed, 103 insertions(+), 41 deletions(-) diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index dd0a714ebf6..71ea5781220 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -165,6 +165,20 @@ static void llvm_emit_tex( emit_data->dst_type, args, c, LLVMReadNoneAttribute); } +static void emit_cndlt( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMValueRef float_zero = lp_build_const_float( + bld_base->base.gallivm, 0.0f); + LLVMValueRef cmp = LLVMBuildFCmp( + builder, LLVMRealULT, emit_data->args[0], float_zero, ""); + emit_data->output[emit_data->chan] = LLVMBuildSelect(builder, + cmp, emit_data->args[1], emit_data->args[2], ""); +} + static void dp_fetch_args( struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) @@ -241,6 +255,7 @@ LLVMModuleRef r600_tgsi_llvm( bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex; + bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cndlt; lp_build_tgsi_llvm(bld_base, tokens); diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp index 
2fc9c6708ef..5dd2f5334c5 100644 --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp @@ -516,6 +516,17 @@ SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const Op.getOperand(1))); } +bool R600TargetLowering::isZero(SDValue Op) const +{ + if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) { + return Cst->isNullValue(); + } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){ + return CstFP->isZero(); + } else { + return false; + } +} + SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); @@ -568,47 +579,58 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const if (isHWTrueValue(False) && isHWFalseValue(True)) { } - // XXX Check if we can lower this to a SELECT or if it is supported by a native - // operation. (The code below does this but we don't have the Instruction - // selection patterns to do this yet. -#if 0 + // Check if we can lower this to a native operation. + // CND* instructions require all operands to have the same type, + // and RHS to be zero. + if (isZero(LHS) || isZero(RHS)) { SDValue Cond = (isZero(LHS) ? RHS : LHS); - bool SwapTF = false; + SDValue Zero = (isZero(LHS) ? 
LHS : RHS); + ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); + if (CompareVT != VT) { + True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True); + False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False); + } + if (isZero(LHS)) { + CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode); + } + switch (CCOpcode) { - case ISD::SETOEQ: - case ISD::SETUEQ: - case ISD::SETEQ: - SwapTF = true; - // Fall through case ISD::SETONE: case ISD::SETUNE: case ISD::SETNE: - // We can lower to select - if (SwapTF) { - Temp = True; - True = False; - False = Temp; - } - // CNDE - return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); + case ISD::SETULE: + case ISD::SETULT: + case ISD::SETOLE: + case ISD::SETOLT: + case ISD::SETLE: + case ISD::SETLT: + CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32); + Temp = True; + True = False; + False = Temp; + break; default: - // Supported by a native operation (CNDGE, CNDGT) - return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); + break; } + SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, + Cond, Zero, + True, False, + DAG.getCondCode(CCOpcode)); + return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode); } -#endif + // If we make it this for it means we have no native instructions to handle // this SELECT_CC, so we must lower it. 
SDValue HWTrue, HWFalse; - if (VT == MVT::f32) { - HWTrue = DAG.getConstantFP(1.0f, VT); - HWFalse = DAG.getConstantFP(0.0f, VT); - } else if (VT == MVT::i32) { - HWTrue = DAG.getConstant(-1, VT); - HWFalse = DAG.getConstant(0, VT); + if (CompareVT == MVT::f32) { + HWTrue = DAG.getConstantFP(1.0f, CompareVT); + HWFalse = DAG.getConstantFP(0.0f, CompareVT); + } else if (CompareVT == MVT::i32) { + HWTrue = DAG.getConstant(-1, CompareVT); + HWFalse = DAG.getConstant(0, CompareVT); } else { assert(!"Unhandled value type in LowerSELECT_CC"); @@ -616,15 +638,12 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const // Lower this unsupported SELECT_CC into a combination of two supported // SELECT_CC operations. - SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC); - - // Convert floating point condition to i1 - if (VT == MVT::f32) { - Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32, - DAG.getNode(ISD::FNEG, DL, VT, Cond)); - } + SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC); - return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); + return DAG.getNode(ISD::SELECT_CC, DL, VT, + Cond, HWFalse, + True, False, + DAG.getCondCode(ISD::SETNE)); } SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const diff --git a/src/gallium/drivers/radeon/R600ISelLowering.h b/src/gallium/drivers/radeon/R600ISelLowering.h index 7b9c27ee12e..7df2dd13787 100644 --- a/src/gallium/drivers/radeon/R600ISelLowering.h +++ b/src/gallium/drivers/radeon/R600ISelLowering.h @@ -60,6 +60,8 @@ private: SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const; SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; + + bool isZero(SDValue Op) const; }; } // End namespace llvm; diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index feb97fac822..1689a2f4ab8 100644 
--- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -545,7 +545,25 @@ def SETGE_UINT : R600_2OP < def CNDE_INT : R600_3OP < 0x1C, "CNDE_INT", [(set (i32 R600_Reg32:$dst), - (select R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))] + (selectcc (i32 R600_Reg32:$src0), 0, + (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), + COND_EQ))] +>; + +def CNDGE_INT : R600_3OP < + 0x1E, "CNDGE_INT", + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), 0, + (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), + COND_GE))] +>; + +def CNDGT_INT : R600_3OP < + 0x1D, "CNDGT_INT", + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), 0, + (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), + COND_GT))] >; //===----------------------------------------------------------------------===// @@ -642,18 +660,26 @@ class MULADD_Common inst> : R600_3OP < class CNDE_Common inst> : R600_3OP < inst, "CNDE", - [(set (f32 R600_Reg32:$dst), - (select (i32 (fp_to_sint (fneg R600_Reg32:$src0))), (f32 R600_Reg32:$src2), (f32 R600_Reg32:$src1)))] + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), FP_ZERO, + (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), + COND_EQ))] >; class CNDGT_Common inst> : R600_3OP < inst, "CNDGT", - [] + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), FP_ZERO, + (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), + COND_GT))] >; - + class CNDGE_Common inst> : R600_3OP < inst, "CNDGE", - [(set R600_Reg32:$dst, (int_AMDGPU_cndlt R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))] + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), FP_ZERO, + (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), + COND_GE))] >; class DOT4_Common inst> : R600_REDUCTION < -- 2.30.2