From: Tom Stellard Date: Fri, 18 May 2012 20:58:31 +0000 (-0400) Subject: radeon/llvm: Handle selectcc DAG node X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=cee23ab246f22210b3063cdc47bdb45b3d943526;p=mesa.git radeon/llvm: Handle selectcc DAG node R600 can now select instructions from the selectcc DAG node, which is typically lowered to one of the SET* instructions. --- diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp index 8df80352085..a6827e5a24a 100644 --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp @@ -23,6 +23,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : { // We need to custom lower some of the intrinsics setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); } SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) @@ -31,6 +34,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) switch (Op.getOpcode()) { default: return AMDILTargetLowering::LowerOperation(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); } } @@ -101,6 +105,143 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, OneSubAC); } +SDValue AMDGPUTargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue True = Op.getOperand(2); + SDValue False = Op.getOperand(3); + SDValue CC = Op.getOperand(4); + ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); + SDValue Temp; + + //cmovlog = src0 != 0.0f ? src1 : src2 + //cmovlog = src0 == 0.0f ? src2 : src1 + //cnde = src0 == 0.0f ? 
src1 : src2 + + // LHS and RHS are guaranteed to be the same value type + EVT CompareVT = LHS.getValueType(); + + // We need all the operands of SELECT_CC to have the same value type, so if + // necessary we need to convert LHS and RHS to be the same type as True and + // False. True and False are guaranteed to have the same type as this + // SELECT_CC node. + + if (CompareVT != VT) { + ISD::NodeType ConversionOp = ISD::DELETED_NODE; + if (VT == MVT::f32 && CompareVT == MVT::i32) { + if (isUnsignedIntSetCC(CCOpcode)) { + ConversionOp = ISD::UINT_TO_FP; + } else { + ConversionOp = ISD::SINT_TO_FP; + } + } else if (VT == MVT::i32 && CompareVT == MVT::f32) { + ConversionOp = ISD::FP_TO_SINT; + } else { + // I don't think there will be any other type pairings. + assert(!"Unhandled operand type parings in SELECT_CC"); + } + // XXX Check the value of LHS and RHS and avoid creating sequences like + // (FTOI (ITOF)) + LHS = DAG.getNode(ConversionOp, DL, VT, LHS); + RHS = DAG.getNode(ConversionOp, DL, VT, RHS); + } + + // If true is 1 and false is 0 or vice-versa we can handle this with a native + // instruction (SET* instructions). + if ((isOne(True) && isZero(False))) { + return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); + } + + // XXX If true is 0 and false is 1, we can handle this with a native + // instruction, but we need to swap true and false and change the + // conditional. + if (isOne(False) && isZero(True)) { + } + + // XXX Check if we can lower this to a SELECT or if it is supported by a native + // operation. (The code below does this but we don't have the Instruction + // selection patterns to do this yet.) +#if 0 + if (isZero(LHS) || isZero(RHS)) { + SDValue Cond = (isZero(LHS) ? 
RHS : LHS); + bool SwapTF = false; + switch (CCOpcode) { + case ISD::SETOEQ: + case ISD::SETUEQ: + case ISD::SETEQ: + SwapTF = true; + // Fall through + case ISD::SETONE: + case ISD::SETUNE: + case ISD::SETNE: + // We can lower to select + if (SwapTF) { + Temp = True; + True = False; + False = Temp; + } + // CNDE + return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); + default: + // Supported by a native operation (CNDGE, CNDGT) + return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); + } + } +#endif + + // If we make it this far it means we have no native instructions to handle + // this SELECT_CC, so we must lower it. + SDValue One, Zero; + + if (VT == MVT::f32) { + One = DAG.getConstantFP(1.0f, VT); + Zero = DAG.getConstantFP(0.0f, VT); + } else if (VT == MVT::i32) { + One = DAG.getConstant(1, VT); + Zero = DAG.getConstant(0, VT); + } + else { + assert(!"Unhandled value type in LowerSELECT_CC"); + } + + // Lower this unsupported SELECT_CC into a supported SELECT_CC (producing + // 1 or 0) followed by a SELECT on that result. 
+ SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, One, Zero, CC); + + return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); +} + +//===----------------------------------------------------------------------===// +// Helper functions +//===----------------------------------------------------------------------===// + +bool AMDGPUTargetLowering::isOne(SDValue Op) const +{ + if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { + return CFP->isExactlyValue(1.0); + } + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + return C->isOne(); + } + return false; +} + +bool AMDGPUTargetLowering::isZero(SDValue Op) const +{ + if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { + return CFP->getValueAPF().isZero(); + } + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + return C->isNullValue(); + } + return false; +} + void AMDGPUTargetLowering::addLiveIn(MachineInstr * MI, MachineFunction * MF, MachineRegisterInfo & MRI, const TargetInstrInfo * TII, unsigned reg) const diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.h b/src/gallium/drivers/radeon/AMDGPUISelLowering.h index 849c0c734bb..d37473974bb 100644 --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.h +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.h @@ -23,6 +23,7 @@ class AMDGPUTargetLowering : public AMDILTargetLowering { private: SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; protected: @@ -36,6 +37,9 @@ protected: MachineRegisterInfo & MRI, const TargetInstrInfo * TII, unsigned reg) const; + bool isOne(SDValue Op) const; + bool isZero(SDValue Op) const; + public: AMDGPUTargetLowering(TargetMachine &TM); diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp index 1fffb99c601..05a31d3ff9b 100644 --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp @@ -36,6 +36,13 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) 
: setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal); setOperationAction(ISD::FSUB, MVT::f32, Expand); + +#if 0 + + setTargetDAGCombine(ISD::Constant); + setTargetDAGCombine(ISD::ConstantFP); + +#endif } MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp index 2bd59fd5e1b..af106b225f9 100644 --- a/src/gallium/drivers/radeon/R600InstrInfo.cpp +++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp @@ -69,6 +69,10 @@ unsigned R600InstrInfo::getISAOpcode(unsigned opcode) const return AMDIL::ADD_INT; case AMDIL::CUSTOM_XOR_i32: return AMDIL::XOR_INT; + case AMDIL::IEQ: + return AMDIL::SETE_INT; + case AMDIL::INE: + return AMDIL::SETNE_INT; case AMDIL::MOVE_f32: case AMDIL::MOVE_i32: return AMDIL::MOV; diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index a42d917e11d..1f2c2d0f1e8 100644 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -119,6 +119,57 @@ def TEX_SHADOW : PatLeaf< }] >; +def FP_ZERO : PatLeaf < + (fpimm), + [{return N->getValueAPF().isZero();}] +>; + +def FP_ONE : PatLeaf < + (fpimm), + [{return N->isExactlyValue(1.0);}] +>; + +def COND_EQ : PatLeaf < + (cond), + [{switch(N->get()){{default: return false; + case ISD::SETOEQ: case ISD::SETUEQ: + case ISD::SETEQ: return true;}}}] +>; + +def COND_NE : PatLeaf < + (cond), + [{switch(N->get()){{default: return false; + case ISD::SETONE: case ISD::SETUNE: + case ISD::SETNE: return true;}}}] +>; +def COND_GT : PatLeaf < + (cond), + [{switch(N->get()){{default: return false; + case ISD::SETOGT: case ISD::SETUGT: + case ISD::SETGT: return true;}}}] +>; + +def COND_GE : PatLeaf < + (cond), + [{switch(N->get()){{default: return false; + case ISD::SETOGE: case ISD::SETUGE: + case ISD::SETGE: return true;}}}] +>; + +def COND_LT : PatLeaf < + (cond), + 
[{switch(N->get()){{default: return false; + case ISD::SETOLT: case ISD::SETULT: + case ISD::SETLT: return true;}}}] +>; + +def COND_LE : PatLeaf < + (cond), + [{switch(N->get()){{default: return false; + case ISD::SETOLE: case ISD::SETULE: + case ISD::SETLE: return true;}}}] +>; + class EG_CF_RAT cf_inst, bits <6> rat_inst, dag outs, dag ins, string asm> : InstR600ISA @@ -275,26 +326,35 @@ def MIN : R600_2OP < def SETE : R600_2OP < 0x08, "SETE", - [(set R600_Reg32:$dst, (int_AMDGPU_seq R600_Reg32:$src0, R600_Reg32:$src1))]> { - let AMDILOp = AMDILInst.FEQ; -} + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, + COND_EQ))] +>; +//let AMDILOp = AMDILInst.FEQ; def SGT : R600_2OP < 0x09, "SETGT", - [(set R600_Reg32:$dst, (int_AMDGPU_sgt R600_Reg32:$src0, R600_Reg32:$src1))] + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, + COND_GT))] >; def SGE : R600_2OP < 0xA, "SETGE", - [(set R600_Reg32:$dst, (int_AMDGPU_sge R600_Reg32:$src0, R600_Reg32:$src1))]> { - let AMDILOp = AMDILInst.FGE; -} + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, + COND_GE))] +>; +//let AMDILOp = AMDILInst.FGE; def SNE : R600_2OP < 0xB, "SETNE", - [(set R600_Reg32:$dst, (int_AMDGPU_sne R600_Reg32:$src0, R600_Reg32:$src1))]> { - let AMDILOp = AMDILInst.FNE; -} + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, + COND_NE))] +>; + +// let AMDILOp = AMDILInst.FNE; def FRACT : R600_1OP < 0x10, "FRACT", @@ -385,38 +445,48 @@ def MIN_UINT : R600_2OP < def SETE_INT : R600_2OP < 0x3A, "SETE_INT", - []>{ - let AMDILOp = AMDILInst.IEQ; -} + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, 1, 0, SETEQ))] +>; + +// let AMDILOp = AMDILInst.IEQ; def SETGT_INT : R600_2OP < 0x3B, "SGT_INT", - [] + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, 1, 0, SETGT))] >; def 
SETGE_INT : R600_2OP < 0x3C, "SETGE_INT", - []>{ - let AMDILOp = AMDILInst.IGE; -} + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, 1, 0, SETGE))] +>; +// let AMDILOp = AMDILInst.IGE; + def SETNE_INT : R600_2OP < 0x3D, "SETNE_INT", - []>{ - let AMDILOp = AMDILInst.INE; -} + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, 1, 0, SETNE))] +>; +//let AMDILOp = AMDILInst.INE; + def SETGT_UINT : R600_2OP < 0x3E, "SETGT_UINT", - []>{ - let AMDILOp = AMDILInst.UGT; -} + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, 1, 0, SETUGT))] +>; + +// let AMDILOp = AMDILInst.UGT; def SETGE_UINT : R600_2OP < 0x3F, "SETGE_UINT", - []>{ - let AMDILOp = AMDILInst.UGE; -} + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, 1, 0, SETUGE))] +>; +// let AMDILOp = AMDILInst.UGE; def CNDE_INT : R600_3OP < 0x1C, "CNDE_INT", @@ -1046,6 +1116,66 @@ def LOAD_VTX : AMDGPUShaderInst < } //End isPseudo +//===----------------------------------------------------------------------===// +// ISel Patterns +//===----------------------------------------------------------------------===// + +// SGT Reverse args +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT), + (SGT R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// SGE Reverse args +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE), + (SGE R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// SETGT_INT reverse args +def : Pat < + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, 1, 0, SETLT), + (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// SETGE_INT reverse args +def : Pat < + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, 1, 0, SETLE), + (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// SETGT_UINT reverse args +def : Pat < + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, 1, 0, SETULT), + (SETGT_UINT 
R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// SETGE_UINT reverse args +def : Pat < + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, 1, 0, SETULE), + (SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// The next two patterns are special cases for handling 'true if ordered' and +// 'true if unordered' conditionals. The assumption here is that the behavior of +// SETE and SNE conforms to the Direct3D 10 rules for floating point values +// described here: +// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit +// We assume that SETE returns false when one of the operands is NAN and +// SNE returns true when one of the operands is NAN + +//SETE - 'true if ordered' +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO), + (SETE R600_Reg32:$src0, R600_Reg32:$src1) +>; + +//SNE - 'true if unordered' +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO), + (SNE R600_Reg32:$src0, R600_Reg32:$src1) +>; + def : Extract_Element ; def : Extract_Element ; def : Extract_Element ; diff --git a/src/gallium/drivers/radeon/loader.cpp b/src/gallium/drivers/radeon/loader.cpp index 1eae1735191..79348d59430 100644 --- a/src/gallium/drivers/radeon/loader.cpp +++ b/src/gallium/drivers/radeon/loader.cpp @@ -30,5 +30,5 @@ int main(int argc, char ** argv) Module * mod = M.get(); - radeon_llvm_compile(wrap(mod), &bytes, &byte_count, "SI", 1); + radeon_llvm_compile(wrap(mod), &bytes, &byte_count, "redwood", 1); } diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index d71481b46da..9c2c449db65 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -57,19 +57,6 @@ unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan) return (index * 4) + chan; } -static void radeon_llvm_fetch_args_2_reverse_soa( - struct lp_build_tgsi_context * bld_base, 
- struct lp_build_emit_data * emit_data) -{ - assert(emit_data->info->num_src == 2); - emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, - 1, emit_data->chan); - emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, - 0, emit_data->chan); - emit_data->arg_count = 2; - emit_data->dst_type = LLVMTypeOf(emit_data->args[0]); -} - static LLVMValueRef emit_swizzle( struct lp_build_tgsi_context * bld_base, LLVMValueRef value, @@ -707,6 +694,37 @@ static void emit_icmp( emit_data->output[emit_data->chan] = v; } +static void emit_cmp( + const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMRealPredicate pred; + LLVMValueRef cond; + + /* XXX I'm not sure whether to do unordered or ordered comparisons, + * but llvmpipe uses unordered comparisons, so for consistency we use + * unordered. (The authors of llvmpipe aren't sure about using + * unordered vs ordered comparisons either.) 
+ */ + switch (emit_data->inst->Instruction.Opcode) { + case TGSI_OPCODE_SGE: pred = LLVMRealUGE; break; + case TGSI_OPCODE_SEQ: pred = LLVMRealUEQ; break; + case TGSI_OPCODE_SLE: pred = LLVMRealULE; break; + case TGSI_OPCODE_SLT: pred = LLVMRealULT; break; + case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break; + case TGSI_OPCODE_SGT: pred = LLVMRealUGT; break; + default: assert(!"unknown instruction"); pred = LLVMRealPredicateFalse; break; + } + + cond = LLVMBuildFCmp(builder, + pred, emit_data->args[0], emit_data->args[1], ""); + + emit_data->output[emit_data->chan] = LLVMBuildSelect(builder, + cond, bld_base->base.one, bld_base->base.zero, ""); +} + static void emit_not( const struct lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, @@ -1120,20 +1138,12 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_RCP].intr_name = "llvm.AMDGPU.rcp"; bld_base->op_actions[TGSI_OPCODE_SSG].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_SSG].intr_name = "llvm.AMDGPU.ssg"; - bld_base->op_actions[TGSI_OPCODE_SGE].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_SGE].intr_name = "llvm.AMDGPU.sge"; - bld_base->op_actions[TGSI_OPCODE_SEQ].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_SEQ].intr_name = "llvm.AMDGPU.seq"; - bld_base->op_actions[TGSI_OPCODE_SLE].fetch_args = radeon_llvm_fetch_args_2_reverse_soa; - bld_base->op_actions[TGSI_OPCODE_SLE].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_SLE].intr_name = "llvm.AMDGPU.sge"; - bld_base->op_actions[TGSI_OPCODE_SLT].fetch_args = radeon_llvm_fetch_args_2_reverse_soa; - bld_base->op_actions[TGSI_OPCODE_SLT].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_SLT].intr_name = "llvm.AMDGPU.sgt"; - bld_base->op_actions[TGSI_OPCODE_SNE].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_SNE].intr_name = "llvm.AMDGPU.sne"; - bld_base->op_actions[TGSI_OPCODE_SGT].emit = 
build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_SGT].intr_name = "llvm.AMDGPU.sgt"; + bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_cmp; + bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_cmp; + bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_cmp; + bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_cmp; + bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_cmp; + bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_cmp; bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.AMDGPU.sin"; bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args;