From 0eca5fd919b0a31ea926b5f5072e5e56f7a55269 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Wed, 1 Aug 2012 22:49:42 +0200 Subject: [PATCH] radeon/llvm: Lower branch/branch_cond into predicated jump Signed-off-by: Tom Stellard --- .../drivers/radeon/AMDGPUInstrInfo.cpp | 118 ----------- src/gallium/drivers/radeon/AMDGPUInstrInfo.h | 14 -- .../drivers/radeon/AMDILCFGStructurizer.cpp | 57 +++-- src/gallium/drivers/radeon/AMDILInstrInfo.td | 2 +- .../drivers/radeon/R600ISelLowering.cpp | 27 +++ src/gallium/drivers/radeon/R600InstrInfo.cpp | 196 ++++++++++++++++++ src/gallium/drivers/radeon/R600InstrInfo.h | 9 + 7 files changed, 278 insertions(+), 145 deletions(-) diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp index 2af036740ae..03a647e4387 100644 --- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp +++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp @@ -97,124 +97,6 @@ bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter, return false; } -bool AMDGPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, - MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify) const { - bool retVal = true; - return retVal; - MachineBasicBlock::iterator iter = MBB.begin(); - if (!getNextBranchInstr(iter, MBB)) { - retVal = false; - } else { - MachineInstr *firstBranch = iter; - if (!getNextBranchInstr(++iter, MBB)) { - if (firstBranch->getOpcode() == AMDGPU::BRANCH) { - TBB = firstBranch->getOperand(0).getMBB(); - firstBranch->eraseFromParent(); - retVal = false; - } else { - TBB = firstBranch->getOperand(0).getMBB(); - FBB = *(++MBB.succ_begin()); - if (FBB == TBB) { - FBB = *(MBB.succ_begin()); - } - Cond.push_back(firstBranch->getOperand(1)); - retVal = false; - } - } else { - MachineInstr *secondBranch = iter; - if (!getNextBranchInstr(++iter, MBB)) { - if (secondBranch->getOpcode() == AMDGPU::BRANCH) { - TBB = firstBranch->getOperand(0).getMBB(); - Cond.push_back(firstBranch->getOperand(1)); - FBB = secondBranch->getOperand(0).getMBB(); - secondBranch->eraseFromParent(); - retVal = false; - } else { - assert(0 && "Should not have two consecutive conditional branches"); - } - } else { - MBB.getParent()->viewCFG(); - assert(0 && "Should not have three branch instructions in" - " a single basic block"); - retVal = false; - } - } - } - return retVal; -} - -unsigned int AMDGPUInstrInfo::getBranchInstr(const MachineOperand &op) const { - const MachineInstr *MI = op.getParent(); - - switch (MI->getDesc().OpInfo->RegClass) { - default: // FIXME: fallthrough?? - case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32; - case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32; - }; -} - -unsigned int -AMDGPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, - MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, - DebugLoc DL) const -{ - assert(TBB && "InsertBranch must not be told to insert a fallthrough"); - for (unsigned int x = 0; x < Cond.size(); ++x) { - Cond[x].getParent()->dump(); - } - if (FBB == 0) { - if (Cond.empty()) { - BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(TBB); - } else { - BuildMI(&MBB, DL, get(getBranchInstr(Cond[0]))) - .addMBB(TBB).addReg(Cond[0].getReg()); - } - return 1; - } else { - BuildMI(&MBB, DL, get(getBranchInstr(Cond[0]))) - .addMBB(TBB).addReg(Cond[0].getReg()); - BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(FBB); - } - assert(0 && "Inserting two branches not supported"); - return 0; -} - -unsigned int AMDGPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) { - return 0; - } - --I; - switch (I->getOpcode()) { - default: - return 0; - ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND); - case AMDGPU::BRANCH: - I->eraseFromParent(); - break; - } - I = MBB.end(); - - if (I == MBB.begin()) { - return 1; - } - --I; - switch (I->getOpcode()) { - // FIXME: only one case?? - default: - return 1; - ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND); - I->eraseFromParent(); - break; - } - return 2; -} - MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) { MachineBasicBlock::iterator tmp = MBB->end(); if (!MBB->size()) { diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h index 28952cfa60f..31400a7b60f 100644 --- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h +++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h @@ -49,8 +49,6 @@ private: TargetMachine &TM; bool getNextBranchInstr(MachineBasicBlock::iterator &iter, MachineBasicBlock &MBB) const; - unsigned int getBranchInstr(const MachineOperand &op) const; - public: explicit AMDGPUInstrInfo(TargetMachine &tm); @@ -77,18 +75,6 @@ public: MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const; - bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify) const; - - unsigned RemoveBranch(MachineBasicBlock &MBB) const; - - unsigned - InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, - DebugLoc DL) const; virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, diff --git a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp index 95a75ac0ffc..b167d62fa0a 100644 --- a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp +++ b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp @@ -301,6 +301,7 @@ public: bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri); private: + void reversePredicateSetter(typename BlockT::iterator); void orderBlocks(); void printOrderedBlocks(llvm::raw_ostream &OS); int patternMatch(BlockT *CurBlock); @@ -1663,6 +1664,31 @@ void CFGStructurizer::mergeLooplandBlock(BlockT *dstBlk, retireBlock(dstBlk, landBlk); } //mergeLooplandBlock +template +void CFGStructurizer::reversePredicateSetter(typename BlockT::iterator I) +{ + while (I--) { + if (I->getOpcode() == AMDGPU::PRED_X) { + switch (static_cast(I)->getOperand(2).getImm()) { + case OPCODE_IS_ZERO_INT: + static_cast(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT); + return; + case OPCODE_IS_NOT_ZERO_INT: + static_cast(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT); + return; + case OPCODE_IS_ZERO: + static_cast(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO); + return; + case OPCODE_IS_NOT_ZERO: + static_cast(I)->getOperand(2).setImm(OPCODE_IS_ZERO); + return; + default: + assert(0 && "PRED_X Opcode invalid!"); + } + } + } +} + template void CFGStructurizer::mergeLoopbreakBlock(BlockT *exitingBlk, BlockT *exitBlk, @@ -1695,14 +1721,17 @@ void CFGStructurizer::mergeLoopbreakBlock(BlockT *exitingBlk, if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) { //break_logical - int newOpcode = - (trueBranch == exitBlk) ? CFGTraits::getBreakNzeroOpcode(oldOpcode) - : CFGTraits::getBreakZeroOpcode(oldOpcode); + + if (trueBranch != exitBlk) { + reversePredicateSetter(branchInstrPos); + } + int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode); CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL); } else { - int newOpcode = - (trueBranch == exitBlk) ? CFGTraits::getBranchNzeroOpcode(oldOpcode) - : CFGTraits::getBranchZeroOpcode(oldOpcode); + if (trueBranch != exitBlk) { + reversePredicateSetter(branchInstr); + } + int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode); CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL); if (exitBlk != exitLandBlk) { //splice is insert-before ... @@ -2765,7 +2794,7 @@ struct CFGStructTraits static int getBreakNzeroOpcode(int oldOpcode) { switch(oldOpcode) { - ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALNZ); + case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALNZ_i32; default: assert(0 && "internal error"); }; @@ -2774,7 +2803,7 @@ struct CFGStructTraits static int getBreakZeroOpcode(int oldOpcode) { switch(oldOpcode) { - ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALZ); + case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALZ_i32; default: assert(0 && "internal error"); }; @@ -2783,6 +2812,7 @@ struct CFGStructTraits static int getBranchNzeroOpcode(int oldOpcode) { switch(oldOpcode) { + case AMDGPU::JUMP: return AMDGPU::IF_LOGICALNZ_i32; ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ); case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ; default: @@ -2793,6 +2823,7 @@ struct CFGStructTraits static int getBranchZeroOpcode(int oldOpcode) { switch(oldOpcode) { + case AMDGPU::JUMP: return AMDGPU::IF_LOGICALZ_i32; ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ); case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z; default: @@ -2804,7 +2835,7 @@ struct CFGStructTraits static int getContinueNzeroOpcode(int oldOpcode) { switch(oldOpcode) { - ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALNZ); + case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; default: assert(0 && "internal error"); }; @@ -2813,7 +2844,7 @@ struct CFGStructTraits static int getContinueZeroOpcode(int oldOpcode) { switch(oldOpcode) { - ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALZ); + case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; default: assert(0 && "internal error"); }; @@ -2845,6 +2876,8 @@ struct CFGStructTraits static bool isCondBranch(MachineInstr *instr) { switch (instr->getOpcode()) { + case AMDGPU::JUMP: + return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0; ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND); case AMDGPU::SI_IF_NZ: case AMDGPU::SI_IF_Z: @@ -2857,8 +2890,8 @@ struct CFGStructTraits static bool isUncondBranch(MachineInstr *instr) { switch (instr->getOpcode()) { - case AMDGPU::BRANCH: - break; + case AMDGPU::JUMP: + return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0; default: return false; } diff --git a/src/gallium/drivers/radeon/AMDILInstrInfo.td b/src/gallium/drivers/radeon/AMDILInstrInfo.td index b2a0541ae3f..b683e49ea81 100644 --- a/src/gallium/drivers/radeon/AMDILInstrInfo.td +++ b/src/gallium/drivers/radeon/AMDILInstrInfo.td @@ -217,7 +217,7 @@ include "AMDILIntrinsics.td" // Custom Inserter for Branches and returns, this eventually will be a // seperate pass //===---------------------------------------------------------------------===// -let isTerminator = 1 in { +let isTerminator = 1, usesCustomInserter = 1 in { def BRANCH : ILFormat<(outs), (ins brtarget:$target), "; Pseudo unconditional branch instruction", [(br bb:$target)]>; diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp index 26f14fa49fa..1f5f4178f34 100644 --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp @@ -210,6 +210,33 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( .addReg(t1, RegState::Implicit); break; } + case AMDGPU::BRANCH: + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) + .addOperand(MI->getOperand(0)) + .addReg(0); + break; + case AMDGPU::BRANCH_COND_f32: + MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH); + + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X)) + .addReg(AMDGPU::PREDICATE_BIT) + .addOperand(MI->getOperand(1)) + .addImm(OPCODE_IS_ZERO); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) + .addOperand(MI->getOperand(0)) + .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); + break; + case AMDGPU::BRANCH_COND_i32: + MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH); + + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X)) + .addReg(AMDGPU::PREDICATE_BIT) + .addOperand(MI->getOperand(1)) + .addImm(OPCODE_IS_ZERO_INT); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) + .addOperand(MI->getOperand(0)) + .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); + break; } diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp index c807d5c440f..4a396ef1b47 100644 --- a/src/gallium/drivers/radeon/R600InstrInfo.cpp +++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp @@ -17,6 +17,7 @@ #include "R600RegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "AMDILUtilityFunctions.h" +#include "AMDGPUUtil.h" #define GET_INSTRINFO_CTOR #include "AMDGPUGenDFAPacketizer.inc" @@ -94,6 +95,8 @@ unsigned R600InstrInfo::getIEQOpcode() const bool R600InstrInfo::isMov(unsigned Opcode) const { + + switch(Opcode) { default: return false; case AMDGPU::MOV: @@ -188,6 +191,199 @@ DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, return TM->getSubtarget().createDFAPacketizer(II); } +static bool +isPredicateSetter(unsigned opcode) +{ + switch (opcode) { + case AMDGPU::PRED_X: + return true; + default: + return false; + } +} + +static MachineInstr * +findFirstPredicateSetterFrom(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) +{ + while (I != MBB.begin()) { + --I; + MachineInstr *MI = I; + if (isPredicateSetter(MI->getOpcode())) + return MI; + } + + return NULL; +} + +bool +R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const +{ + // Most of the following comes from the ARM implementation of AnalyzeBranch + + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) + return false; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return false; + --I; + } + if (static_cast(I)->getOpcode() != AMDGPU::JUMP) { + return false; + } + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + + // If there is only one terminator instruction, process it. + unsigned LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || + static_cast(--I)->getOpcode() != AMDGPU::JUMP) { + if (LastOpc == AMDGPU::JUMP) { + if(!isPredicated(LastInst)) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } else { + MachineInstr *predSet = I; + while (!isPredicateSetter(predSet->getOpcode())) { + predSet = --I; + } + TBB = LastInst->getOperand(0).getMBB(); + Cond.push_back(predSet->getOperand(1)); + Cond.push_back(predSet->getOperand(2)); + Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); + return false; + } + } + return true; // Can't handle indirect branch. + } + + // Get the instruction before it if it is a terminator. + MachineInstr *SecondLastInst = I; + unsigned SecondLastOpc = SecondLastInst->getOpcode(); + + // If the block ends with a B and a Bcc, handle it. + if (SecondLastOpc == AMDGPU::JUMP && + isPredicated(SecondLastInst) && + LastOpc == AMDGPU::JUMP && + !isPredicated(LastInst)) { + MachineInstr *predSet = --I; + while (!isPredicateSetter(predSet->getOpcode())) { + predSet = --I; + } + TBB = SecondLastInst->getOperand(0).getMBB(); + FBB = LastInst->getOperand(0).getMBB(); + Cond.push_back(predSet->getOperand(1)); + Cond.push_back(predSet->getOperand(2)); + Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); + return false; + } + + // Otherwise, can't handle this. + return true; +} + +int R600InstrInfo::getBranchInstr(const MachineOperand &op) const { + const MachineInstr *MI = op.getParent(); + + switch (MI->getDesc().OpInfo->RegClass) { + default: // FIXME: fallthrough?? + case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32; + case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32; + }; +} + +unsigned +R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond, + DebugLoc DL) const +{ + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + + if (FBB == 0) { + if (Cond.empty()) { + BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0); + return 1; + } else { + MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); + assert(PredSet && "No previous predicate !"); + PredSet->getOperand(1).addTargetFlag(1<<4); + PredSet->getOperand(2).setImm(Cond[1].getImm()); + + BuildMI(&MBB, DL, get(AMDGPU::JUMP)) + .addMBB(TBB) + .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); + return 1; + } + } else { + MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); + assert(PredSet && "No previous predicate !"); + PredSet->getOperand(1).addTargetFlag(1<<4); + PredSet->getOperand(2).setImm(Cond[1].getImm()); + BuildMI(&MBB, DL, get(AMDGPU::JUMP)) + .addMBB(TBB) + .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); + BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0); + return 2; + } +} + +unsigned +R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const +{ + + // Note : we leave PRED* instructions there. + // They may be needed when predicating instructions. + + MachineBasicBlock::iterator I = MBB.end(); + + if (I == MBB.begin()) { + return 0; + } + --I; + switch (I->getOpcode()) { + default: + return 0; + case AMDGPU::JUMP: + if (isPredicated(I)) { + MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); + char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4)); + predSet->getOperand(1).setTargetFlags(flag); + } + I->eraseFromParent(); + break; + } + I = MBB.end(); + + if (I == MBB.begin()) { + return 1; + } + --I; + switch (I->getOpcode()) { + // FIXME: only one case?? + default: + return 1; + case AMDGPU::JUMP: + if (isPredicated(I)) { + MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); + char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4)); + predSet->getOperand(1).setTargetFlags(flag); + } + I->eraseFromParent(); + break; + } + return 2; +} + bool R600InstrInfo::isPredicated(const MachineInstr *MI) const { diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h index 9bdda7a1e13..2819b0b87a2 100644 --- a/src/gallium/drivers/radeon/R600InstrInfo.h +++ b/src/gallium/drivers/radeon/R600InstrInfo.h @@ -34,6 +34,8 @@ namespace llvm { const R600RegisterInfo RI; AMDGPUTargetMachine &TM; + int getBranchInstr(const MachineOperand &op) const; + public: explicit R600InstrInfo(AMDGPUTargetMachine &tm); @@ -62,6 +64,13 @@ namespace llvm { DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM, const ScheduleDAG *DAG) const; + bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, bool AllowModify) const; + + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl &Cond, DebugLoc DL) const; + + unsigned RemoveBranch(MachineBasicBlock &MBB) const; + bool isPredicated(const MachineInstr *MI) const; bool isPredicable(MachineInstr *MI) const; -- 2.30.2