return false;
}
-bool AMDGPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
- bool retVal = true;
- return retVal;
- MachineBasicBlock::iterator iter = MBB.begin();
- if (!getNextBranchInstr(iter, MBB)) {
- retVal = false;
- } else {
- MachineInstr *firstBranch = iter;
- if (!getNextBranchInstr(++iter, MBB)) {
- if (firstBranch->getOpcode() == AMDGPU::BRANCH) {
- TBB = firstBranch->getOperand(0).getMBB();
- firstBranch->eraseFromParent();
- retVal = false;
- } else {
- TBB = firstBranch->getOperand(0).getMBB();
- FBB = *(++MBB.succ_begin());
- if (FBB == TBB) {
- FBB = *(MBB.succ_begin());
- }
- Cond.push_back(firstBranch->getOperand(1));
- retVal = false;
- }
- } else {
- MachineInstr *secondBranch = iter;
- if (!getNextBranchInstr(++iter, MBB)) {
- if (secondBranch->getOpcode() == AMDGPU::BRANCH) {
- TBB = firstBranch->getOperand(0).getMBB();
- Cond.push_back(firstBranch->getOperand(1));
- FBB = secondBranch->getOperand(0).getMBB();
- secondBranch->eraseFromParent();
- retVal = false;
- } else {
- assert(0 && "Should not have two consecutive conditional branches");
- }
- } else {
- MBB.getParent()->viewCFG();
- assert(0 && "Should not have three branch instructions in"
- " a single basic block");
- retVal = false;
- }
- }
- }
- return retVal;
-}
-
-unsigned int AMDGPUInstrInfo::getBranchInstr(const MachineOperand &op) const {
- const MachineInstr *MI = op.getParent();
-
- switch (MI->getDesc().OpInfo->RegClass) {
- default: // FIXME: fallthrough??
- case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
- case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
- };
-}
-
-unsigned int
-AMDGPUInstrInfo::InsertBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const
-{
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
- for (unsigned int x = 0; x < Cond.size(); ++x) {
- Cond[x].getParent()->dump();
- }
- if (FBB == 0) {
- if (Cond.empty()) {
- BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(TBB);
- } else {
- BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
- .addMBB(TBB).addReg(Cond[0].getReg());
- }
- return 1;
- } else {
- BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
- .addMBB(TBB).addReg(Cond[0].getReg());
- BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(FBB);
- }
- assert(0 && "Inserting two branches not supported");
- return 0;
-}
-
-unsigned int AMDGPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin()) {
- return 0;
- }
- --I;
- switch (I->getOpcode()) {
- default:
- return 0;
- ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
- case AMDGPU::BRANCH:
- I->eraseFromParent();
- break;
- }
- I = MBB.end();
-
- if (I == MBB.begin()) {
- return 1;
- }
- --I;
- switch (I->getOpcode()) {
- // FIXME: only one case??
- default:
- return 1;
- ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
- I->eraseFromParent();
- break;
- }
- return 2;
-}
-
MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
MachineBasicBlock::iterator tmp = MBB->end();
if (!MBB->size()) {
TargetMachine &TM;
bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
MachineBasicBlock &MBB) const;
- unsigned int getBranchInstr(const MachineOperand &op) const;
-
public:
explicit AMDGPUInstrInfo(TargetMachine &tm);
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const;
- bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
-
- unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-
- unsigned
- InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
private:
+ void reversePredicateSetter(typename BlockT::iterator);
void orderBlocks();
void printOrderedBlocks(llvm::raw_ostream &OS);
int patternMatch(BlockT *CurBlock);
retireBlock(dstBlk, landBlk);
} //mergeLooplandBlock
+template<class PassT>
+void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I)
+{
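+  // Walk backwards from I to the closest PRED_X and invert its comparison
+  // opcode in place (operand 2 holds the OPCODE_IS_* immediate).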
+  while (I--) {
+    if (I->getOpcode() == AMDGPU::PRED_X) {
+      MachineOperand &CmpOp = static_cast<MachineInstr *>(I)->getOperand(2);
+      switch (CmpOp.getImm()) {
+      case OPCODE_IS_ZERO_INT:
+        CmpOp.setImm(OPCODE_IS_NOT_ZERO_INT);
+        return;
+      case OPCODE_IS_NOT_ZERO_INT:
+        CmpOp.setImm(OPCODE_IS_ZERO_INT);
+        return;
+      case OPCODE_IS_ZERO:
+        CmpOp.setImm(OPCODE_IS_NOT_ZERO);
+        return;
+      case OPCODE_IS_NOT_ZERO:
+        CmpOp.setImm(OPCODE_IS_ZERO);
+        return;
+      default:
+        assert(0 && "PRED_X opcode invalid!");
+      }
+    }
+  }
+}
+
template<class PassT>
void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
BlockT *exitBlk,
if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
//break_logical
- int newOpcode =
- (trueBranch == exitBlk) ? CFGTraits::getBreakNzeroOpcode(oldOpcode)
- : CFGTraits::getBreakZeroOpcode(oldOpcode);
+
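+    // Rather than choosing between the NZ and Z break opcodes, normalize
+    // the predicate first: if the exit is taken on the false edge, invert
+    // the PRED_X comparison so the break always tests for zero.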
+ if (trueBranch != exitBlk) {
+ reversePredicateSetter(branchInstrPos);
+ }
+ int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
} else {
- int newOpcode =
- (trueBranch == exitBlk) ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
- : CFGTraits::getBranchZeroOpcode(oldOpcode);
+    if (trueBranch != exitBlk) {
+      reversePredicateSetter(branchInstrPos);
+    }
+    int newOpcode = CFGTraits::getBranchZeroOpcode(oldOpcode);
CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
if (exitBlk != exitLandBlk) {
//splice is insert-before ...
static int getBreakNzeroOpcode(int oldOpcode) {
switch(oldOpcode) {
- ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALNZ);
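+    // With BRANCH_COND gone, the conditional branch opcode is always JUMP;
+    // the type distinction now lives in the PRED_X comparison, so the _i32
+    // form is used here.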
+ case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALNZ_i32;
default:
assert(0 && "internal error");
};
static int getBreakZeroOpcode(int oldOpcode) {
switch(oldOpcode) {
- ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALZ);
+ case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALZ_i32;
default:
assert(0 && "internal error");
};
static int getBranchNzeroOpcode(int oldOpcode) {
switch(oldOpcode) {
+ case AMDGPU::JUMP: return AMDGPU::IF_LOGICALNZ_i32;
ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ);
case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ;
default:
static int getBranchZeroOpcode(int oldOpcode) {
switch(oldOpcode) {
+ case AMDGPU::JUMP: return AMDGPU::IF_LOGICALZ_i32;
ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ);
case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z;
default:
static int getContinueNzeroOpcode(int oldOpcode)
{
switch(oldOpcode) {
- ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALNZ);
+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
default:
assert(0 && "internal error");
};
static int getContinueZeroOpcode(int oldOpcode) {
switch(oldOpcode) {
- ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALZ);
+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
default:
assert(0 && "internal error");
};
static bool isCondBranch(MachineInstr *instr) {
switch (instr->getOpcode()) {
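+    // A JUMP is conditional iff its predicate operand names a real register;
+    // the unconditional form carries a null (0) predicate register.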
+ case AMDGPU::JUMP:
+ return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
case AMDGPU::SI_IF_NZ:
case AMDGPU::SI_IF_Z:
static bool isUncondBranch(MachineInstr *instr) {
switch (instr->getOpcode()) {
- case AMDGPU::BRANCH:
- break;
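+    // The unconditional form of JUMP has a null predicate register.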
+ case AMDGPU::JUMP:
+ return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
default:
return false;
}
// Custom inserter for branches and returns; this will eventually be a
// separate pass
//===---------------------------------------------------------------------===//
-let isTerminator = 1 in {
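+// These branch pseudos are now expanded by the custom inserter into
+// PRED_X + predicated JUMP sequences (see the BRANCH/BRANCH_COND_*
+// expansion below).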
+let isTerminator = 1, usesCustomInserter = 1 in {
def BRANCH : ILFormat<(outs), (ins brtarget:$target),
"; Pseudo unconditional branch instruction",
[(br bb:$target)]>;
.addReg(t1, RegState::Implicit);
break;
}
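+  // Expand the branch pseudos: an unconditional BRANCH becomes a JUMP with a
+  // null predicate register; BRANCH_COND_* first emit a PRED_X that defines
+  // PREDICATE_BIT, then a JUMP predicated on it.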
+ case AMDGPU::BRANCH:
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ .addOperand(MI->getOperand(0))
+ .addReg(0);
+ break;
+ case AMDGPU::BRANCH_COND_f32:
+ MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
+ .addReg(AMDGPU::PREDICATE_BIT)
+ .addOperand(MI->getOperand(1))
+ .addImm(OPCODE_IS_ZERO);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ break;
+ case AMDGPU::BRANCH_COND_i32:
+ MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
+ .addReg(AMDGPU::PREDICATE_BIT)
+ .addOperand(MI->getOperand(1))
+ .addImm(OPCODE_IS_ZERO_INT);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ break;
}
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "AMDILUtilityFunctions.h"
+#include "AMDGPUUtil.h"
#define GET_INSTRINFO_CTOR
#include "AMDGPUGenDFAPacketizer.inc"
bool R600InstrInfo::isMov(unsigned Opcode) const
{
switch(Opcode) {
default: return false;
case AMDGPU::MOV:
return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}
+static bool
+isPredicateSetter(unsigned opcode)
+{
+ switch (opcode) {
+ case AMDGPU::PRED_X:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static MachineInstr *
+findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I)
+{
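+  // Scan backwards from I for the most recent predicate setter (PRED_X)
+  // in this block; returns NULL if there is none.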
+ while (I != MBB.begin()) {
+ --I;
+ MachineInstr *MI = I;
+ if (isPredicateSetter(MI->getOpcode()))
+ return MI;
+ }
+
+ return NULL;
+}
+
+bool
+R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const
+{
+ // Most of the following comes from the ARM implementation of AnalyzeBranch
+
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
+ return false;
+ }
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() ||
+ static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
+ if (LastOpc == AMDGPU::JUMP) {
+    if (!isPredicated(LastInst)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else {
+ MachineInstr *predSet = I;
+ while (!isPredicateSetter(predSet->getOpcode())) {
+ predSet = --I;
+ }
+ TBB = LastInst->getOperand(0).getMBB();
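+      // Cond encodes the PRED_X state: its condition input (operand 1),
+      // its comparison opcode (operand 2), and the predicate-select
+      // register the predicated JUMP consumes.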
+ Cond.push_back(predSet->getOperand(1));
+ Cond.push_back(predSet->getOperand(2));
+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+ return false;
+ }
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+  // If the block ends with a predicated JUMP followed by an unconditional
+  // JUMP, handle it.
+ if (SecondLastOpc == AMDGPU::JUMP &&
+ isPredicated(SecondLastInst) &&
+ LastOpc == AMDGPU::JUMP &&
+ !isPredicated(LastInst)) {
+ MachineInstr *predSet = --I;
+ while (!isPredicateSetter(predSet->getOpcode())) {
+ predSet = --I;
+ }
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ FBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(predSet->getOperand(1));
+ Cond.push_back(predSet->getOperand(2));
+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
+ const MachineInstr *MI = op.getParent();
+
+ switch (MI->getDesc().OpInfo->RegClass) {
+ default: // FIXME: fallthrough??
+ case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
+ case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
+ };
+}
+
+unsigned
+R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const
+{
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ if (FBB == 0) {
+ if (Cond.empty()) {
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
+ return 1;
+ } else {
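+      // Re-arm the most recent PRED_X: set the push bit on its condition
+      // operand (1 << 4, presumably the same MO_FLAG_PUSH the custom
+      // inserter uses) and restore the comparison opcode that
+      // AnalyzeBranch saved in Cond[1].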
+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+      assert(PredSet && "No previous predicate!");
+ PredSet->getOperand(1).addTargetFlag(1<<4);
+ PredSet->getOperand(2).setImm(Cond[1].getImm());
+
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP))
+ .addMBB(TBB)
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ return 1;
+ }
+ } else {
+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+    assert(PredSet && "No previous predicate!");
+ PredSet->getOperand(1).addTargetFlag(1<<4);
+ PredSet->getOperand(2).setImm(Cond[1].getImm());
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP))
+ .addMBB(TBB)
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
+ return 2;
+ }
+}
+
+unsigned
+R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
+{
+  // Note: we leave the PRED* instructions in place; they may be needed
+  // later when predicating instructions.
+
+ MachineBasicBlock::iterator I = MBB.end();
+
+ if (I == MBB.begin()) {
+ return 0;
+ }
+ --I;
+ switch (I->getOpcode()) {
+ default:
+ return 0;
+ case AMDGPU::JUMP:
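+    // The predicated JUMP is going away: clear the push bit (1 << 4) that
+    // InsertBranch set on the PRED_X condition operand.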
+ if (isPredicated(I)) {
+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+ char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
+ predSet->getOperand(1).setTargetFlags(flag);
+ }
+ I->eraseFromParent();
+ break;
+ }
+ I = MBB.end();
+
+ if (I == MBB.begin()) {
+ return 1;
+ }
+ --I;
+ switch (I->getOpcode()) {
+ // FIXME: only one case??
+ default:
+ return 1;
+ case AMDGPU::JUMP:
+ if (isPredicated(I)) {
+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+ char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
+ predSet->getOperand(1).setTargetFlags(flag);
+ }
+ I->eraseFromParent();
+ break;
+ }
+ return 2;
+}
+
bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const
{
const R600RegisterInfo RI;
AMDGPUTargetMachine &TM;
+ int getBranchInstr(const MachineOperand &op) const;
+
public:
explicit R600InstrInfo(AMDGPUTargetMachine &tm);
DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const;
+  bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                     MachineBasicBlock *&FBB,
+                     SmallVectorImpl<MachineOperand> &Cond,
+                     bool AllowModify) const;
+
+  unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                        MachineBasicBlock *FBB,
+                        const SmallVectorImpl<MachineOperand> &Cond,
+                        DebugLoc DL) const;
+
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
bool isPredicated(const MachineInstr *MI) const;
bool isPredicable(MachineInstr *MI) const;