-//===-- AMDILPeepholeOptimizer.cpp - TODO: Add brief description -------===//
+//===-- AMDILPeepholeOptimizer.cpp - AMDIL Peephole optimizations ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
-#include "AMDILAlgorithms.tpp"
#include "AMDILDevices.h"
-#include "AMDILInstrInfo.h"
+#include "AMDGPUInstrInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
class OpaqueType;
-class LLVM_LIBRARY_VISIBILITY AMDILPeepholeOpt : public FunctionPass {
+class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass {
public:
TargetMachine &TM;
static char ID;
- AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
- ~AMDILPeepholeOpt();
+ AMDGPUPeepholeOpt(TargetMachine &tm);
+ ~AMDGPUPeepholeOpt();
const char *getPassName() const;
bool runOnFunction(Function &F);
bool doInitialization(Module &M);
LLVMContext *mCTX;
Function *mF;
- const AMDILSubtarget *mSTM;
+ const AMDGPUSubtarget *mSTM;
SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
SmallVector<CallInst *, 16> isConstVec;
-}; // class AMDILPeepholeOpt
- char AMDILPeepholeOpt::ID = 0;
+}; // class AMDGPUPeepholeOpt
+ char AMDGPUPeepholeOpt::ID = 0;
+
+// A template helper that loops over two nested iterator ranges (e.g. the
+// basic blocks of a function and the instructions of each block), calling
+// the functor with a pointer to the current inner iterator. A true return
+// means the functor already advanced (or erased) that iterator itself.
+template<class InputIterator, class SecondIterator, class Function>
+Function safeNestedForEach(InputIterator First, InputIterator Last,
+ SecondIterator S, Function F)
+{
+ for ( ; First != Last; ++First) {
+ SecondIterator sf, sl;
+ for (sf = First->begin(), sl = First->end();
+ sf != sl; ) {
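+ // F receives a pointer to sf, so a true return means it already advanced
+ // (or erased) the iterator itself; only step it here on a false return.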
+ if (!F(&sf)) {
+ ++sf;
+ }
+ }
+ }
+ return F;
+}
+
} // anonymous namespace
namespace llvm {
FunctionPass *
- createAMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
+ createAMDGPUPeepholeOpt(TargetMachine &tm)
{
- return new AMDILPeepholeOpt(tm AMDIL_OPT_LEVEL_VAR);
+ return new AMDGPUPeepholeOpt(tm);
}
} // llvm namespace
-AMDILPeepholeOpt::AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
+AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm)
: FunctionPass(ID), TM(tm)
{
mDebug = false;
}
-AMDILPeepholeOpt::~AMDILPeepholeOpt()
+AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt()
{
}
const char *
-AMDILPeepholeOpt::getPassName() const
+AMDGPUPeepholeOpt::getPassName() const
{
- return "AMDIL PeepHole Optimization Pass";
+ return "AMDGPU PeepHole Optimization Pass";
}
bool
-AMDILPeepholeOpt::dumpAllIntoArena(Function &F)
+AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F)
{
bool dumpAll = false;
for (Function::const_arg_iterator cab = F.arg_begin(),
return dumpAll;
}
void
-AMDILPeepholeOpt::doIsConstCallConversionIfNeeded()
+AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded()
{
if (isConstVec.empty()) {
return;
isConstVec.clear();
}
void
-AMDILPeepholeOpt::doAtomicConversionIfNeeded(Function &F)
+AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F)
{
// Don't do anything if we don't have any atomic operations.
if (atomicFuncs.empty()) {
}
bool
-AMDILPeepholeOpt::runOnFunction(Function &MF)
+AMDGPUPeepholeOpt::runOnFunction(Function &MF)
{
mChanged = false;
mF = &MF;
- mSTM = &TM.getSubtarget<AMDILSubtarget>();
+ mSTM = &TM.getSubtarget<AMDGPUSubtarget>();
if (mDebug) {
MF.dump();
}
mCTX = &MF.getType()->getContext();
mConvertAtomics = true;
safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
- std::bind1st(std::mem_fun(&AMDILPeepholeOpt::instLevelOptimizations),
+ std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations),
this));
doAtomicConversionIfNeeded(MF);
}
bool
-AMDILPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)
+AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)
{
Instruction *inst = (*bbb);
CallInst *CI = dyn_cast<CallInst>(inst);
atomicFuncs.push_back(std::make_pair<CallInst*, Function*>(CI, F));
}
- if (!mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)
- && !mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) {
+ if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment)
+ && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) {
return false;
}
if (!mConvertAtomics) {
}
bool
-AMDILPeepholeOpt::setupBitInsert(Instruction *base,
+AMDGPUPeepholeOpt::setupBitInsert(Instruction *base,
Instruction *&src,
Constant *&mask,
Constant *&shift)
return true;
}
bool
-AMDILPeepholeOpt::optimizeBitInsert(Instruction *inst)
+AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst)
{
if (!inst) {
return false;
// (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
// (A & B) | (D << F) when (1 << F) >= B
// (A << C) | (D & E) when (1 << C) >= E
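+ // For example, (x & 0xFF) | ((y & 0xFF) << 8) matches the first pattern
+ // with B == E == 0xFF and F == 8, so it can become one bitfield insert.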
- if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
// The HD4XXX hardware doesn't support the ubit_insert instruction.
return false;
}
}
bool
-AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst)
+AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst)
{
if (!inst) {
return false;
// __amdil_ubit_extract(log2(C), B, A). The function __amdil_[u|i]bit_extract
// can be found in Section 7.9 of the ATI IL spec of the stream SDK for
// Evergreen hardware.
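+ // For example, assuming the (A >> B) & C form with B == 8 and C == 0xFF,
+ // the whole expression collapses to a single 8-bit extract at offset 8.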
- if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
// This does not work on HD4XXX hardware.
return false;
}
Type *aType = inst->getType();
bool isVector = aType->isVectorTy();
+
+ // XXX Support vector types
+ if (isVector) {
+ return false;
+ }
int numEle = 1;
// This only works on 32-bit integers
if (aType->getScalarType()
callTypes.push_back(aType);
callTypes.push_back(aType);
FunctionType *funcType = FunctionType::get(aType, callTypes, false);
- std::string name = "__amdil_ubit_extract";
+ std::string name = "llvm.AMDIL.bit.extract.u32";
if (isVector) {
- name += "_v" + itostr(numEle) + "i32";
+ name += ".v" + itostr(numEle) + "i32";
} else {
- name += "_i32";
+ name += ".";
}
// Let's create the function.
Function *Func =
dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
getOrInsertFunction(llvm::StringRef(name), funcType));
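+ // Note the operand order of the new intrinsic: source value first, then
+ // the shift amount, then the mask (the reverse of the old builtin).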
Value *Operands[3] = {
- newMaskConst,
+ ShiftInst->getOperand(0),
shiftValConst,
- ShiftInst->getOperand(0)
+ newMaskConst
};
// Let's create the call with the operands.
CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
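+ // The extract is a pure bit operation, so mark the call as not accessing
+ // memory and leave later passes free to move or fold it.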
+ CI->setDoesNotAccessMemory();
CI->insertBefore(inst);
inst->replaceAllUsesWith(CI);
return true;
}
bool
-AMDILPeepholeOpt::expandBFI(CallInst *CI)
+AMDGPUPeepholeOpt::expandBFI(CallInst *CI)
{
- if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) {
+ if (!CI) {
return false;
}
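+ // __amdil_bfi(m, a, b) is taken here to compute (a & m) | (b & ~m), per
+ // the AMD IL spec (stated as an assumption); with the CAL-version guard
+ // gone, the call is now considered for expansion unconditionally.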
Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
}
bool
-AMDILPeepholeOpt::expandBFM(CallInst *CI)
+AMDGPUPeepholeOpt::expandBFM(CallInst *CI)
{
- if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) {
+ if (!CI) {
return false;
}
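+ // __amdil_bfm(w, o) is assumed to build a field mask along the lines of
+ // ((1 << (w & 31)) - 1) << (o & 31); see the AMD IL spec for the details.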
Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
}
bool
-AMDILPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb)
+AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb)
{
Instruction *inst = (*bbb);
if (optimizeCallInst(bbb)) {
return false;
}
bool
-AMDILPeepholeOpt::correctMisalignedMemOp(Instruction *inst)
+AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst)
{
LoadInst *linst = dyn_cast<LoadInst>(inst);
StoreInst *sinst = dyn_cast<StoreInst>(inst);
return false;
}
bool
-AMDILPeepholeOpt::isSigned24BitOps(CallInst *CI)
+AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI)
{
if (!CI) {
return false;
&& namePrefix != "__amdil__imul24_high") {
return false;
}
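+ // Devices with native signed 24-bit multiply support keep the builtin;
+ // instruction selection is assumed to handle it directly in that case.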
- if (mSTM->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
+ if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) {
return false;
}
return true;
}
void
-AMDILPeepholeOpt::expandSigned24BitOps(CallInst *CI)
+AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI)
{
assert(isSigned24BitOps(CI) && "Must be a "
"signed 24 bit operation to call this function!");
}
bool
-AMDILPeepholeOpt::isRWGLocalOpt(CallInst *CI)
+AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI)
{
return (CI != NULL
&& CI->getOperand(CI->getNumOperands() - 1)->getName()
}
bool
-AMDILPeepholeOpt::convertAccurateDivide(CallInst *CI)
+AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI)
{
if (!CI) {
return false;
}
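+ // Assumption: Cayman's native divide is already accurate enough, so it
+ // is excluded from this conversion.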
- if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD6XXX
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX
&& (mSTM->getDeviceName() == "cayman")) {
return false;
}
}
void
-AMDILPeepholeOpt::expandAccurateDivide(CallInst *CI)
+AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI)
{
assert(convertAccurateDivide(CI)
&& "expanding accurate divide can only happen if it is expandable!");
}
bool
-AMDILPeepholeOpt::propagateSamplerInst(CallInst *CI)
+AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI)
{
if (optLevel != CodeGenOpt::None) {
return false;
return false;
}
- if (lInst->getPointerAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
+ if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
return false;
}
}
bool
-AMDILPeepholeOpt::doInitialization(Module &M)
+AMDGPUPeepholeOpt::doInitialization(Module &M)
{
return false;
}
bool
-AMDILPeepholeOpt::doFinalization(Module &M)
+AMDGPUPeepholeOpt::doFinalization(Module &M)
{
return false;
}
void
-AMDILPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const
+AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const
{
AU.addRequired<MachineFunctionAnalysis>();
FunctionPass::getAnalysisUsage(AU);
AU.setPreservesAll();
}
-size_t AMDILPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
+size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
size_t size = 0;
if (!T) {
return size;
return size;
}
-size_t AMDILPeepholeOpt::getTypeSize(StructType * const ST,
+size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST,
bool dereferencePtr) {
size_t size = 0;
if (!ST) {
return size;
}
-size_t AMDILPeepholeOpt::getTypeSize(IntegerType * const IT,
+size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT,
bool dereferencePtr) {
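+ // getBitWidth() >> 3 converts bits to bytes; integer types narrower than
+ // a byte (e.g. i1) round down to a size of zero.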
return IT ? (IT->getBitWidth() >> 3) : 0;
}
-size_t AMDILPeepholeOpt::getTypeSize(FunctionType * const FT,
+size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT,
bool dereferencePtr) {
assert(0 && "Should not be able to calculate the size of an function type");
return 0;
}
-size_t AMDILPeepholeOpt::getTypeSize(ArrayType * const AT,
+size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT,
bool dereferencePtr) {
return (size_t)(AT ? (getTypeSize(AT->getElementType(),
dereferencePtr) * AT->getNumElements())
: 0);
}
-size_t AMDILPeepholeOpt::getTypeSize(VectorType * const VT,
+size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT,
bool dereferencePtr) {
return VT ? (VT->getBitWidth() >> 3) : 0;
}
-size_t AMDILPeepholeOpt::getTypeSize(PointerType * const PT,
+size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT,
bool dereferencePtr) {
if (!PT) {
return 0;
}
Type *CT = PT->getElementType();
if (CT->getTypeID() == Type::StructTyID &&
- PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+ PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
return getTypeSize(dyn_cast<StructType>(CT));
} else if (dereferencePtr) {
size_t size = 0;
}
}
-size_t AMDILPeepholeOpt::getTypeSize(OpaqueType * const OT,
+size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT,
bool dereferencePtr) {
//assert(0 && "Should not be able to calculate the size of an opaque type");
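+ // No concrete layout is known for an opaque type; fall back to a 4-byte
+ // default rather than asserting (hence the disabled assert above).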
return 4;