-//===-- AMDILPeepholeOptimizer.cpp - TODO: Add brief description -------===//
+//===-- AMDILPeepholeOptimizer.cpp - AMDIL Peephole optimizations ---------===//
//
// The LLVM Compiler Infrastructure
//
//
//==-----------------------------------------------------------------------===//
-#define DEBUG_TYPE "PeepholeOpt"
-#ifdef DEBUG
-#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
-#else
-#define DEBUGME 0
-#endif
-
-#include "AMDILAlgorithms.tpp"
#include "AMDILDevices.h"
-#include "AMDILMachineFunctionInfo.h"
-#include "AMDILUtilityFunctions.h"
+#include "AMDGPUInstrInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Constants.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/Function.h"
// The Peephole optimization pass is used to do simple last minute optimizations
// that are required for correct code or to remove redundant functions
namespace {
-class LLVM_LIBRARY_VISIBILITY AMDILPeepholeOpt : public FunctionPass {
+
+class OpaqueType;
+
+class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass {
public:
TargetMachine &TM;
static char ID;
- AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
- ~AMDILPeepholeOpt();
+ AMDGPUPeepholeOpt(TargetMachine &tm);
+ ~AMDGPUPeepholeOpt();
const char *getPassName() const;
bool runOnFunction(Function &F);
bool doInitialization(Module &M);
// samplers at compile time.
bool propagateSamplerInst(CallInst *CI);
+ // Helper functions
+
+ // Group of overloads that recursively calculate the size, in bytes, of a type
+ // based on its sub-types.
+ size_t getTypeSize(Type * const T, bool dereferencePtr = false);
+ size_t getTypeSize(StructType * const ST, bool dereferencePtr = false);
+ size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false);
+ size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false);
+ size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false);
+ size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false);
+ size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false);
+ size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false);
+
LLVMContext *mCTX;
Function *mF;
- const AMDILSubtarget *mSTM;
+ const AMDGPUSubtarget *mSTM;
SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
SmallVector<CallInst *, 16> isConstVec;
-}; // class AMDILPeepholeOpt
- char AMDILPeepholeOpt::ID = 0;
+}; // class AMDGPUPeepholeOpt
+ char AMDGPUPeepholeOpt::ID = 0;
+
+// Visits every element of every inner range found in [First, Last) and hands
+// the callback F a pointer to the current inner iterator.  When F returns
+// false the inner iterator is advanced here; when F returns true it is left
+// untouched (presumably because the callback repositioned it itself).  S is
+// not read; it only fixes the inner iterator type.  Returns the callback.
+template<class InputIterator, class SecondIterator, class Function>
+Function safeNestedForEach(InputIterator First, InputIterator Last,
+                           SecondIterator S, Function F)
+{
+  while (First != Last) {
+    SecondIterator Cur = First->begin();
+    SecondIterator End = First->end();
+    while (Cur != End) {
+      if (!F(&Cur)) {
+        ++Cur;
+      }
+    }
+    ++First;
+  }
+  return F;
+}
+
} // anonymous namespace
namespace llvm {
+// Factory for the peephole pass: returns a newly allocated AMDGPUPeepholeOpt
+// constructed with the given target machine.
FunctionPass *
- createAMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
+ createAMDGPUPeepholeOpt(TargetMachine &tm)
{
- return new AMDILPeepholeOpt(tm AMDIL_OPT_LEVEL_VAR);
+ return new AMDGPUPeepholeOpt(tm);
}
} // llvm namespace
+// Constructs the pass for target machine tm.  Debug dumping is switched off
+// (mDebug = false) and the optimization level is read from the target machine.
-AMDILPeepholeOpt::AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
+AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm)
: FunctionPass(ID), TM(tm)
{
- mDebug = DEBUGME;
+ mDebug = false;
optLevel = TM.getOptLevel();
}
+// Destructor; the body is intentionally empty.
-AMDILPeepholeOpt::~AMDILPeepholeOpt()
+AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt()
{
}
+// Returns the human-readable name of this pass as a string literal.
const char *
-AMDILPeepholeOpt::getPassName() const
+AMDGPUPeepholeOpt::getPassName() const
{
- return "AMDIL PeepHole Optimization Pass";
+ return "AMDGPU PeepHole Optimization Pass";
}
bool
}
bool
-AMDILPeepholeOpt::dumpAllIntoArena(Function &F)
+AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F)
{
bool dumpAll = false;
for (Function::const_arg_iterator cab = F.arg_begin(),
return dumpAll;
}
void
-AMDILPeepholeOpt::doIsConstCallConversionIfNeeded()
+AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded()
{
if (isConstVec.empty()) {
return;
isConstVec.clear();
}
void
-AMDILPeepholeOpt::doAtomicConversionIfNeeded(Function &F)
+AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F)
{
// Don't do anything if we don't have any atomic operations.
if (atomicFuncs.empty()) {
if (mConvertAtomics) {
return;
}
- // If we did not convert all of the atomics, then we need to make sure that
- // the atomics that were not converted have their base pointers set to use the
- // arena path.
- Function::arg_iterator argB = F.arg_begin();
- Function::arg_iterator argE = F.arg_end();
- AMDILMachineFunctionInfo *mMFI = getAnalysis<MachineFunctionAnalysis>().getMF()
- .getInfo<AMDILMachineFunctionInfo>();
- for (; argB != argE; ++argB) {
- if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaUAV)) {
- mMFI->uav_insert(mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID));
- } else {
- mMFI->uav_insert(mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID));
- }
- }
}
bool
-AMDILPeepholeOpt::runOnFunction(Function &MF)
+AMDGPUPeepholeOpt::runOnFunction(Function &MF)
{
mChanged = false;
mF = &MF;
- mSTM = &TM.getSubtarget<AMDILSubtarget>();
+ mSTM = &TM.getSubtarget<AMDGPUSubtarget>();
if (mDebug) {
MF.dump();
}
mCTX = &MF.getType()->getContext();
mConvertAtomics = true;
safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
- std::bind1st(std::mem_fun(&AMDILPeepholeOpt::instLevelOptimizations),
+ std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations),
this));
doAtomicConversionIfNeeded(MF);
}
bool
-AMDILPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)
+AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)
{
Instruction *inst = (*bbb);
CallInst *CI = dyn_cast<CallInst>(inst);
atomicFuncs.push_back(std::make_pair <CallInst*, Function*>(CI, F));
}
- if (!mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)
- && !mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) {
+ if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment)
+ && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) {
return false;
}
if (!mConvertAtomics) {
}
bool
-AMDILPeepholeOpt::setupBitInsert(Instruction *base,
+AMDGPUPeepholeOpt::setupBitInsert(Instruction *base,
Instruction *&src,
Constant *&mask,
Constant *&shift)
return true;
}
bool
-AMDILPeepholeOpt::optimizeBitInsert(Instruction *inst)
+AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst)
{
if (!inst) {
return false;
// (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
// (A & B) | (D << F) when (1 << F) >= B
// (A << C) | (D & E) when (1 << C) >= E
- if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
// The HD4XXX hardware doesn't support the ubit_insert instruction.
return false;
}
}
bool
-AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst)
+AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst)
{
if (!inst) {
return false;
// __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract
// can be found in Section 7.9 of the ATI IL spec of the stream SDK for
// Evergreen hardware.
- if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
// This does not work on HD4XXX hardware.
return false;
}
Type *aType = inst->getType();
bool isVector = aType->isVectorTy();
+
+ // XXX: Vector types are not supported yet, so bail out early.
+ if (isVector) {
+ return false;
+ }
int numEle = 1;
// This only works on 32bit integers
if (aType->getScalarType()
callTypes.push_back(aType);
callTypes.push_back(aType);
FunctionType *funcType = FunctionType::get(aType, callTypes, false);
- std::string name = "__amdil_ubit_extract";
+ std::string name = "llvm.AMDIL.bit.extract.u32";
if (isVector) {
- name += "_v" + itostr(numEle) + "i32";
+ name += ".v" + itostr(numEle) + "i32";
} else {
- name += "_i32";
+ name += ".";
}
// Lets create the function.
Function *Func =
dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
getOrInsertFunction(llvm::StringRef(name), funcType));
Value *Operands[3] = {
- newMaskConst,
+ ShiftInst->getOperand(0),
shiftValConst,
- ShiftInst->getOperand(0)
+ newMaskConst
};
// Lets create the Call with the operands
CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
+ CI->setDoesNotAccessMemory();
CI->insertBefore(inst);
inst->replaceAllUsesWith(CI);
return true;
}
bool
-AMDILPeepholeOpt::expandBFI(CallInst *CI)
+AMDGPUPeepholeOpt::expandBFI(CallInst *CI)
{
- if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) {
+ if (!CI) {
return false;
}
Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
}
bool
-AMDILPeepholeOpt::expandBFM(CallInst *CI)
+AMDGPUPeepholeOpt::expandBFM(CallInst *CI)
{
- if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) {
+ if (!CI) {
return false;
}
Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
}
bool
-AMDILPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb)
+AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb)
{
Instruction *inst = (*bbb);
if (optimizeCallInst(bbb)) {
return false;
}
bool
-AMDILPeepholeOpt::correctMisalignedMemOp(Instruction *inst)
+AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst)
{
LoadInst *linst = dyn_cast<LoadInst>(inst);
StoreInst *sinst = dyn_cast<StoreInst>(inst);
return false;
}
bool
-AMDILPeepholeOpt::isSigned24BitOps(CallInst *CI)
+AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI)
{
if (!CI) {
return false;
&& namePrefix != "__amdil__imul24_high") {
return false;
}
- if (mSTM->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
+ if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) {
return false;
}
return true;
}
void
-AMDILPeepholeOpt::expandSigned24BitOps(CallInst *CI)
+AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI)
{
assert(isSigned24BitOps(CI) && "Must be a "
"signed 24 bit operation to call this function!");
}
bool
-AMDILPeepholeOpt::isRWGLocalOpt(CallInst *CI)
+AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI)
{
return (CI != NULL
&& CI->getOperand(CI->getNumOperands() - 1)->getName()
}
bool
-AMDILPeepholeOpt::convertAccurateDivide(CallInst *CI)
+AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI)
{
if (!CI) {
return false;
}
- if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD6XXX
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX
&& (mSTM->getDeviceName() == "cayman")) {
return false;
}
}
void
-AMDILPeepholeOpt::expandAccurateDivide(CallInst *CI)
+AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI)
{
assert(convertAccurateDivide(CI)
&& "expanding accurate divide can only happen if it is expandable!");
}
bool
-AMDILPeepholeOpt::propagateSamplerInst(CallInst *CI)
+AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI)
{
if (optLevel != CodeGenOpt::None) {
return false;
return false;
}
- if (lInst->getPointerAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
+ if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
return false;
}
}
+// Module-level initialization hook.  Does no work and always returns false.
bool
-AMDILPeepholeOpt::doInitialization(Module &M)
+AMDGPUPeepholeOpt::doInitialization(Module &M)
{
return false;
}
+// Module-level finalization hook.  Does no work and always returns false.
bool
-AMDILPeepholeOpt::doFinalization(Module &M)
+AMDGPUPeepholeOpt::doFinalization(Module &M)
{
return false;
}
+// Declares the analyses this pass depends on: MachineFunctionAnalysis is
+// required, the FunctionPass defaults are added, and every analysis is marked
+// as preserved.
+// NOTE(review): this patch removes a getAnalysis<MachineFunctionAnalysis>()
+// call from doAtomicConversionIfNeeded; confirm the requirement is still needed.
void
-AMDILPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const
+AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const
{
AU.addRequired<MachineFunctionAnalysis>();
FunctionPass::getAnalysisUsage(AU);
AU.setPreservesAll();
}
+
+// Generic entry point of the getTypeSize family: returns the size of T in
+// bytes, or 0 when T is null.  Pointer, integer, struct, array, function and
+// vector types are forwarded to their dedicated overloads; every other type
+// is sized from its primitive bit width, truncated to whole bytes.
+// dereferencePtr is passed through unchanged to the overload that is chosen.
+size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
+  if (!T) {
+    return 0;
+  }
+  switch (T->getTypeID()) {
+  case Type::PointerTyID:
+    return getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
+  case Type::IntegerTyID:
+    return getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
+  case Type::StructTyID:
+    return getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
+  case Type::ArrayTyID:
+    return getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
+  case Type::FunctionTyID:
+    return getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
+  case Type::VectorTyID:
+    return getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
+  case Type::X86_FP80TyID:
+  case Type::FP128TyID:
+  case Type::PPC_FP128TyID:
+  case Type::LabelTyID:
+    assert(0 && "These types are not supported by this backend");
+    // With asserts compiled out, control deliberately continues into the
+    // primitive-size computation below.
+  case Type::FloatTyID:
+  case Type::DoubleTyID:
+  default:
+    return T->getPrimitiveSizeInBits() >> 3;
+  }
+}
+
+// Returns the sum of the sizes of all elements of ST, or 0 when ST is null.
+// Element sizes are simply added together; no padding or alignment is
+// accounted for.
+size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST,
+                                      bool dereferencePtr) {
+  if (!ST) {
+    return 0;
+  }
+  size_t total = 0;
+  StructType::element_iterator it = ST->element_begin();
+  StructType::element_iterator end = ST->element_end();
+  while (it != end) {
+    Type *member = *it;
+    total += getTypeSize(member, dereferencePtr);
+    ++it;
+  }
+  return total;
+}
+
+// Returns the bit width of IT divided by eight (rounded down), or 0 when IT
+// is null.  Widths below eight bits therefore yield 0.  dereferencePtr is
+// unused.
+size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT,
+                                      bool dereferencePtr) {
+  if (!IT) {
+    return 0;
+  }
+  return IT->getBitWidth() >> 3;
+}
+
+// Function types have no size: this overload asserts unconditionally and,
+// when asserts are compiled out, reports 0.  Both parameters are unused.
+size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT,
+                                      bool dereferencePtr)
+{
+  assert(0 && "Should not be able to calculate the size of an function type");
+  return 0;
+}
+
+// Returns the element size multiplied by the element count of AT, or 0 when
+// AT is null.  dereferencePtr is forwarded to the element-size query.
+size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT,
+                                      bool dereferencePtr) {
+  if (!AT) {
+    return 0;
+  }
+  return (size_t)(getTypeSize(AT->getElementType(), dereferencePtr)
+                  * AT->getNumElements());
+}
+
+// Returns the total bit width of VT divided by eight (rounded down), or 0
+// when VT is null.  dereferencePtr is unused.
+size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT,
+                                      bool dereferencePtr) {
+  if (!VT) {
+    return 0;
+  }
+  return VT->getBitWidth() >> 3;
+}
+
+// Returns the size to use for pointer type PT, or 0 when PT is null:
+//  * pointer to a struct in the private address space: size of that struct;
+//  * otherwise, if dereferencePtr is set: sum of the sizes of the types
+//    contained in PT;
+//  * otherwise: 4, the size used for the pointer itself.
+size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT,
+                                      bool dereferencePtr) {
+  if (!PT) {
+    return 0;
+  }
+  Type *Pointee = PT->getElementType();
+  const bool IsPrivateStruct =
+      Pointee->getTypeID() == Type::StructTyID &&
+      PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+  if (IsPrivateStruct) {
+    // dereferencePtr is not forwarded here, so the struct overload runs with
+    // its default argument.
+    return getTypeSize(dyn_cast<StructType>(Pointee));
+  }
+  if (!dereferencePtr) {
+    return 4;
+  }
+  size_t total = 0;
+  for (size_t idx = 0, count = PT->getNumContainedTypes(); idx < count; ++idx) {
+    total += getTypeSize(PT->getContainedType(idx), dereferencePtr);
+  }
+  return total;
+}
+
+// Opaque types always report a fixed size of 4; the assert that would reject
+// them is left disabled below.  Both parameters are unused.
+size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT,
+                                      bool dereferencePtr)
+{
+  //assert(0 && "Should not be able to calculate the size of an opaque type");
+  return 4;
+}