radeon/llvm: Merge AMDILInstrInfo.cpp into AMDGPUInstrInfo.cpp
[mesa.git] / src / gallium / drivers / radeon / AMDILPeepholeOptimizer.cpp
1 //===-- AMDILPeepholeOptimizer.cpp - AMDIL Peephole optimizations ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9
10 #include "AMDILAlgorithms.tpp"
11 #include "AMDILDevices.h"
12 #include "AMDGPUInstrInfo.h"
13 #include "llvm/ADT/Statistic.h"
14 #include "llvm/ADT/StringExtras.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/ADT/Twine.h"
17 #include "llvm/Constants.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
20 #include "llvm/Function.h"
21 #include "llvm/Instructions.h"
22 #include "llvm/Module.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/MathExtras.h"
25
26 #include <sstream>
27
28 #if 0
29 STATISTIC(PointerAssignments, "Number of dynamic pointer "
30 "assigments discovered");
31 STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
32 #endif
33
34 using namespace llvm;
35 // The Peephole optimization pass is used to do simple last-minute optimizations
36 // that are required for correct code or to remove redundant functions.
37 namespace {
38
39 class OpaqueType;
40
41 class LLVM_LIBRARY_VISIBILITY AMDILPeepholeOpt : public FunctionPass {
42 public:
43 TargetMachine &TM;
44 static char ID;
45 AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
46 ~AMDILPeepholeOpt();
47 const char *getPassName() const;
48 bool runOnFunction(Function &F);
49 bool doInitialization(Module &M);
50 bool doFinalization(Module &M);
51 void getAnalysisUsage(AnalysisUsage &AU) const;
52 protected:
53 private:
54 // Function to initiate all of the instruction level optimizations.
55 bool instLevelOptimizations(BasicBlock::iterator *inst);
56 // Quick check to see if we need to dump all of the pointers into the
57 // arena. If so, then we set all pointers to exist in the arena. This
58 // is a workaround for aliasing of pointers in a struct/union.
59 bool dumpAllIntoArena(Function &F);
60 // Because we don't want to invalidate any pointers while in the
61 // safeNestedForEach function, we push atomic conversions to a vector and handle
62 // them later. This function does the conversions if required.
63 void doAtomicConversionIfNeeded(Function &F);
64 // Because __amdil_is_constant cannot be properly evaluated if
65 // optimizations are disabled, the calls are placed in a vector
66 // and evaluated after the __amdil_image* functions are evaluated,
67 // which should allow the __amdil_is_constant function to be
68 // evaluated correctly.
69 void doIsConstCallConversionIfNeeded();
70 bool mChanged;
71 bool mDebug;
72 bool mConvertAtomics;
73 CodeGenOpt::Level optLevel;
74 // Run a series of tests to see if we can optimize a CALL instruction.
75 bool optimizeCallInst(BasicBlock::iterator *bbb);
76 // A peephole optimization to optimize bit extract sequences.
77 bool optimizeBitExtract(Instruction *inst);
78 // A peephole optimization to optimize bit insert sequences.
79 bool optimizeBitInsert(Instruction *inst);
80 bool setupBitInsert(Instruction *base,
81 Instruction *&src,
82 Constant *&mask,
83 Constant *&shift);
84 // Expand the bit field insert instruction on versions of OpenCL that
85 // don't support it.
86 bool expandBFI(CallInst *CI);
87 // Expand the bit field mask instruction on versions of OpenCL that
88 // don't support it.
89 bool expandBFM(CallInst *CI);
90 // On 7XX and 8XX hardware we do not have 24-bit signed operations, so in
91 // this case we need to expand them. These functions check for 24-bit functions
92 // and then expand them.
93 bool isSigned24BitOps(CallInst *CI);
94 void expandSigned24BitOps(CallInst *CI);
95 // One optimization that can occur is that if the required workgroup size is
96 // specified then the result of get_local_size is known at compile time and
97 // can be returned accordingly.
98 bool isRWGLocalOpt(CallInst *CI);
99 // On Northern Islands cards, the division is slightly less accurate than on
100 // previous generations, so we need to utilize a more accurate division. We
101 // can translate the accurate divide to a normal divide on all other cards.
102 bool convertAccurateDivide(CallInst *CI);
103 void expandAccurateDivide(CallInst *CI);
104 // If the alignment is set incorrectly, it can produce really inefficient
105 // code. This checks for this scenario and fixes it if possible.
106 bool correctMisalignedMemOp(Instruction *inst);
107
108 // If we are in no-opt mode, then we need to make sure that
109 // local samplers are properly propagated, as constant propagation
110 // doesn't occur and we need to know the value of kernel-defined
111 // samplers at compile time.
112 bool propagateSamplerInst(CallInst *CI);
113
114 // Helper functions
115
116 // Group of functions that recursively calculate the size of a structure based
117 // on its sub-types.
118 size_t getTypeSize(Type * const T, bool dereferencePtr = false);
119 size_t getTypeSize(StructType * const ST, bool dereferencePtr = false);
120 size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false);
121 size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false);
122 size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false);
123 size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false);
124 size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false);
125 size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false);
126
127 LLVMContext *mCTX;
128 Function *mF;
129 const AMDILSubtarget *mSTM;
130 SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
131 SmallVector<CallInst *, 16> isConstVec;
132 }; // class AMDILPeepholeOpt
133 char AMDILPeepholeOpt::ID = 0;
134 } // anonymous namespace
135
136 namespace llvm {
137 FunctionPass *
138 createAMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
139 {
140 return new AMDILPeepholeOpt(tm AMDIL_OPT_LEVEL_VAR);
141 }
142 } // llvm namespace
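// Usage sketch (assumed, not shown in this file): the factory above is meant
// to be called from the target's pass setup, roughly
//   PM.add(createAMDILPeepholeOpt(tm AMDIL_OPT_LEVEL_VAR));
// where PM is the PassManagerBase that the AMDIL/AMDGPU target machine populates.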
143
144 AMDILPeepholeOpt::AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
145 : FunctionPass(ID), TM(tm)
146 {
147 mDebug = false;
148 optLevel = TM.getOptLevel();
149
150 }
151
152 AMDILPeepholeOpt::~AMDILPeepholeOpt()
153 {
154 }
155
156 const char *
157 AMDILPeepholeOpt::getPassName() const
158 {
159 return "AMDIL PeepHole Optimization Pass";
160 }
161
162 bool
163 containsPointerType(Type *Ty)
164 {
165 if (!Ty) {
166 return false;
167 }
168 switch(Ty->getTypeID()) {
169 default:
170 return false;
171 case Type::StructTyID: {
172 const StructType *ST = dyn_cast<StructType>(Ty);
173 for (StructType::element_iterator stb = ST->element_begin(),
174 ste = ST->element_end(); stb != ste; ++stb) {
175 if (!containsPointerType(*stb)) {
176 continue;
177 }
178 return true;
179 }
180 break;
181 }
182 case Type::VectorTyID:
183 case Type::ArrayTyID:
184 return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType());
185 case Type::PointerTyID:
186 return true;
187 };
188 return false;
189 }
190
191 bool
192 AMDILPeepholeOpt::dumpAllIntoArena(Function &F)
193 {
194 bool dumpAll = false;
195 for (Function::const_arg_iterator cab = F.arg_begin(),
196 cae = F.arg_end(); cab != cae; ++cab) {
197 const Argument *arg = cab;
198 const PointerType *PT = dyn_cast<PointerType>(arg->getType());
199 if (!PT) {
200 continue;
201 }
202 Type *DereferencedType = PT->getElementType();
203 if (!dyn_cast<StructType>(DereferencedType)
204 ) {
205 continue;
206 }
207 if (!containsPointerType(DereferencedType)) {
208 continue;
209 }
210 // FIXME: Because a pointer inside of a struct/union may be aliased to
211 // another pointer we need to take the conservative approach and place all
212 // pointers into the arena until more advanced detection is implemented.
213 dumpAll = true;
214 }
215 return dumpAll;
216 }
217 void
218 AMDILPeepholeOpt::doIsConstCallConversionIfNeeded()
219 {
220 if (isConstVec.empty()) {
221 return;
222 }
223 for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) {
224 CallInst *CI = isConstVec[x];
225 Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
226 Type *aType = Type::getInt32Ty(*mCTX);
227 Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
228 : ConstantInt::get(aType, 0);
229 CI->replaceAllUsesWith(Val);
230 CI->eraseFromParent();
231 }
232 isConstVec.clear();
233 }
234 void
235 AMDILPeepholeOpt::doAtomicConversionIfNeeded(Function &F)
236 {
237 // Don't do anything if we don't have any atomic operations.
238 if (atomicFuncs.empty()) {
239 return;
240 }
241 // Change the function name for the atomic if it is required
242 uint32_t size = atomicFuncs.size();
243 for (uint32_t x = 0; x < size; ++x) {
244 atomicFuncs[x].first->setOperand(
245 atomicFuncs[x].first->getNumOperands()-1,
246 atomicFuncs[x].second);
247
248 }
249 mChanged = true;
250 if (mConvertAtomics) {
251 return;
252 }
253 }
254
255 bool
256 AMDILPeepholeOpt::runOnFunction(Function &MF)
257 {
258 mChanged = false;
259 mF = &MF;
260 mSTM = &TM.getSubtarget<AMDILSubtarget>();
261 if (mDebug) {
262 MF.dump();
263 }
264 mCTX = &MF.getType()->getContext();
265 mConvertAtomics = true;
266 safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
267 std::bind1st(std::mem_fun(&AMDILPeepholeOpt::instLevelOptimizations),
268 this));
269
270 doAtomicConversionIfNeeded(MF);
271 doIsConstCallConversionIfNeeded();
272
273 if (mDebug) {
274 MF.dump();
275 }
276 return mChanged;
277 }
278
279 bool
280 AMDILPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)
281 {
282 Instruction *inst = (*bbb);
283 CallInst *CI = dyn_cast<CallInst>(inst);
284 if (!CI) {
285 return false;
286 }
287 if (isSigned24BitOps(CI)) {
288 expandSigned24BitOps(CI);
289 ++(*bbb);
290 CI->eraseFromParent();
291 return true;
292 }
293 if (propagateSamplerInst(CI)) {
294 return false;
295 }
296 if (expandBFI(CI) || expandBFM(CI)) {
297 ++(*bbb);
298 CI->eraseFromParent();
299 return true;
300 }
301 if (convertAccurateDivide(CI)) {
302 expandAccurateDivide(CI);
303 ++(*bbb);
304 CI->eraseFromParent();
305 return true;
306 }
307
308 StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
309 if (calleeName.startswith("__amdil_is_constant")) {
310 // If we do not have optimizations, then this
311 // cannot be properly evaluated, so we add the
312 // call instructions to a vector and process
313 // them at the end of processing, after the
314 // samplers have been correctly handled.
315 if (optLevel == CodeGenOpt::None) {
316 isConstVec.push_back(CI);
317 return false;
318 } else {
319 Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
320 Type *aType = Type::getInt32Ty(*mCTX);
321 Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
322 : ConstantInt::get(aType, 0);
323 CI->replaceAllUsesWith(Val);
324 ++(*bbb);
325 CI->eraseFromParent();
326 return true;
327 }
328 }
329
330 if (calleeName.equals("__amdil_is_asic_id_i32")) {
331 ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
332 Type *aType = Type::getInt32Ty(*mCTX);
333 Value *Val = CV;
334 if (Val) {
335 Val = ConstantInt::get(aType,
336 mSTM->device()->getDeviceFlag() & CV->getZExtValue());
337 } else {
338 Val = ConstantInt::get(aType, 0);
339 }
340 CI->replaceAllUsesWith(Val);
341 ++(*bbb);
342 CI->eraseFromParent();
343 return true;
344 }
345 Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
346 if (!F) {
347 return false;
348 }
349 if (F->getName().startswith("__atom") && !CI->getNumUses()
350 && F->getName().find("_xchg") == StringRef::npos) {
351 std::string buffer(F->getName().str() + "_noret");
352 F = dyn_cast<Function>(
353 F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
354 atomicFuncs.push_back(std::make_pair <CallInst*, Function*>(CI, F));
355 }
356
357 if (!mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)
358 && !mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) {
359 return false;
360 }
361 if (!mConvertAtomics) {
362 return false;
363 }
364 StringRef name = F->getName();
365 if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
366 mConvertAtomics = false;
367 }
368 return false;
369 }
370
371 bool
372 AMDILPeepholeOpt::setupBitInsert(Instruction *base,
373 Instruction *&src,
374 Constant *&mask,
375 Constant *&shift)
376 {
377 if (!base) {
378 if (mDebug) {
379 dbgs() << "Null pointer passed into function.\n";
380 }
381 return false;
382 }
383 bool andOp = false;
384 if (base->getOpcode() == Instruction::Shl) {
385 shift = dyn_cast<Constant>(base->getOperand(1));
386 } else if (base->getOpcode() == Instruction::And) {
387 mask = dyn_cast<Constant>(base->getOperand(1));
388 andOp = true;
389 } else {
390 if (mDebug) {
391 dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
392 }
393 // If the base is neither a Shl nor an And, we don't fit any of the patterns above.
394 return false;
395 }
396 src = dyn_cast<Instruction>(base->getOperand(0));
397 if (!src) {
398 if (mDebug) {
399 dbgs() << "Failed setup since the base operand is not an instruction!\n";
400 }
401 return false;
402 }
403 // If we find an 'and' operation, then we don't need to
404 // find the next operation as we already know the
405 // bits that are valid at this point.
406 if (andOp) {
407 return true;
408 }
409 if (src->getOpcode() == Instruction::Shl && !shift) {
410 shift = dyn_cast<Constant>(src->getOperand(1));
411 src = dyn_cast<Instruction>(src->getOperand(0));
412 } else if (src->getOpcode() == Instruction::And && !mask) {
413 mask = dyn_cast<Constant>(src->getOperand(1));
414 }
415 if (!mask && !shift) {
416 if (mDebug) {
417 dbgs() << "Failed setup since both mask and shift are NULL!\n";
418 }
419 // Did not find a constant mask or a shift.
420 return false;
421 }
422 return true;
423 }
424 bool
425 AMDILPeepholeOpt::optimizeBitInsert(Instruction *inst)
426 {
427 if (!inst) {
428 return false;
429 }
430 if (!inst->isBinaryOp()) {
431 return false;
432 }
433 if (inst->getOpcode() != Instruction::Or) {
434 return false;
435 }
436 if (optLevel == CodeGenOpt::None) {
437 return false;
438 }
439 // We want to do an optimization on a sequence of ops that in the end equals a
440 // single ISA instruction.
441 // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
442 // Some simplified versions of this pattern are as follows:
443 // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
444 // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
445 // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
446 // (A & B) | (D << F) when (1 << F) >= B
447 // (A << C) | (D & E) when (1 << C) >= E
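// Illustrative example with hypothetical IR values:
//   %lo = and i32 %A, 255           ; B = 0xFF -> width(B) = 8,  offset(B) = 0
//   %hi = shl i32 %D, 8             ; F = 8    -> width    = 24, offset    = 8
//   %or = or  i32 %lo, %hi
// satisfies the RHS form of constraint 3 below (8 >= 8 + 0) and can be
// collapsed into roughly
//   %or = call i32 @__amdil_ubit_insert_u32(i32 24, i32 8, i32 %D, i32 %lo)
// i.e. the low 24 bits of %D are inserted at bit offset 8 over %lo.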
448 if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
449 // The HD4XXX hardware doesn't support the ubit_insert instruction.
450 return false;
451 }
452 Type *aType = inst->getType();
453 bool isVector = aType->isVectorTy();
454 int numEle = 1;
455 // This optimization only works on 32bit integers.
456 if (aType->getScalarType()
457 != Type::getInt32Ty(inst->getContext())) {
458 return false;
459 }
460 if (isVector) {
461 const VectorType *VT = dyn_cast<VectorType>(aType);
462 numEle = VT->getNumElements();
463 // We currently cannot support more than 4 elements in an intrinsic and we
464 // cannot support Vec3 types.
465 if (numEle > 4 || numEle == 3) {
466 return false;
467 }
468 }
469 // TODO: Handle vectors.
470 if (isVector) {
471 if (mDebug) {
472 dbgs() << "!!! Vectors are not supported yet!\n";
473 }
474 return false;
475 }
476 Instruction *LHSSrc = NULL, *RHSSrc = NULL;
477 Constant *LHSMask = NULL, *RHSMask = NULL;
478 Constant *LHSShift = NULL, *RHSShift = NULL;
479 Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
480 Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
481 if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
482 if (mDebug) {
483 dbgs() << "Found an OR Operation that failed setup!\n";
484 inst->dump();
485 if (LHS) { LHS->dump(); }
486 if (LHSSrc) { LHSSrc->dump(); }
487 if (LHSMask) { LHSMask->dump(); }
488 if (LHSShift) { LHSShift->dump(); }
489 }
490 // There was an issue with the setup for BitInsert.
491 return false;
492 }
493 if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
494 if (mDebug) {
495 dbgs() << "Found an OR Operation that failed setup!\n";
496 inst->dump();
497 if (RHS) { RHS->dump(); }
498 if (RHSSrc) { RHSSrc->dump(); }
499 if (RHSMask) { RHSMask->dump(); }
500 if (RHSShift) { RHSShift->dump(); }
501 }
502 // There was an issue with the setup for BitInsert.
503 return false;
504 }
505 if (mDebug) {
506 dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n";
507 dbgs() << "Op: "; inst->dump();
508 dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; }
509 dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; }
510 dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
511 dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; }
512 dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; }
513 dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; }
514 dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
515 dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; }
516 }
517 Constant *offset = NULL;
518 Constant *width = NULL;
519 int32_t lhsMaskVal = 0, rhsMaskVal = 0;
520 int32_t lhsShiftVal = 0, rhsShiftVal = 0;
521 int32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
522 int32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
523 lhsMaskVal = (int32_t)(LHSMask
524 ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
525 rhsMaskVal = (int32_t)(RHSMask
526 ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
527 lhsShiftVal = (int32_t)(LHSShift
528 ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
529 rhsShiftVal = (int32_t)(RHSShift
530 ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
531 lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
532 rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
533 lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
534 rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
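// Worked example with hypothetical values: a mask of 0x00000FF0 gives
//   width  = CountPopulation_32(0x00000FF0)    = 8
//   offset = CountTrailingZeros_32(0x00000FF0) = 4
// while a shift-only operand (mask value of 0) with shift C gives
//   width = 32 - C and offset = C.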
535 // TODO: Handle the case of A & B | D & ~B (i.e. inverted masks).
536 if (mDebug) {
537 dbgs() << "Found pattern: \'((A" << (LHSMask ? " & B)" : ")");
538 dbgs() << (LHSShift ? " << C)" : ")") << " | ((D" ;
539 dbgs() << (RHSMask ? " & E)" : ")");
540 dbgs() << (RHSShift ? " << F)\'\n" : ")\'\n");
541 dbgs() << "A = LHSSrc\t\tD = RHSSrc \n";
542 dbgs() << "B = " << lhsMaskVal << "\t\tE = " << rhsMaskVal << "\n";
543 dbgs() << "C = " << lhsShiftVal << "\t\tF = " << rhsShiftVal << "\n";
544 dbgs() << "width(B) = " << lhsMaskWidth;
545 dbgs() << "\twidth(E) = " << rhsMaskWidth << "\n";
546 dbgs() << "offset(B) = " << lhsMaskOffset;
547 dbgs() << "\toffset(E) = " << rhsMaskOffset << "\n";
548 dbgs() << "Constraints: \n";
549 dbgs() << "\t(1) B ^ E == 0\n";
550 dbgs() << "\t(2-LHS) B is a mask\n";
551 dbgs() << "\t(2-LHS) E is a mask\n";
552 dbgs() << "\t(3-LHS) (offset(B)) >= (width(E) + offset(E))\n";
553 dbgs() << "\t(3-RHS) (offset(E)) >= (width(B) + offset(B))\n";
554 }
555 if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
556 if (mDebug) {
557 dbgs() << lhsMaskVal << " ^ " << rhsMaskVal;
558 dbgs() << " = " << (lhsMaskVal ^ rhsMaskVal) << "\n";
559 dbgs() << "Failed constraint 1!\n";
560 }
561 return false;
562 }
563 if (mDebug) {
564 dbgs() << "LHS = " << lhsMaskOffset << "";
565 dbgs() << " >= (" << rhsMaskWidth << " + " << rhsMaskOffset << ") = ";
566 dbgs() << (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset));
567 dbgs() << "\nRHS = " << rhsMaskOffset << "";
568 dbgs() << " >= (" << lhsMaskWidth << " + " << lhsMaskOffset << ") = ";
569 dbgs() << (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset));
570 dbgs() << "\n";
571 }
572 if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
573 offset = ConstantInt::get(aType, lhsMaskOffset, false);
574 width = ConstantInt::get(aType, lhsMaskWidth, false);
575 RHSSrc = RHS;
576 if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
577 if (mDebug) {
578 dbgs() << "Value is not a Mask: " << lhsMaskVal << "\n";
579 dbgs() << "Failed constraint 2!\n";
580 }
581 return false;
582 }
583 if (!LHSShift) {
584 LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
585 "MaskShr", LHS);
586 } else if (lhsShiftVal != lhsMaskOffset) {
587 LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
588 "MaskShr", LHS);
589 }
590 if (mDebug) {
591 dbgs() << "Optimizing LHS!\n";
592 }
593 } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
594 offset = ConstantInt::get(aType, rhsMaskOffset, false);
595 width = ConstantInt::get(aType, rhsMaskWidth, false);
596 LHSSrc = RHSSrc;
597 RHSSrc = LHS;
598 if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
599 if (mDebug) {
600 dbgs() << "Non-Mask: " << rhsMaskVal << "\n";
601 dbgs() << "Failed constraint 2!\n";
602 }
603 return false;
604 }
605 if (!RHSShift) {
606 LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
607 "MaskShr", RHS);
608 } else if (rhsShiftVal != rhsMaskOffset) {
609 LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
610 "MaskShr", RHS);
611 }
612 if (mDebug) {
613 dbgs() << "Optimizing RHS!\n";
614 }
615 } else {
616 if (mDebug) {
617 dbgs() << "Failed constraint 3!\n";
618 }
619 return false;
620 }
621 if (mDebug) {
622 dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; }
623 dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; }
624 dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; }
625 dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; }
626 }
627 if (!offset || !width) {
628 if (mDebug) {
629 dbgs() << "Either width or offset are NULL, failed detection!\n";
630 }
631 return false;
632 }
633 // Let's create the function signature.
634 std::vector<Type *> callTypes;
635 callTypes.push_back(aType);
636 callTypes.push_back(aType);
637 callTypes.push_back(aType);
638 callTypes.push_back(aType);
639 FunctionType *funcType = FunctionType::get(aType, callTypes, false);
640 std::string name = "__amdil_ubit_insert";
641 if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
642 Function *Func =
643 dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
644 getOrInsertFunction(llvm::StringRef(name), funcType));
645 Value *Operands[4] = {
646 width,
647 offset,
648 LHSSrc,
649 RHSSrc
650 };
651 CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
652 if (mDebug) {
653 dbgs() << "Old Inst: ";
654 inst->dump();
655 dbgs() << "New Inst: ";
656 CI->dump();
657 dbgs() << "\n\n";
658 }
659 CI->insertBefore(inst);
660 inst->replaceAllUsesWith(CI);
661 return true;
662 }
663
664 bool
665 AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst)
666 {
667 if (!inst) {
668 return false;
669 }
670 if (!inst->isBinaryOp()) {
671 return false;
672 }
673 if (inst->getOpcode() != Instruction::And) {
674 return false;
675 }
676 if (optLevel == CodeGenOpt::None) {
677 return false;
678 }
679 // We want to do some simple optimizations on shift-right/and patterns. The
680 // basic optimization is to turn (A >> B) & C, where A is a 32-bit type, B is a
681 // value smaller than 32 and C is a mask. If C is a constant value, then the
682 // following transformation can occur. For signed integers, it turns into the
683 // function call dst = __amdil_ibit_extract(log2(C), B, A). For unsigned
684 // integers, it turns into the function call dst =
685 // __amdil_ubit_extract(log2(C), B, A). The function __amdil_[u|i]bit_extract
686 // can be found in Section 7.9 of the ATI IL spec of the Stream SDK for
687 // Evergreen hardware.
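// Illustrative example with hypothetical IR values: for a 32-bit value %A,
//   %s = lshr i32 %A, 5
//   %r = and  i32 %s, 7              ; 7 = 0b111 is a mask of width 3
// is rewritten below into a call of roughly the form
//   %r = call i32 @llvm.AMDIL.bit.extract.u32(i32 %A, i32 5, i32 3)
// i.e. extract 3 bits of %A starting at bit 5, unsigned because lshr was used.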
688 if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
689 // This does not work on HD4XXX hardware.
690 return false;
691 }
692 Type *aType = inst->getType();
693 bool isVector = aType->isVectorTy();
694
695 // XXX Support vector types
696 if (isVector) {
697 return false;
698 }
699 int numEle = 1;
700 // This only works on 32bit integers
701 if (aType->getScalarType()
702 != Type::getInt32Ty(inst->getContext())) {
703 return false;
704 }
705 if (isVector) {
706 const VectorType *VT = dyn_cast<VectorType>(aType);
707 numEle = VT->getNumElements();
708 // We currently cannot support more than 4 elements in an intrinsic and we
709 // cannot support Vec3 types.
710 if (numEle > 4 || numEle == 3) {
711 return false;
712 }
713 }
714 BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
715 // If the first operand is not a shift instruction, then we can return as it
716 // doesn't match this pattern.
717 if (!ShiftInst || !ShiftInst->isShift()) {
718 return false;
719 }
720 // If we are a shift left, then we don't match this pattern.
721 if (ShiftInst->getOpcode() == Instruction::Shl) {
722 return false;
723 }
724 bool isSigned = ShiftInst->isArithmeticShift();
725 Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
726 Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
727 // Let's make sure that the shift value and the AND mask are constant integers.
728 if (!AndMask || !ShrVal) {
729 return false;
730 }
731 Constant *newMaskConst;
732 Constant *shiftValConst;
733 if (isVector) {
734 // Handle the vector case
735 std::vector<Constant *> maskVals;
736 std::vector<Constant *> shiftVals;
737 ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask);
738 ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal);
739 Type *scalarType = AndMaskVec->getType()->getScalarType();
740 assert(AndMaskVec->getNumOperands() ==
741 ShrValVec->getNumOperands() && "cannot have a "
742 "combination where the number of elements to a "
743 "shift and an and are different!");
744 for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) {
745 ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x));
746 ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x));
747 if (!AndCI || !ShiftIC) {
748 return false;
749 }
750 uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
751 if (!isMask_32(maskVal)) {
752 return false;
753 }
754 maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
755 uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
756 // If the mask or shiftval is greater than the bitcount, then break out.
757 if (maskVal >= 32 || shiftVal >= 32) {
758 return false;
759 }
760 // If the mask val is greater than the number of original bits left
761 // then this optimization is invalid.
762 if (maskVal > (32 - shiftVal)) {
763 return false;
764 }
765 maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
766 shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
767 }
768 newMaskConst = ConstantVector::get(maskVals);
769 shiftValConst = ConstantVector::get(shiftVals);
770 } else {
771 // Handle the scalar case
772 uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue();
773 // This must be a mask value where all lower bits are set to 1 and then any
774 // bit higher is set to 0.
775 if (!isMask_32(maskVal)) {
776 return false;
777 }
778 maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
779 // Count the number of bits set in the mask, this is the width of the
780 // resulting bit set that is extracted from the source value.
781 uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue();
782 // If the mask or shift val is greater than the bitcount, then break out.
783 if (maskVal >= 32 || shiftVal >= 32) {
784 return false;
785 }
786 // If the mask val is greater than the number of original bits left then
787 // this optimization is invalid.
788 if (maskVal > (32 - shiftVal)) {
789 return false;
790 }
791 newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
792 shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
793 }
794 // Let's create the function signature.
795 std::vector<Type *> callTypes;
796 callTypes.push_back(aType);
797 callTypes.push_back(aType);
798 callTypes.push_back(aType);
799 FunctionType *funcType = FunctionType::get(aType, callTypes, false);
800 std::string name = "llvm.AMDIL.bit.extract.u32";
801 if (isVector) {
802 name += ".v" + itostr(numEle) + "i32";
803 } else {
804 name += ".";
805 }
806 // Let's create the function.
807 Function *Func =
808 dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
809 getOrInsertFunction(llvm::StringRef(name), funcType));
810 Value *Operands[3] = {
811 ShiftInst->getOperand(0),
812 shiftValConst,
813 newMaskConst
814 };
815 // Let's create the call with the operands.
816 CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
817 CI->setDoesNotAccessMemory();
818 CI->insertBefore(inst);
819 inst->replaceAllUsesWith(CI);
820 return true;
821 }
822
823 bool
824 AMDILPeepholeOpt::expandBFI(CallInst *CI)
825 {
826 if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) {
827 return false;
828 }
829 Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
830 if (!LHS->getName().startswith("__amdil_bfi")) {
831 return false;
832 }
833 Type* type = CI->getOperand(0)->getType();
834 Constant *negOneConst = NULL;
835 if (type->isVectorTy()) {
836 std::vector<Constant *> negOneVals;
837 negOneConst = ConstantInt::get(CI->getContext(),
838 APInt(32, StringRef("-1"), 10));
839 for (size_t x = 0,
840 y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
841 negOneVals.push_back(negOneConst);
842 }
843 negOneConst = ConstantVector::get(negOneVals);
844 } else {
845 negOneConst = ConstantInt::get(CI->getContext(),
846 APInt(32, StringRef("-1"), 10));
847 }
848 // __amdil_bfi => (A & B) | (~A & C)
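// Worked example with hypothetical values: A = 0x0000FFFF, B = 0x12345678,
// C = 0xABCDABCD gives
//   (A & B)  = 0x00005678
//   (~A & C) = 0xABCD0000
//   result   = 0xABCD5678
// i.e. bits selected by A come from B, the remaining bits come from C.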
849 BinaryOperator *lhs =
850 BinaryOperator::Create(Instruction::And, CI->getOperand(0),
851 CI->getOperand(1), "bfi_and", CI);
852 BinaryOperator *rhs =
853 BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst,
854 "bfi_not", CI);
855 rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2),
856 "bfi_and", CI);
857 lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI);
858 CI->replaceAllUsesWith(lhs);
859 return true;
860 }
861
862 bool
863 AMDILPeepholeOpt::expandBFM(CallInst *CI)
864 {
865 if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) {
866 return false;
867 }
868 Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
869 if (!LHS->getName().startswith("__amdil_bfm")) {
870 return false;
871 }
872 // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f)
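// Worked example with hypothetical values: src0 = 5, src1 = 8 gives
//   ((1 << (5 & 0x1F)) - 1) << (8 & 0x1F) = (0x20 - 1) << 8 = 0x1F00
// i.e. a mask of src0 bits starting at bit src1.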
873 Constant *newMaskConst = NULL;
874 Constant *newShiftConst = NULL;
875 Type* type = CI->getOperand(0)->getType();
876 if (type->isVectorTy()) {
877 std::vector<Constant*> newMaskVals, newShiftVals;
878 newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
879 newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
880 for (size_t x = 0,
881 y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
882 newMaskVals.push_back(newMaskConst);
883 newShiftVals.push_back(newShiftConst);
884 }
885 newMaskConst = ConstantVector::get(newMaskVals);
886 newShiftConst = ConstantVector::get(newShiftVals);
887 } else {
888 newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
889 newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
890 }
891 BinaryOperator *lhs =
892 BinaryOperator::Create(Instruction::And, CI->getOperand(0),
893 newMaskConst, "bfm_mask", CI);
894 lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst,
895 lhs, "bfm_shl", CI);
896 lhs = BinaryOperator::Create(Instruction::Sub, lhs,
897 newShiftConst, "bfm_sub", CI);
898 BinaryOperator *rhs =
899 BinaryOperator::Create(Instruction::And, CI->getOperand(1),
900 newMaskConst, "bfm_mask", CI);
901 lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI);
902 CI->replaceAllUsesWith(lhs);
903 return true;
904 }
905
906 bool
907 AMDILPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb)
908 {
909 Instruction *inst = (*bbb);
910 if (optimizeCallInst(bbb)) {
911 return true;
912 }
913 if (optimizeBitExtract(inst)) {
914 return false;
915 }
916 if (optimizeBitInsert(inst)) {
917 return false;
918 }
919 if (correctMisalignedMemOp(inst)) {
920 return false;
921 }
922 return false;
923 }
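// Illustrative example with hypothetical values for the routine below: a load
// of a 12-byte struct annotated with align 2 has size (12) > alignment (2) and
// alignment < 4, so its alignment is reset to 0 and the target chooses the
// natural alignment instead.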
924 bool
925 AMDILPeepholeOpt::correctMisalignedMemOp(Instruction *inst)
926 {
927 LoadInst *linst = dyn_cast<LoadInst>(inst);
928 StoreInst *sinst = dyn_cast<StoreInst>(inst);
929 unsigned alignment;
930 Type* Ty = inst->getType();
931 if (linst) {
932 alignment = linst->getAlignment();
933 Ty = inst->getType();
934 } else if (sinst) {
935 alignment = sinst->getAlignment();
936 Ty = sinst->getValueOperand()->getType();
937 } else {
938 return false;
939 }
940 unsigned size = getTypeSize(Ty);
941 if (size == alignment || size < alignment) {
942 return false;
943 }
944 if (!Ty->isStructTy()) {
945 return false;
946 }
947 if (alignment < 4) {
948 if (linst) {
949 linst->setAlignment(0);
950 return true;
951 } else if (sinst) {
952 sinst->setAlignment(0);
953 return true;
954 }
955 }
956 return false;
957 }
958 bool
959 AMDILPeepholeOpt::isSigned24BitOps(CallInst *CI)
960 {
961 if (!CI) {
962 return false;
963 }
964 Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
965 std::string namePrefix = LHS->getName().substr(0, 14);
966 if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24"
967 && namePrefix != "__amdil__imul24_high") {
968 return false;
969 }
970 if (mSTM->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
971 return false;
972 }
973 return true;
974 }
975
976 void
977 AMDILPeepholeOpt::expandSigned24BitOps(CallInst *CI)
978 {
979 assert(isSigned24BitOps(CI) && "Must be a "
980 "signed 24 bit operation to call this function!");
981 Value *LHS = CI->getOperand(CI->getNumOperands()-1);
982 // On 7XX and 8XX we do not have signed 24-bit operations, so we need to
983 // expand them to the following:
984 // imul24 turns into 32bit imul
985 // imad24 turns into 32bit imad
986 // imul24_high turns into 32bit imulhigh
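// Illustrative example with hypothetical IR values: a scalar call such as
//   %r = call i32 @__amdil_imul24(i32 %a, i32 %b)
// is rewritten below into a plain 32-bit multiply,
//   %r = mul i32 %a, %b
// while imad24 and imul24_high become calls to __amdil_imad / __amdil_imul_high.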
987 if (LHS->getName().substr(0, 14) == "__amdil_imad24") {
988 Type *aType = CI->getOperand(0)->getType();
989 bool isVector = aType->isVectorTy();
990 int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
991 std::vector<Type*> callTypes;
992 callTypes.push_back(CI->getOperand(0)->getType());
993 callTypes.push_back(CI->getOperand(1)->getType());
994 callTypes.push_back(CI->getOperand(2)->getType());
995 FunctionType *funcType =
996 FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
997 std::string name = "__amdil_imad";
998 if (isVector) {
999 name += "_v" + itostr(numEle) + "i32";
1000 } else {
1001 name += "_i32";
1002 }
1003 Function *Func = dyn_cast<Function>(
1004 CI->getParent()->getParent()->getParent()->
1005 getOrInsertFunction(llvm::StringRef(name), funcType));
1006 Value *Operands[3] = {
1007 CI->getOperand(0),
1008 CI->getOperand(1),
1009 CI->getOperand(2)
1010 };
1011 CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
1012 nCI->insertBefore(CI);
1013 CI->replaceAllUsesWith(nCI);
1014 } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") {
1015 BinaryOperator *mulOp =
1016 BinaryOperator::Create(Instruction::Mul, CI->getOperand(0),
1017 CI->getOperand(1), "imul24", CI);
1018 CI->replaceAllUsesWith(mulOp);
1019 } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") {
1020 Type *aType = CI->getOperand(0)->getType();
1021
1022 bool isVector = aType->isVectorTy();
1023 int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
1024 std::vector<Type*> callTypes;
1025 callTypes.push_back(CI->getOperand(0)->getType());
1026 callTypes.push_back(CI->getOperand(1)->getType());
1027 FunctionType *funcType =
1028 FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
1029 std::string name = "__amdil_imul_high";
1030 if (isVector) {
1031 name += "_v" + itostr(numEle) + "i32";
1032 } else {
1033 name += "_i32";
1034 }
1035 Function *Func = dyn_cast<Function>(
1036 CI->getParent()->getParent()->getParent()->
1037 getOrInsertFunction(llvm::StringRef(name), funcType));
1038 Value *Operands[2] = {
1039 CI->getOperand(0),
1040 CI->getOperand(1)
1041 };
1042 CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
1043 nCI->insertBefore(CI);
1044 CI->replaceAllUsesWith(nCI);
1045 }
1046 }
1047
1048 bool
1049 AMDILPeepholeOpt::isRWGLocalOpt(CallInst *CI)
1050 {
1051 return (CI != NULL
1052 && CI->getOperand(CI->getNumOperands() - 1)->getName()
1053 == "__amdil_get_local_size_int");
1054 }
1055
1056 bool
1057 AMDILPeepholeOpt::convertAccurateDivide(CallInst *CI)
1058 {
1059 if (!CI) {
1060 return false;
1061 }
1062 if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD6XXX
1063 && (mSTM->getDeviceName() == "cayman")) {
1064 return false;
1065 }
1066 return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20)
1067 == "__amdil_improved_div";
1068 }
1069
1070 void
1071 AMDILPeepholeOpt::expandAccurateDivide(CallInst *CI)
1072 {
1073 assert(convertAccurateDivide(CI)
1074 && "expanding accurate divide can only happen if it is expandable!");
1075 BinaryOperator *divOp =
1076 BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0),
1077 CI->getOperand(1), "fdiv32", CI);
1078 CI->replaceAllUsesWith(divOp);
1079 }
1080
1081 bool
1082 AMDILPeepholeOpt::propagateSamplerInst(CallInst *CI)
1083 {
1084 if (optLevel != CodeGenOpt::None) {
1085 return false;
1086 }
1087
1088 if (!CI) {
1089 return false;
1090 }
1091
1092 unsigned funcNameIdx = 0;
1093 funcNameIdx = CI->getNumOperands() - 1;
1094 StringRef calleeName = CI->getOperand(funcNameIdx)->getName();
1095 if (calleeName != "__amdil_image2d_read_norm"
1096 && calleeName != "__amdil_image2d_read_unnorm"
1097 && calleeName != "__amdil_image3d_read_norm"
1098 && calleeName != "__amdil_image3d_read_unnorm") {
1099 return false;
1100 }
1101
1102 unsigned samplerIdx = 2;
1103 samplerIdx = 1;
1104 Value *sampler = CI->getOperand(samplerIdx);
1105 LoadInst *lInst = dyn_cast<LoadInst>(sampler);
1106 if (!lInst) {
1107 return false;
1108 }
1109
1110 if (lInst->getPointerAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
1111 return false;
1112 }
1113
1114 GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand());
1115 // If we are loading from what is not a global value, then we
1116 // fail and return.
1117 if (!gv) {
1118 return false;
1119 }
1120
1121 // If we don't have an initializer or we have an initializer and
1122 // the initializer is not a 32bit integer, we fail.
1123 if (!gv->hasInitializer()
1124 || !gv->getInitializer()->getType()->isIntegerTy(32)) {
1125 return false;
1126 }
1127
1128 // Now that we have the global variable initializer, let's replace
1129 // all uses of the load instruction with the samplerVal and
1130 // reparse the __amdil_is_constant() function.
1131 Constant *samplerVal = gv->getInitializer();
1132 lInst->replaceAllUsesWith(samplerVal);
1133 return true;
1134 }
1135
1136 bool
1137 AMDILPeepholeOpt::doInitialization(Module &M)
1138 {
1139 return false;
1140 }
1141
1142 bool
1143 AMDILPeepholeOpt::doFinalization(Module &M)
1144 {
1145 return false;
1146 }
1147
1148 void
1149 AMDILPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const
1150 {
1151 AU.addRequired<MachineFunctionAnalysis>();
1152 FunctionPass::getAnalysisUsage(AU);
1153 AU.setPreservesAll();
1154 }
1155
1156 size_t AMDILPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
1157 size_t size = 0;
1158 if (!T) {
1159 return size;
1160 }
1161 switch (T->getTypeID()) {
1162 case Type::X86_FP80TyID:
1163 case Type::FP128TyID:
1164 case Type::PPC_FP128TyID:
1165 case Type::LabelTyID:
1166 assert(0 && "These types are not supported by this backend");
1167 default:
1168 case Type::FloatTyID:
1169 case Type::DoubleTyID:
1170 size = T->getPrimitiveSizeInBits() >> 3;
1171 break;
1172 case Type::PointerTyID:
1173 size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
1174 break;
1175 case Type::IntegerTyID:
1176 size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
1177 break;
1178 case Type::StructTyID:
1179 size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
1180 break;
1181 case Type::ArrayTyID:
1182 size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
1183 break;
1184 case Type::FunctionTyID:
1185 size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
1186 break;
1187 case Type::VectorTyID:
1188 size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
1189 break;
1190 };
1191 return size;
1192 }
1193
1194 size_t AMDILPeepholeOpt::getTypeSize(StructType * const ST,
1195 bool dereferencePtr) {
1196 size_t size = 0;
1197 if (!ST) {
1198 return size;
1199 }
1200 Type *curType;
1201 StructType::element_iterator eib;
1202 StructType::element_iterator eie;
1203 for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
1204 curType = *eib;
1205 size += getTypeSize(curType, dereferencePtr);
1206 }
1207 return size;
1208 }
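// Worked example for the struct case above (hypothetical struct): for
// { i32, i8, float } the element sizes sum to 4 + 1 + 4 = 9 bytes; note that
// no alignment padding is accounted for here.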
1209
1210 size_t AMDILPeepholeOpt::getTypeSize(IntegerType * const IT,
1211 bool dereferencePtr) {
1212 return IT ? (IT->getBitWidth() >> 3) : 0;
1213 }
1214
1215 size_t AMDILPeepholeOpt::getTypeSize(FunctionType * const FT,
1216 bool dereferencePtr) {
1217 assert(0 && "Should not be able to calculate the size of an function type");
1218 return 0;
1219 }
1220
1221 size_t AMDILPeepholeOpt::getTypeSize(ArrayType * const AT,
1222 bool dereferencePtr) {
1223 return (size_t)(AT ? (getTypeSize(AT->getElementType(),
1224 dereferencePtr) * AT->getNumElements())
1225 : 0);
1226 }
1227
1228 size_t AMDILPeepholeOpt::getTypeSize(VectorType * const VT,
1229 bool dereferencePtr) {
1230 return VT ? (VT->getBitWidth() >> 3) : 0;
1231 }
1232
1233 size_t AMDILPeepholeOpt::getTypeSize(PointerType * const PT,
1234 bool dereferencePtr) {
1235 if (!PT) {
1236 return 0;
1237 }
1238 Type *CT = PT->getElementType();
1239 if (CT->getTypeID() == Type::StructTyID &&
1240 PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
1241 return getTypeSize(dyn_cast<StructType>(CT));
1242 } else if (dereferencePtr) {
1243 size_t size = 0;
1244 for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
1245 size += getTypeSize(PT->getContainedType(x), dereferencePtr);
1246 }
1247 return size;
1248 } else {
1249 return 4;
1250 }
1251 }
1252
1253 size_t AMDILPeepholeOpt::getTypeSize(OpaqueType * const OT,
1254 bool dereferencePtr) {
1255 //assert(0 && "Should not be able to calculate the size of an opaque type");
1256 return 4;
1257 }