//===-- AMDILPeepholeOptimizer.cpp - AMDIL Peephole optimizations ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//

#include "AMDILAlgorithms.tpp"
#include "AMDILDevices.h"
#include "AMDILInstrInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Constants.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"

#include <sstream>

#if 0
STATISTIC(PointerAssignments, "Number of dynamic pointer "
    "assignments discovered");
STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
#endif

using namespace llvm;
// The Peephole optimization pass is used to do simple last minute
// optimizations that are required for correct code or to remove redundant
// functions.
namespace {

class OpaqueType;

class LLVM_LIBRARY_VISIBILITY AMDILPeepholeOpt : public FunctionPass {
public:
  TargetMachine &TM;
  static char ID;
  AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
  ~AMDILPeepholeOpt();
  const char *getPassName() const;
  bool runOnFunction(Function &F);
  bool doInitialization(Module &M);
  bool doFinalization(Module &M);
  void getAnalysisUsage(AnalysisUsage &AU) const;
protected:
private:
  // Function to initiate all of the instruction level optimizations.
  bool instLevelOptimizations(BasicBlock::iterator *inst);
  // Quick check to see if we need to dump all of the pointers into the
  // arena. If this is correct, then we set all pointers to exist in the arena.
  // This is a workaround for aliasing of pointers in a struct/union.
  bool dumpAllIntoArena(Function &F);
  // Because I don't want to invalidate any pointers while in the
  // safeNestedForEach function, I push atomic conversions to a vector and
  // handle them later. This function does the conversions if required.
  void doAtomicConversionIfNeeded(Function &F);
  // Because __amdil_is_constant cannot be properly evaluated if
  // optimizations are disabled, the calls are placed in a vector
  // and evaluated after the __amdil_image* functions are evaluated,
  // which should allow the __amdil_is_constant function to be
  // evaluated correctly.
  void doIsConstCallConversionIfNeeded();
  bool mChanged;
  bool mDebug;
  bool mConvertAtomics;
  CodeGenOpt::Level optLevel;
  // Run a series of tests to see if we can optimize a CALL instruction.
  bool optimizeCallInst(BasicBlock::iterator *bbb);
  // A peephole optimization to optimize bit extract sequences.
  bool optimizeBitExtract(Instruction *inst);
  // A peephole optimization to optimize bit insert sequences.
  bool optimizeBitInsert(Instruction *inst);
  bool setupBitInsert(Instruction *base,
                      Instruction *&src,
                      Constant *&mask,
                      Constant *&shift);
  // Expand the bit field insert instruction on versions of OpenCL that
  // don't support it.
  bool expandBFI(CallInst *CI);
  // Expand the bit field mask instruction on versions of OpenCL that
  // don't support it.
  bool expandBFM(CallInst *CI);
  // On 7XX and 8XX hardware we do not have 24 bit signed operations, so in
  // this case we need to expand them. These functions check for 24bit
  // functions and then expand them.
  bool isSigned24BitOps(CallInst *CI);
  void expandSigned24BitOps(CallInst *CI);
  // One optimization that can occur is that if the required workgroup size is
  // specified then the result of get_local_size is known at compile time and
  // can be returned accordingly.
  bool isRWGLocalOpt(CallInst *CI);
  // On Northern Islands cards the division is slightly less accurate than on
  // previous generations, so we need to utilize a more accurate division. On
  // all other cards we can translate the accurate divide into a normal divide.
  bool convertAccurateDivide(CallInst *CI);
  void expandAccurateDivide(CallInst *CI);
  // If the alignment is set incorrectly, it can produce really inefficient
  // code. This checks for this scenario and fixes it if possible.
  bool correctMisalignedMemOp(Instruction *inst);

  // If we are in no opt mode, then we need to make sure that
  // local samplers are properly propagated as constant propagation
  // doesn't occur and we need to know the value of kernel defined
  // samplers at compile time.
  bool propagateSamplerInst(CallInst *CI);

  // Helper functions

  // Group of functions that recursively calculate the size of a structure
  // based on its sub-types.
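  // Illustrative note (not from the original source): for a struct such as
  // { i32, <4 x float> } these helpers simply sum the element sizes
  // (4 + 16 = 20 bytes); padding and alignment are not taken into account.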
  size_t getTypeSize(Type * const T, bool dereferencePtr = false);
  size_t getTypeSize(StructType * const ST, bool dereferencePtr = false);
  size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false);
  size_t getTypeSize(FunctionType * const FT, bool dereferencePtr = false);
  size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false);
  size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false);
  size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false);
  size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false);

  LLVMContext *mCTX;
  Function *mF;
  const AMDILSubtarget *mSTM;
  SmallVector<std::pair<CallInst *, Function *>, 16> atomicFuncs;
  SmallVector<CallInst *, 16> isConstVec;
}; // class AMDILPeepholeOpt
char AMDILPeepholeOpt::ID = 0;
} // anonymous namespace

namespace llvm {
FunctionPass *
createAMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
{
  return new AMDILPeepholeOpt(tm AMDIL_OPT_LEVEL_VAR);
}
} // llvm namespace
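// Note: a minimal usage sketch (assuming the usual AMDIL target setup, which
// is not part of this file) would add the pass to the codegen pipeline from
// the target's pass configuration, e.g.:
//   PM.add(createAMDILPeepholeOpt(tm AMDIL_OPT_LEVEL_VAR));
// The surrounding pipeline hook is an assumption; only the factory above is
// defined here.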

AMDILPeepholeOpt::AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
  : FunctionPass(ID), TM(tm)
{
  mDebug = false;
  optLevel = TM.getOptLevel();
}

AMDILPeepholeOpt::~AMDILPeepholeOpt()
{
}

const char *
AMDILPeepholeOpt::getPassName() const
{
  return "AMDIL PeepHole Optimization Pass";
}

bool
containsPointerType(Type *Ty)
{
  if (!Ty) {
    return false;
  }
  switch (Ty->getTypeID()) {
  default:
    return false;
  case Type::StructTyID: {
    const StructType *ST = dyn_cast<StructType>(Ty);
    for (StructType::element_iterator stb = ST->element_begin(),
         ste = ST->element_end(); stb != ste; ++stb) {
      if (!containsPointerType(*stb)) {
        continue;
      }
      return true;
    }
    break;
  }
  case Type::VectorTyID:
  case Type::ArrayTyID:
    return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType());
  case Type::PointerTyID:
    return true;
  };
  return false;
}

bool
AMDILPeepholeOpt::dumpAllIntoArena(Function &F)
{
  bool dumpAll = false;
  for (Function::const_arg_iterator cab = F.arg_begin(),
       cae = F.arg_end(); cab != cae; ++cab) {
    const Argument *arg = cab;
    const PointerType *PT = dyn_cast<PointerType>(arg->getType());
    if (!PT) {
      continue;
    }
    Type *DereferencedType = PT->getElementType();
    if (!dyn_cast<StructType>(DereferencedType)) {
      continue;
    }
    if (!containsPointerType(DereferencedType)) {
      continue;
    }
    // FIXME: Because a pointer inside of a struct/union may be aliased to
    // another pointer we need to take the conservative approach and place all
    // pointers into the arena until more advanced detection is implemented.
    dumpAll = true;
  }
  return dumpAll;
}
void
AMDILPeepholeOpt::doIsConstCallConversionIfNeeded()
{
  if (isConstVec.empty()) {
    return;
  }
  for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) {
    CallInst *CI = isConstVec[x];
    Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
    Type *aType = Type::getInt32Ty(*mCTX);
    Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
                              : ConstantInt::get(aType, 0);
    CI->replaceAllUsesWith(Val);
    CI->eraseFromParent();
  }
  isConstVec.clear();
}
void
AMDILPeepholeOpt::doAtomicConversionIfNeeded(Function &F)
{
  // Don't do anything if we don't have any atomic operations.
  if (atomicFuncs.empty()) {
    return;
  }
  // Change the function name for the atomic if it is required.
  uint32_t size = atomicFuncs.size();
  for (uint32_t x = 0; x < size; ++x) {
    atomicFuncs[x].first->setOperand(
        atomicFuncs[x].first->getNumOperands()-1,
        atomicFuncs[x].second);
  }
  mChanged = true;
  if (mConvertAtomics) {
    return;
  }
}

bool
AMDILPeepholeOpt::runOnFunction(Function &MF)
{
  mChanged = false;
  mF = &MF;
  mSTM = &TM.getSubtarget<AMDILSubtarget>();
  if (mDebug) {
    MF.dump();
  }
  mCTX = &MF.getType()->getContext();
  mConvertAtomics = true;
  safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
      std::bind1st(std::mem_fun(&AMDILPeepholeOpt::instLevelOptimizations),
                   this));

  doAtomicConversionIfNeeded(MF);
  doIsConstCallConversionIfNeeded();

  if (mDebug) {
    MF.dump();
  }
  return mChanged;
}

bool
AMDILPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)
{
  Instruction *inst = (*bbb);
  CallInst *CI = dyn_cast<CallInst>(inst);
  if (!CI) {
    return false;
  }
  if (isSigned24BitOps(CI)) {
    expandSigned24BitOps(CI);
    ++(*bbb);
    CI->eraseFromParent();
    return true;
  }
  if (propagateSamplerInst(CI)) {
    return false;
  }
  if (expandBFI(CI) || expandBFM(CI)) {
    ++(*bbb);
    CI->eraseFromParent();
    return true;
  }
  if (convertAccurateDivide(CI)) {
    expandAccurateDivide(CI);
    ++(*bbb);
    CI->eraseFromParent();
    return true;
  }

  StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
  if (calleeName.startswith("__amdil_is_constant")) {
    // If we do not have optimizations, then this cannot be properly
    // evaluated, so we add the call instructions to a vector and process
    // them at the end of processing, after the samplers have been correctly
    // handled.
    if (optLevel == CodeGenOpt::None) {
      isConstVec.push_back(CI);
      return false;
    } else {
      Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
      Type *aType = Type::getInt32Ty(*mCTX);
      Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
                                : ConstantInt::get(aType, 0);
      CI->replaceAllUsesWith(Val);
      ++(*bbb);
      CI->eraseFromParent();
      return true;
    }
  }

  if (calleeName.equals("__amdil_is_asic_id_i32")) {
    ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
    Type *aType = Type::getInt32Ty(*mCTX);
    Value *Val = CV;
    if (Val) {
      Val = ConstantInt::get(aType,
          mSTM->device()->getDeviceFlag() & CV->getZExtValue());
    } else {
      Val = ConstantInt::get(aType, 0);
    }
    CI->replaceAllUsesWith(Val);
    ++(*bbb);
    CI->eraseFromParent();
    return true;
  }
  Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
  if (!F) {
    return false;
  }
  if (F->getName().startswith("__atom") && !CI->getNumUses()
      && F->getName().find("_xchg") == StringRef::npos) {
    std::string buffer(F->getName().str() + "_noret");
    F = dyn_cast<Function>(
        F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
    atomicFuncs.push_back(std::make_pair<CallInst*, Function*>(CI, F));
  }

  if (!mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)
      && !mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) {
    return false;
  }
  if (!mConvertAtomics) {
    return false;
  }
  StringRef name = F->getName();
  if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
    mConvertAtomics = false;
  }
  return false;
}

bool
AMDILPeepholeOpt::setupBitInsert(Instruction *base,
                                 Instruction *&src,
                                 Constant *&mask,
                                 Constant *&shift)
{
  if (!base) {
    if (mDebug) {
      dbgs() << "Null pointer passed into function.\n";
    }
    return false;
  }
  bool andOp = false;
  if (base->getOpcode() == Instruction::Shl) {
    shift = dyn_cast<Constant>(base->getOperand(1));
  } else if (base->getOpcode() == Instruction::And) {
    mask = dyn_cast<Constant>(base->getOperand(1));
    andOp = true;
  } else {
    if (mDebug) {
      dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
    }
    // If the base is neither a Shl nor an And, we don't fit any of the
    // patterns above.
    return false;
  }
  src = dyn_cast<Instruction>(base->getOperand(0));
  if (!src) {
    if (mDebug) {
      dbgs() << "Failed setup since the base operand is not an instruction!\n";
    }
    return false;
  }
  // If we find an 'and' operation, then we don't need to
  // find the next operation as we already know the
  // bits that are valid at this point.
  if (andOp) {
    return true;
  }
  if (src->getOpcode() == Instruction::Shl && !shift) {
    shift = dyn_cast<Constant>(src->getOperand(1));
    src = dyn_cast<Instruction>(src->getOperand(0));
  } else if (src->getOpcode() == Instruction::And && !mask) {
    mask = dyn_cast<Constant>(src->getOperand(1));
  }
  if (!mask && !shift) {
    if (mDebug) {
      dbgs() << "Failed setup since both mask and shift are NULL!\n";
    }
    // Did not find a constant mask or a shift.
    return false;
  }
  return true;
}
bool
AMDILPeepholeOpt::optimizeBitInsert(Instruction *inst)
{
  if (!inst) {
    return false;
  }
  if (!inst->isBinaryOp()) {
    return false;
  }
  if (inst->getOpcode() != Instruction::Or) {
    return false;
  }
  if (optLevel == CodeGenOpt::None) {
    return false;
  }
  // We want to do an optimization on a sequence of ops that in the end equals
  // a single ISA instruction.
  // The base pattern for this optimization is ((A & B) << C) | ((D & E) << F)
  // Some simplified versions of this pattern are as follows:
  // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
  // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
  // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
  // (A & B) | (D << F) when (1 << F) >= B
  // (A << C) | (D & E) when (1 << C) >= E
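  // Illustrative example (not from the original source): for
  //   (A & 0xFF00) | (D & 0x00FF)
  // the left mask has width 8 and offset 8 and the right mask has width 8 and
  // offset 0, so the sequence can be rewritten as
  //   __amdil_ubit_insert(8, 8, A >> 8, D & 0x00FF)
  // i.e. the low 8 bits of (A >> 8) are inserted at bit offset 8 into the
  // right-hand value.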
  if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    // The HD4XXX hardware doesn't support the ubit_insert instruction.
    return false;
  }
  Type *aType = inst->getType();
  bool isVector = aType->isVectorTy();
  int numEle = 1;
  // This optimization only works on 32bit integers.
  if (aType->getScalarType()
      != Type::getInt32Ty(inst->getContext())) {
    return false;
  }
  if (isVector) {
    const VectorType *VT = dyn_cast<VectorType>(aType);
    numEle = VT->getNumElements();
    // We currently cannot support more than 4 elements in an intrinsic and we
    // cannot support Vec3 types.
    if (numEle > 4 || numEle == 3) {
      return false;
    }
  }
  // TODO: Handle vectors.
  if (isVector) {
    if (mDebug) {
      dbgs() << "!!! Vectors are not supported yet!\n";
    }
    return false;
  }
  Instruction *LHSSrc = NULL, *RHSSrc = NULL;
  Constant *LHSMask = NULL, *RHSMask = NULL;
  Constant *LHSShift = NULL, *RHSShift = NULL;
  Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
  Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
  if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
    if (mDebug) {
      dbgs() << "Found an OR Operation that failed setup!\n";
      inst->dump();
      if (LHS) { LHS->dump(); }
      if (LHSSrc) { LHSSrc->dump(); }
      if (LHSMask) { LHSMask->dump(); }
      if (LHSShift) { LHSShift->dump(); }
    }
    // There was an issue with the setup for BitInsert.
    return false;
  }
  if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
    if (mDebug) {
      dbgs() << "Found an OR Operation that failed setup!\n";
      inst->dump();
      if (RHS) { RHS->dump(); }
      if (RHSSrc) { RHSSrc->dump(); }
      if (RHSMask) { RHSMask->dump(); }
      if (RHSShift) { RHSShift->dump(); }
    }
    // There was an issue with the setup for BitInsert.
    return false;
  }
  if (mDebug) {
    dbgs() << "Found an OR operation that can possibly be optimized to ubit insert!\n";
    dbgs() << "Op: "; inst->dump();
    dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; }
  }
  Constant *offset = NULL;
  Constant *width = NULL;
  int32_t lhsMaskVal = 0, rhsMaskVal = 0;
  int32_t lhsShiftVal = 0, rhsShiftVal = 0;
  int32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
  int32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
  lhsMaskVal = (int32_t)(LHSMask
      ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
  rhsMaskVal = (int32_t)(RHSMask
      ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
  lhsShiftVal = (int32_t)(LHSShift
      ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
  rhsShiftVal = (int32_t)(RHSShift
      ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
  lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
  rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
  lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
  rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
  // TODO: Handle the case of A & B | D & ~B (i.e. inverted masks).
  if (mDebug) {
    dbgs() << "Found pattern: \'((A" << (LHSMask ? " & B)" : ")");
    dbgs() << (LHSShift ? " << C)" : ")") << " | ((D" ;
    dbgs() << (RHSMask ? " & E)" : ")");
    dbgs() << (RHSShift ? " << F)\'\n" : ")\'\n");
    dbgs() << "A = LHSSrc\t\tD = RHSSrc \n";
    dbgs() << "B = " << lhsMaskVal << "\t\tE = " << rhsMaskVal << "\n";
    dbgs() << "C = " << lhsShiftVal << "\t\tF = " << rhsShiftVal << "\n";
    dbgs() << "width(B) = " << lhsMaskWidth;
    dbgs() << "\twidth(E) = " << rhsMaskWidth << "\n";
    dbgs() << "offset(B) = " << lhsMaskOffset;
    dbgs() << "\toffset(E) = " << rhsMaskOffset << "\n";
    dbgs() << "Constraints: \n";
    dbgs() << "\t(1) B ^ E == 0\n";
    dbgs() << "\t(2-LHS) B is a mask\n";
    dbgs() << "\t(2-RHS) E is a mask\n";
    dbgs() << "\t(3-LHS) (offset(B)) >= (width(E) + offset(E))\n";
    dbgs() << "\t(3-RHS) (offset(E)) >= (width(B) + offset(B))\n";
  }
  if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
    if (mDebug) {
      dbgs() << lhsMaskVal << " ^ " << rhsMaskVal;
      dbgs() << " = " << (lhsMaskVal ^ rhsMaskVal) << "\n";
      dbgs() << "Failed constraint 1!\n";
    }
    return false;
  }
  if (mDebug) {
    dbgs() << "LHS = " << lhsMaskOffset << "";
    dbgs() << " >= (" << rhsMaskWidth << " + " << rhsMaskOffset << ") = ";
    dbgs() << (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset));
    dbgs() << "\nRHS = " << rhsMaskOffset << "";
    dbgs() << " >= (" << lhsMaskWidth << " + " << lhsMaskOffset << ") = ";
    dbgs() << (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset));
    dbgs() << "\n";
  }
  if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
    offset = ConstantInt::get(aType, lhsMaskOffset, false);
    width = ConstantInt::get(aType, lhsMaskWidth, false);
    RHSSrc = RHS;
    if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
      if (mDebug) {
        dbgs() << "Value is not a Mask: " << lhsMaskVal << "\n";
        dbgs() << "Failed constraint 2!\n";
      }
      return false;
    }
    if (!LHSShift) {
      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
          "MaskShr", LHS);
    } else if (lhsShiftVal != lhsMaskOffset) {
      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
          "MaskShr", LHS);
    }
    if (mDebug) {
      dbgs() << "Optimizing LHS!\n";
    }
  } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
    offset = ConstantInt::get(aType, rhsMaskOffset, false);
    width = ConstantInt::get(aType, rhsMaskWidth, false);
    LHSSrc = RHSSrc;
    RHSSrc = LHS;
    if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
      if (mDebug) {
        dbgs() << "Non-Mask: " << rhsMaskVal << "\n";
        dbgs() << "Failed constraint 2!\n";
      }
      return false;
    }
    if (!RHSShift) {
      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
          "MaskShr", RHS);
    } else if (rhsShiftVal != rhsMaskOffset) {
      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
          "MaskShr", RHS);
    }
    if (mDebug) {
      dbgs() << "Optimizing RHS!\n";
    }
  } else {
    if (mDebug) {
      dbgs() << "Failed constraint 3!\n";
    }
    return false;
  }
  if (mDebug) {
    dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; }
    dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; }
    dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; }
    dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; }
  }
  if (!offset || !width) {
    if (mDebug) {
      dbgs() << "Either width or offset are NULL, failed detection!\n";
    }
    return false;
  }
  // Let's create the function signature.
  std::vector<Type *> callTypes;
  callTypes.push_back(aType);
  callTypes.push_back(aType);
  callTypes.push_back(aType);
  callTypes.push_back(aType);
  FunctionType *funcType = FunctionType::get(aType, callTypes, false);
  std::string name = "__amdil_ubit_insert";
  if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
  Function *Func =
      dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
          getOrInsertFunction(llvm::StringRef(name), funcType));
  Value *Operands[4] = {
    width,
    offset,
    LHSSrc,
    RHSSrc
  };
  CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
  if (mDebug) {
    dbgs() << "Old Inst: ";
    inst->dump();
    dbgs() << "New Inst: ";
    CI->dump();
    dbgs() << "\n\n";
  }
  CI->insertBefore(inst);
  inst->replaceAllUsesWith(CI);
  return true;
}

bool
AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst)
{
  if (!inst) {
    return false;
  }
  if (!inst->isBinaryOp()) {
    return false;
  }
  if (inst->getOpcode() != Instruction::And) {
    return false;
  }
  if (optLevel == CodeGenOpt::None) {
    return false;
  }
  // We want to do some simple optimizations on Shift right/And patterns. The
  // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is
  // a value smaller than 32 and C is a mask. If C is a constant value, then
  // the following transformation can occur. For signed integers, it turns
  // into the function call dst = __amdil_ibit_extract(log2(C), B, A). For
  // unsigned integers, it turns into the function call
  // dst = __amdil_ubit_extract(log2(C), B, A). The function
  // __amdil_[u|i]bit_extract can be found in Section 7.9 of the ATI IL spec
  // of the stream SDK for Evergreen hardware.
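  // Illustrative example (not from the original source): for the scalar case
  //   (A >> 8) & 0xFF
  // the mask 0xFF has 8 trailing ones, so the pair becomes
  //   __amdil_ubit_extract(8, 8, A)
  // i.e. extract an 8 bit wide field starting at bit offset 8 of A.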
  if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    // This does not work on HD4XXX hardware.
    return false;
  }
  Type *aType = inst->getType();
  bool isVector = aType->isVectorTy();
  int numEle = 1;
  // This only works on 32bit integers.
  if (aType->getScalarType()
      != Type::getInt32Ty(inst->getContext())) {
    return false;
  }
  if (isVector) {
    const VectorType *VT = dyn_cast<VectorType>(aType);
    numEle = VT->getNumElements();
    // We currently cannot support more than 4 elements in an intrinsic and we
    // cannot support Vec3 types.
    if (numEle > 4 || numEle == 3) {
      return false;
    }
  }
  BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
  // If the first operand is not a shift instruction, then we can return as it
  // doesn't match this pattern.
  if (!ShiftInst || !ShiftInst->isShift()) {
    return false;
  }
  // If this is a shift left, then we don't match this pattern.
  if (ShiftInst->getOpcode() == Instruction::Shl) {
    return false;
  }
  bool isSigned = ShiftInst->isArithmeticShift();
  Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
  Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
  // Let's make sure that the shift value and the and mask are constant integers.
  if (!AndMask || !ShrVal) {
    return false;
  }
  Constant *newMaskConst;
  Constant *shiftValConst;
  if (isVector) {
    // Handle the vector case
    std::vector<Constant *> maskVals;
    std::vector<Constant *> shiftVals;
    ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask);
    ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal);
    Type *scalarType = AndMaskVec->getType()->getScalarType();
    assert(AndMaskVec->getNumOperands() ==
           ShrValVec->getNumOperands() && "cannot have a "
           "combination where the number of elements to a "
           "shift and an and are different!");
    for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) {
      ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x));
      ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x));
      if (!AndCI || !ShiftIC) {
        return false;
      }
      uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
      if (!isMask_32(maskVal)) {
        return false;
      }
      maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
      uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
      // If the mask or shift val is greater than the bitcount, then break out.
      if (maskVal >= 32 || shiftVal >= 32) {
        return false;
      }
      // If the mask val is greater than the number of original bits left
      // then this optimization is invalid.
      if (maskVal > (32 - shiftVal)) {
        return false;
      }
      maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
      shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
    }
    newMaskConst = ConstantVector::get(maskVals);
    shiftValConst = ConstantVector::get(shiftVals);
  } else {
    // Handle the scalar case
    uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue();
    // This must be a mask value where all lower bits are set to 1 and then any
    // bit higher is set to 0.
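    // For example, 0x0000FFFF and 0x7FFFFFFF are masks in this sense, while
    // 0x00FF00FF or 0xFF000000 are not and the optimization is skipped.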
    if (!isMask_32(maskVal)) {
      return false;
    }
    maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
    // Count the number of bits set in the mask; this is the width of the
    // resulting bit set that is extracted from the source value.
    uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue();
    // If the mask or shift val is greater than the bitcount, then break out.
    if (maskVal >= 32 || shiftVal >= 32) {
      return false;
    }
    // If the mask val is greater than the number of original bits left then
    // this optimization is invalid.
    if (maskVal > (32 - shiftVal)) {
      return false;
    }
    newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
    shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
  }
  // Let's create the function signature.
  std::vector<Type *> callTypes;
  callTypes.push_back(aType);
  callTypes.push_back(aType);
  callTypes.push_back(aType);
  FunctionType *funcType = FunctionType::get(aType, callTypes, false);
  std::string name = "__amdil_ubit_extract";
  if (isVector) {
    name += "_v" + itostr(numEle) + "i32";
  } else {
    name += "_i32";
  }
  // Let's create the function.
  Function *Func =
      dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
          getOrInsertFunction(llvm::StringRef(name), funcType));
  Value *Operands[3] = {
    newMaskConst,
    shiftValConst,
    ShiftInst->getOperand(0)
  };
  // Let's create the call with the operands.
  CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
  CI->insertBefore(inst);
  inst->replaceAllUsesWith(CI);
  return true;
}

bool
AMDILPeepholeOpt::expandBFI(CallInst *CI)
{
  if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) {
    return false;
  }
  Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
  if (!LHS->getName().startswith("__amdil_bfi")) {
    return false;
  }
  Type *type = CI->getOperand(0)->getType();
  Constant *negOneConst = NULL;
  if (type->isVectorTy()) {
    std::vector<Constant *> negOneVals;
    negOneConst = ConstantInt::get(CI->getContext(),
        APInt(32, StringRef("-1"), 10));
    for (size_t x = 0,
         y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
      negOneVals.push_back(negOneConst);
    }
    negOneConst = ConstantVector::get(negOneVals);
  } else {
    negOneConst = ConstantInt::get(CI->getContext(),
        APInt(32, StringRef("-1"), 10));
  }
  // __amdil_bfi => (A & B) | (~A & C)
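  // Illustrative example (not from the original source): with A = 0x0000FF00
  // the result takes bits 8..15 from B and every other bit from C.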
  BinaryOperator *lhs =
      BinaryOperator::Create(Instruction::And, CI->getOperand(0),
          CI->getOperand(1), "bfi_and", CI);
  BinaryOperator *rhs =
      BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst,
          "bfi_not", CI);
  rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2),
      "bfi_and", CI);
  lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI);
  CI->replaceAllUsesWith(lhs);
  return true;
}

bool
AMDILPeepholeOpt::expandBFM(CallInst *CI)
{
  if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) {
    return false;
  }
  Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
  if (!LHS->getName().startswith("__amdil_bfm")) {
    return false;
  }
  // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1F)
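  // Illustrative example (not from the original source): src0 = 8 and
  // src1 = 4 give ((1 << 8) - 1) << 4 = 0x00000FF0, an 8 bit mask at offset 4.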
  Constant *newMaskConst = NULL;
  Constant *newShiftConst = NULL;
  Type *type = CI->getOperand(0)->getType();
  if (type->isVectorTy()) {
    std::vector<Constant*> newMaskVals, newShiftVals;
    newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
    newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
    for (size_t x = 0,
         y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
      newMaskVals.push_back(newMaskConst);
      newShiftVals.push_back(newShiftConst);
    }
    newMaskConst = ConstantVector::get(newMaskVals);
    newShiftConst = ConstantVector::get(newShiftVals);
  } else {
    newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
    newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
  }
  BinaryOperator *lhs =
      BinaryOperator::Create(Instruction::And, CI->getOperand(0),
          newMaskConst, "bfm_mask", CI);
  lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst,
      lhs, "bfm_shl", CI);
  lhs = BinaryOperator::Create(Instruction::Sub, lhs,
      newShiftConst, "bfm_sub", CI);
  BinaryOperator *rhs =
      BinaryOperator::Create(Instruction::And, CI->getOperand(1),
          newMaskConst, "bfm_mask", CI);
  lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI);
  CI->replaceAllUsesWith(lhs);
  return true;
}

bool
AMDILPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb)
{
  Instruction *inst = (*bbb);
  if (optimizeCallInst(bbb)) {
    return true;
  }
  if (optimizeBitExtract(inst)) {
    return false;
  }
  if (optimizeBitInsert(inst)) {
    return false;
  }
  if (correctMisalignedMemOp(inst)) {
    return false;
  }
  return false;
}
bool
AMDILPeepholeOpt::correctMisalignedMemOp(Instruction *inst)
{
  LoadInst *linst = dyn_cast<LoadInst>(inst);
  StoreInst *sinst = dyn_cast<StoreInst>(inst);
  unsigned alignment;
  Type *Ty = inst->getType();
  if (linst) {
    alignment = linst->getAlignment();
    Ty = inst->getType();
  } else if (sinst) {
    alignment = sinst->getAlignment();
    Ty = sinst->getValueOperand()->getType();
  } else {
    return false;
  }
  unsigned size = getTypeSize(Ty);
  if (size == alignment || size < alignment) {
    return false;
  }
  if (!Ty->isStructTy()) {
    return false;
  }
  if (alignment < 4) {
    if (linst) {
      linst->setAlignment(0);
      return true;
    } else if (sinst) {
      sinst->setAlignment(0);
      return true;
    }
  }
  return false;
}
bool
AMDILPeepholeOpt::isSigned24BitOps(CallInst *CI)
{
  if (!CI) {
    return false;
  }
  Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
  std::string namePrefix = LHS->getName().substr(0, 14);
  if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24"
      && namePrefix != "__amdil__imul24_high") {
    return false;
  }
  if (mSTM->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
    return false;
  }
  return true;
}

void
AMDILPeepholeOpt::expandSigned24BitOps(CallInst *CI)
{
  assert(isSigned24BitOps(CI) && "Must be a "
         "signed 24 bit operation to call this function!");
  Value *LHS = CI->getOperand(CI->getNumOperands()-1);
  // On 7XX and 8XX we do not have signed 24bit, so we need to
  // expand it to the following:
  // imul24 turns into 32bit imul
  // imad24 turns into 32bit imad
  // imul24_high turns into 32bit imulhigh
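  // Illustrative example (not from the original source): a scalar call to
  // __amdil_imad24(a, b, c) is rebuilt as __amdil_imad_i32(a, b, c), and
  // __amdil_imul24(a, b) becomes a plain 32 bit 'mul' instruction.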
  if (LHS->getName().substr(0, 14) == "__amdil_imad24") {
    Type *aType = CI->getOperand(0)->getType();
    bool isVector = aType->isVectorTy();
    int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
    std::vector<Type*> callTypes;
    callTypes.push_back(CI->getOperand(0)->getType());
    callTypes.push_back(CI->getOperand(1)->getType());
    callTypes.push_back(CI->getOperand(2)->getType());
    FunctionType *funcType =
        FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
    std::string name = "__amdil_imad";
    if (isVector) {
      name += "_v" + itostr(numEle) + "i32";
    } else {
      name += "_i32";
    }
    Function *Func = dyn_cast<Function>(
        CI->getParent()->getParent()->getParent()->
        getOrInsertFunction(llvm::StringRef(name), funcType));
    Value *Operands[3] = {
      CI->getOperand(0),
      CI->getOperand(1),
      CI->getOperand(2)
    };
    CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
    nCI->insertBefore(CI);
    CI->replaceAllUsesWith(nCI);
  } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") {
    BinaryOperator *mulOp =
        BinaryOperator::Create(Instruction::Mul, CI->getOperand(0),
            CI->getOperand(1), "imul24", CI);
    CI->replaceAllUsesWith(mulOp);
  } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") {
    Type *aType = CI->getOperand(0)->getType();
    bool isVector = aType->isVectorTy();
    int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
    std::vector<Type*> callTypes;
    callTypes.push_back(CI->getOperand(0)->getType());
    callTypes.push_back(CI->getOperand(1)->getType());
    FunctionType *funcType =
        FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
    std::string name = "__amdil_imul_high";
    if (isVector) {
      name += "_v" + itostr(numEle) + "i32";
    } else {
      name += "_i32";
    }
    Function *Func = dyn_cast<Function>(
        CI->getParent()->getParent()->getParent()->
        getOrInsertFunction(llvm::StringRef(name), funcType));
    Value *Operands[2] = {
      CI->getOperand(0),
      CI->getOperand(1)
    };
    CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
    nCI->insertBefore(CI);
    CI->replaceAllUsesWith(nCI);
  }
}

bool
AMDILPeepholeOpt::isRWGLocalOpt(CallInst *CI)
{
  return (CI != NULL
          && CI->getOperand(CI->getNumOperands() - 1)->getName()
             == "__amdil_get_local_size_int");
}

bool
AMDILPeepholeOpt::convertAccurateDivide(CallInst *CI)
{
  if (!CI) {
    return false;
  }
  if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD6XXX
      && (mSTM->getDeviceName() == "cayman")) {
    return false;
  }
  return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20)
         == "__amdil_improved_div";
}

void
AMDILPeepholeOpt::expandAccurateDivide(CallInst *CI)
{
  assert(convertAccurateDivide(CI)
         && "expanding accurate divide can only happen if it is expandable!");
  BinaryOperator *divOp =
      BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0),
          CI->getOperand(1), "fdiv32", CI);
  CI->replaceAllUsesWith(divOp);
}

bool
AMDILPeepholeOpt::propagateSamplerInst(CallInst *CI)
{
  if (optLevel != CodeGenOpt::None) {
    return false;
  }

  if (!CI) {
    return false;
  }

  unsigned funcNameIdx = 0;
  funcNameIdx = CI->getNumOperands() - 1;
  StringRef calleeName = CI->getOperand(funcNameIdx)->getName();
  if (calleeName != "__amdil_image2d_read_norm"
      && calleeName != "__amdil_image2d_read_unnorm"
      && calleeName != "__amdil_image3d_read_norm"
      && calleeName != "__amdil_image3d_read_unnorm") {
    return false;
  }

  unsigned samplerIdx = 2;
  samplerIdx = 1;
  Value *sampler = CI->getOperand(samplerIdx);
  LoadInst *lInst = dyn_cast<LoadInst>(sampler);
  if (!lInst) {
    return false;
  }

  if (lInst->getPointerAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
    return false;
  }

  GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand());
  // If we are loading from what is not a global value, then we
  // fail and return.
  if (!gv) {
    return false;
  }

  // If we don't have an initializer, or the initializer is not a 32bit
  // integer, we fail.
  if (!gv->hasInitializer()
      || !gv->getInitializer()->getType()->isIntegerTy(32)) {
    return false;
  }

  // Now that we have the global variable initializer, let's replace
  // all uses of the load instruction with the samplerVal and
  // reparse the __amdil_is_constant() function.
  Constant *samplerVal = gv->getInitializer();
  lInst->replaceAllUsesWith(samplerVal);
  return true;
}

bool
AMDILPeepholeOpt::doInitialization(Module &M)
{
  return false;
}

bool
AMDILPeepholeOpt::doFinalization(Module &M)
{
  return false;
}

void
AMDILPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const
{
  AU.addRequired<MachineFunctionAnalysis>();
  FunctionPass::getAnalysisUsage(AU);
  AU.setPreservesAll();
}

size_t AMDILPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
  size_t size = 0;
  if (!T) {
    return size;
  }
  switch (T->getTypeID()) {
  case Type::X86_FP80TyID:
  case Type::FP128TyID:
  case Type::PPC_FP128TyID:
  case Type::LabelTyID:
    assert(0 && "These types are not supported by this backend");
  default:
  case Type::FloatTyID:
  case Type::DoubleTyID:
    size = T->getPrimitiveSizeInBits() >> 3;
    break;
  case Type::PointerTyID:
    size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
    break;
  case Type::IntegerTyID:
    size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
    break;
  case Type::StructTyID:
    size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
    break;
  case Type::ArrayTyID:
    size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
    break;
  case Type::FunctionTyID:
    size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
    break;
  case Type::VectorTyID:
    size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
    break;
  };
  return size;
}

size_t AMDILPeepholeOpt::getTypeSize(StructType * const ST,
    bool dereferencePtr) {
  size_t size = 0;
  if (!ST) {
    return size;
  }
  Type *curType;
  StructType::element_iterator eib;
  StructType::element_iterator eie;
  for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
    curType = *eib;
    size += getTypeSize(curType, dereferencePtr);
  }
  return size;
}

size_t AMDILPeepholeOpt::getTypeSize(IntegerType * const IT,
    bool dereferencePtr) {
  return IT ? (IT->getBitWidth() >> 3) : 0;
}

size_t AMDILPeepholeOpt::getTypeSize(FunctionType * const FT,
    bool dereferencePtr) {
  assert(0 && "Should not be able to calculate the size of a function type");
  return 0;
}

size_t AMDILPeepholeOpt::getTypeSize(ArrayType * const AT,
    bool dereferencePtr) {
  return (size_t)(AT ? (getTypeSize(AT->getElementType(),
                                    dereferencePtr) * AT->getNumElements())
                     : 0);
}

size_t AMDILPeepholeOpt::getTypeSize(VectorType * const VT,
    bool dereferencePtr) {
  return VT ? (VT->getBitWidth() >> 3) : 0;
}

size_t AMDILPeepholeOpt::getTypeSize(PointerType * const PT,
    bool dereferencePtr) {
  if (!PT) {
    return 0;
  }
  Type *CT = PT->getElementType();
  if (CT->getTypeID() == Type::StructTyID &&
      PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
    return getTypeSize(dyn_cast<StructType>(CT));
  } else if (dereferencePtr) {
    size_t size = 0;
    for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
      size += getTypeSize(PT->getContainedType(x), dereferencePtr);
    }
    return size;
  } else {
    return 4;
  }
}

size_t AMDILPeepholeOpt::getTypeSize(OpaqueType * const OT,
    bool dereferencePtr) {
  //assert(0 && "Should not be able to calculate the size of an opaque type");
  return 4;
}