[mesa.git] / src / gallium / drivers / radeon / AMDILPointerManager.cpp
1 //===-------- AMDILPointerManager.cpp - Manage Pointers for HW-------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 // Implementation of the AMDILPointerManager classes. See the header file for
10 // more documentation of the classes.
11 // TODO: This fails when function calls are enabled, must always be inlined
12 //===----------------------------------------------------------------------===//
13 #include "AMDILPointerManager.h"
14 #include "AMDILCompilerErrors.h"
15 #include "AMDILDeviceInfo.h"
16 #include "AMDILGlobalManager.h"
17 #include "AMDILKernelManager.h"
18 #include "AMDILMachineFunctionInfo.h"
19 #include "AMDILTargetMachine.h"
20 #include "AMDILUtilityFunctions.h"
21 #include "llvm/ADT/PostOrderIterator.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/ADT/ValueMap.h"
24 #include "llvm/CodeGen/MachineDominators.h"
25 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
26 #include "llvm/CodeGen/MachineInstr.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/Passes.h"
29 #include "llvm/DerivedTypes.h"
30 #include "llvm/Function.h"
31 #include "llvm/GlobalValue.h"
32 #include "llvm/Instructions.h"
33 #include "llvm/Metadata.h"
34 #include "llvm/Module.h"
35 #include "llvm/Support/FormattedStream.h"
36
37 #include <stdio.h>
38 using namespace llvm;
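// Pass identification, replacement for typeid (standard LLVM pass-ID idiom).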
39 char AMDILPointerManager::ID = 0;
40 namespace llvm {
41 FunctionPass*
42 createAMDILPointerManager(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
43 {
44 return tm.getSubtarget<AMDILSubtarget>()
45 .device()->getPointerManager(tm AMDIL_OPT_LEVEL_VAR);
46 }
47 }
48
49 AMDILPointerManager::AMDILPointerManager(
50 TargetMachine &tm
51 AMDIL_OPT_LEVEL_DECL) :
52 MachineFunctionPass(ID),
53 TM(tm)
54 {
55 mDebug = DEBUGME;
56 initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
57 }
58
59 AMDILPointerManager::~AMDILPointerManager()
60 {
61 }
62
63 const char*
64 AMDILPointerManager::getPassName() const
65 {
66 return "AMD IL Default Pointer Manager Pass";
67 }
68
69 void
70 AMDILPointerManager::getAnalysisUsage(AnalysisUsage &AU) const
71 {
72 AU.setPreservesAll();
73 AU.addRequiredID(MachineDominatorsID);
74 MachineFunctionPass::getAnalysisUsage(AU);
75 }
76
77 AMDILEGPointerManager::AMDILEGPointerManager(
78 TargetMachine &tm
79 AMDIL_OPT_LEVEL_DECL) :
80 AMDILPointerManager(tm AMDIL_OPT_LEVEL_VAR),
81 TM(tm)
82 {
83 }
84
85 AMDILEGPointerManager::~AMDILEGPointerManager()
86 {
87 }
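// Attempts to recover the name of the sampler argument used by an image read
// instruction, either directly from the lookup table or by tracing a private
// load from a frame index back to the kernel argument stored there.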
88 std::string
89 findSamplerName(MachineInstr* MI,
90 FIPMap &FIToPtrMap,
91 RVPVec &lookupTable,
92 const TargetMachine *TM)
93 {
94 std::string sampler = "unknown";
95 assert(MI->getNumOperands() == 5 && "Only an "
96 "image read instruction with 5 arguments can "
97 "have a sampler.");
98 assert(MI->getOperand(3).isReg() &&
99 "Argument 3 must be a register to call this function");
100 unsigned reg = MI->getOperand(3).getReg();
101 // If this register points to an argument, then
102 // we can return the argument name.
103 if (lookupTable[reg].second && dyn_cast<Argument>(lookupTable[reg].second)) {
104 return lookupTable[reg].second->getName();
105 }
106 // Otherwise the sampler is coming from memory somewhere.
107 // If the sampler memory location can be tracked, then
108 // we ascertain the sampler name that way.
109 // The most common case is when optimizations are disabled
110 // or mem2reg is not run; the sampler argument is then
111 // passed through a frame index.
112
113 // In the optimized case, the instruction that defines the
114 // register in operand 3 is a private load.
115 MachineRegisterInfo &regInfo = MI->getParent()->getParent()->getRegInfo();
116 assert(!regInfo.def_empty(reg)
117 && "We don't have any defs of this register, but we aren't an argument!");
118 MachineOperand *defOp = regInfo.getRegUseDefListHead(reg);
119 MachineInstr *defMI = defOp->getParent();
120 if (isPrivateInst(TM->getInstrInfo(), defMI) && isLoadInst(TM->getInstrInfo(), defMI)) {
121 if (defMI->getOperand(1).isFI()) {
122 RegValPair &fiRVP = FIToPtrMap[reg];
123 if (fiRVP.second && dyn_cast<Argument>(fiRVP.second)) {
124 return fiRVP.second->getName();
125 } else {
126 // FIXME: Fix the case where the value stored is not a kernel argument.
127 assert(!"Found a private load of a sampler where the value isn't an argument!");
128 }
129 } else {
130 // FIXME: Fix the case where someone dynamically loads a sampler value
131 // from private memory. This is problematic because we need to know the
132 // sampler value at compile time and if it is dynamically loaded, we won't
133 // know what sampler value to use.
134 assert(!"Found a private load of a sampler that isn't from a frame index!");
135 }
136 } else {
137 // FIXME: Handle the case where the def is neither a private instruction
138 // nor a load instruction. This shouldn't occur, but we add an assertion
139 // just to make sure that it doesn't.
140 assert(!"Found a case which we don't handle.");
141 }
142 return sampler;
143 }
144
145 const char*
146 AMDILEGPointerManager::getPassName() const
147 {
148 return "AMD IL EG Pointer Manager Pass";
149 }
150
151 // Helper function to determine if the current pointer is from the
152 // local, region or private address spaces.
153 static bool
154 isLRPInst(MachineInstr *MI,
155 const AMDILTargetMachine *ATM)
156 {
157 const AMDILSubtarget *STM
158 = ATM->getSubtargetImpl();
159 if (!MI) {
160 return false;
161 }
162 if ((isRegionInst(ATM->getInstrInfo(), MI)
163 && STM->device()->usesHardware(AMDILDeviceInfo::RegionMem))
164 || (isLocalInst(ATM->getInstrInfo(), MI)
165 && STM->device()->usesHardware(AMDILDeviceInfo::LocalMem))
166 || (isPrivateInst(ATM->getInstrInfo(), MI)
167 && STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem))) {
168 return true;
169 }
170 return false;
171 }
172
173 /// Helper function to determine if the I/O instruction uses
174 /// global device memory or not.
175 static bool
176 usesGlobal(
177 const AMDILTargetMachine *ATM,
178 MachineInstr *MI) {
179 const AMDILSubtarget *STM
180 = ATM->getSubtargetImpl();
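// The non-global cases below return !usesHardware(...): those accesses only
// touch global memory when the corresponding hardware memory (region, local,
// constant or private) is not available and the access is emulated there.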
181 switch(MI->getOpcode()) {
182 ExpandCaseToAllTypes(AMDIL::GLOBALSTORE);
183 ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE);
184 ExpandCaseToAllTypes(AMDIL::GLOBALLOAD);
185 ExpandCaseToAllTypes(AMDIL::GLOBALSEXTLOAD);
186 ExpandCaseToAllTypes(AMDIL::GLOBALZEXTLOAD);
187 ExpandCaseToAllTypes(AMDIL::GLOBALAEXTLOAD);
188 return true;
189 ExpandCaseToAllTypes(AMDIL::REGIONLOAD);
190 ExpandCaseToAllTypes(AMDIL::REGIONSEXTLOAD);
191 ExpandCaseToAllTypes(AMDIL::REGIONZEXTLOAD);
192 ExpandCaseToAllTypes(AMDIL::REGIONAEXTLOAD);
193 ExpandCaseToAllTypes(AMDIL::REGIONSTORE);
194 ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE);
195 return !STM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
196 ExpandCaseToAllTypes(AMDIL::LOCALLOAD);
197 ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD);
198 ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD);
199 ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD);
200 ExpandCaseToAllTypes(AMDIL::LOCALSTORE);
201 ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE);
202 return !STM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
203 ExpandCaseToAllTypes(AMDIL::CPOOLLOAD);
204 ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD);
205 ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD);
206 ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD);
207 ExpandCaseToAllTypes(AMDIL::CONSTANTLOAD);
208 ExpandCaseToAllTypes(AMDIL::CONSTANTSEXTLOAD);
209 ExpandCaseToAllTypes(AMDIL::CONSTANTAEXTLOAD);
210 ExpandCaseToAllTypes(AMDIL::CONSTANTZEXTLOAD);
211 return !STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem);
212 ExpandCaseToAllTypes(AMDIL::PRIVATELOAD);
213 ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD);
214 ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD);
215 ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD);
216 ExpandCaseToAllTypes(AMDIL::PRIVATESTORE);
217 ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE);
218 return !STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem);
219 default:
220 return false;
221 }
222 return false;
223 }
224
225 // Helper function that allocates the default resource ID for the
226 // respective I/O types.
227 static void
228 allocateDefaultID(
229 const AMDILTargetMachine *ATM,
230 AMDILAS::InstrResEnc &curRes,
231 MachineInstr *MI,
232 bool mDebug)
233 {
234 AMDILMachineFunctionInfo *mMFI =
235 MI->getParent()->getParent()->getInfo<AMDILMachineFunctionInfo>();
236 const AMDILSubtarget *STM
237 = ATM->getSubtargetImpl();
238 if (mDebug) {
239 dbgs() << "Assigning instruction to default ID. Inst:";
240 MI->dump();
241 }
242 // If we use global memory, let's set the operand to
243 // the ARENA_UAV_ID.
244 if (usesGlobal(ATM, MI)) {
245 curRes.bits.ResourceID =
246 STM->device()->getResourceID(AMDILDevice::GLOBAL_ID);
247 if (isAtomicInst(ATM->getInstrInfo(), MI)) {
248 MI->getOperand(MI->getNumOperands()-1)
249 .setImm(curRes.bits.ResourceID);
250 }
251 AMDILKernelManager *KM = STM->getKernelManager();
252 if (curRes.bits.ResourceID == 8
253 && !STM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) {
254 KM->setUAVID(NULL, curRes.bits.ResourceID);
255 mMFI->uav_insert(curRes.bits.ResourceID);
256 }
257 } else if (isPrivateInst(ATM->getInstrInfo(), MI)) {
258 curRes.bits.ResourceID =
259 STM->device()->getResourceID(AMDILDevice::SCRATCH_ID);
260 } else if (isLocalInst(ATM->getInstrInfo(), MI) || isLocalAtomic(ATM->getInstrInfo(), MI)) {
261 curRes.bits.ResourceID =
262 STM->device()->getResourceID(AMDILDevice::LDS_ID);
263 AMDILMachineFunctionInfo *mMFI =
264 MI->getParent()->getParent()->getInfo<AMDILMachineFunctionInfo>();
265 mMFI->setUsesLocal();
266 if (isAtomicInst(ATM->getInstrInfo(), MI)) {
267 assert(curRes.bits.ResourceID && "Atomic resource ID "
268 "cannot be zero!");
269 MI->getOperand(MI->getNumOperands()-1)
270 .setImm(curRes.bits.ResourceID);
271 }
272 } else if (isRegionInst(ATM->getInstrInfo(), MI) || isRegionAtomic(ATM->getInstrInfo(), MI)) {
273 curRes.bits.ResourceID =
274 STM->device()->getResourceID(AMDILDevice::GDS_ID);
275 AMDILMachineFunctionInfo *mMFI =
276 MI->getParent()->getParent()->getInfo<AMDILMachineFunctionInfo>();
277 mMFI->setUsesRegion();
278 if (isAtomicInst(ATM->getInstrInfo(), MI)) {
279 assert(curRes.bits.ResourceID && "Atomic resource ID "
280 "cannot be zero!");
281 (MI)->getOperand((MI)->getNumOperands()-1)
282 .setImm(curRes.bits.ResourceID);
283 }
284 } else if (isConstantInst(ATM->getInstrInfo(), MI)) {
285 // If we are an unknown constant instruction and the base pointer is known,
286 // set the resource ID accordingly; otherwise use the default constant ID.
287 // FIXME: this should not require the base pointer to know what constant
288 // it is from.
289 AMDILGlobalManager *GM = STM->getGlobalManager();
290 MachineFunction *MF = MI->getParent()->getParent();
291 if (GM->isKernel(MF->getFunction()->getName())) {
292 const kernel &krnl = GM->getKernel(MF->getFunction()->getName());
293 const Value *V = getBasePointerValue(MI);
294 if (V && !dyn_cast<AllocaInst>(V)) {
295 curRes.bits.ResourceID = GM->getConstPtrCB(krnl, V->getName());
296 curRes.bits.HardwareInst = 1;
297 } else if (V && dyn_cast<AllocaInst>(V)) {
298 // FIXME: Need a better way to fix this. Requires a rewrite of how
299 // we lower global addresses to various address spaces.
300 // So for now, let's assume that there is only a single
301 // constant buffer that can be accessed from a load instruction
302 // that is derived from an alloca instruction.
303 curRes.bits.ResourceID = 2;
304 curRes.bits.HardwareInst = 1;
305 } else {
306 if (isStoreInst(ATM->getInstrInfo(), MI)) {
307 if (mDebug) {
308 dbgs() << __LINE__ << ": Setting byte store bit on instruction: ";
309 MI->dump();
310 }
311 curRes.bits.ByteStore = 1;
312 }
313 curRes.bits.ResourceID = STM->device()->getResourceID(AMDILDevice::CONSTANT_ID);
314 }
315 } else {
316 if (isStoreInst(ATM->getInstrInfo(), MI)) {
317 if (mDebug) {
318 dbgs() << __LINE__ << ": Setting byte store bit on instruction: ";
319 MI->dump();
320 }
321 curRes.bits.ByteStore = 1;
322 }
323 curRes.bits.ResourceID = STM->device()->getResourceID(AMDILDevice::GLOBAL_ID);
324 AMDILKernelManager *KM = STM->getKernelManager();
325 KM->setUAVID(NULL, curRes.bits.ResourceID);
326 mMFI->uav_insert(curRes.bits.ResourceID);
327 }
328 } else if (isAppendInst(ATM->getInstrInfo(), MI)) {
329 unsigned opcode = MI->getOpcode();
330 if (opcode == AMDIL::APPEND_ALLOC
331 || opcode == AMDIL::APPEND_ALLOC_NORET) {
332 curRes.bits.ResourceID = 1;
333 } else {
334 curRes.bits.ResourceID = 2;
335 }
336 }
337 setAsmPrinterFlags(MI, curRes);
338 }
339
340 // Function that parses the arguments and updates the lookupTable with the
341 // register -> pointer mapping. This function also checks for cacheable
342 // pointers and updates the CacheableSet with the arguments that
343 // can be cached based on the readonlypointer annotation. The final
344 // purpose of this function is to update the imageSet and counterSet
345 // with all pointers that are either images or atomic counters.
346 uint32_t
347 parseArguments(MachineFunction &MF,
348 RVPVec &lookupTable,
349 const AMDILTargetMachine *ATM,
350 CacheableSet &cacheablePtrs,
351 ImageSet &imageSet,
352 AppendSet &counterSet,
353 bool mDebug)
354 {
355 const AMDILSubtarget *STM
356 = ATM->getSubtargetImpl();
357 uint32_t writeOnlyImages = 0;
358 uint32_t readOnlyImages = 0;
359 std::string cachedKernelName = "llvm.readonlypointer.annotations.";
360 cachedKernelName.append(MF.getFunction()->getName());
361 GlobalVariable *GV = MF.getFunction()->getParent()
362 ->getGlobalVariable(cachedKernelName);
363 unsigned cbNum = 0;
364 unsigned regNum = AMDIL::R1;
365 AMDILMachineFunctionInfo *mMFI = MF.getInfo<AMDILMachineFunctionInfo>();
366 for (Function::const_arg_iterator I = MF.getFunction()->arg_begin(),
367 E = MF.getFunction()->arg_end(); I != E; ++I) {
368 const Argument *curArg = I;
369 if (mDebug) {
370 dbgs() << "Argument: ";
371 curArg->dump();
372 }
373 Type *curType = curArg->getType();
374 // We are either a scalar or a vector type that
375 // is passed by value and is not an opaque/struct
376 // type. We just need to increment regNum
377 // the correct number of times to match the number
378 // of registers that the argument takes up.
379 if (curType->isFPOrFPVectorTy() ||
380 curType->isIntOrIntVectorTy()) {
381 // We are scalar, so increment once and
382 // move on
383 if (!curType->isVectorTy()) {
384 lookupTable[regNum] = std::make_pair<unsigned, const Value*>(~0U, curArg);
385 ++regNum;
386 ++cbNum;
387 continue;
388 }
389 VectorType *VT = dyn_cast<VectorType>(curType);
390 // We are a vector type. If we are a 64-bit type, then
391 // we increment length / 2 times, otherwise we
392 // increment length / 4 times. The only corner case
393 // is with vec3, where the vector gets scalarized and
394 // therefore we need a loop count of 3.
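// For example (assuming 32-bit elements pack four per register and 64-bit
// elements pack two): a float4/int4 gives loopCount = (4 + 2) >> 2 = 1, a
// double2 gives loopCount = 2 >> 1 = 1, while a float3 keeps loopCount = 3.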
395 size_t loopCount = VT->getNumElements();
396 if (loopCount != 3) {
397 if (VT->getScalarSizeInBits() == 64) {
398 loopCount = loopCount >> 1;
399 } else {
400 loopCount = (loopCount + 2) >> 2;
401 }
402 cbNum += loopCount;
403 } else {
404 cbNum++;
405 }
406 while (loopCount--) {
407 lookupTable[regNum] = std::make_pair<unsigned, const Value*>(~0U, curArg);
408 ++regNum;
409 }
410 } else if (curType->isPointerTy()) {
411 Type *CT = dyn_cast<PointerType>(curType)->getElementType();
412 const StructType *ST = dyn_cast<StructType>(CT);
413 if (ST && ST->isOpaque()) {
414 StringRef name = ST->getName();
415 bool i1d_type = name == "struct._image1d_t";
416 bool i1da_type = name == "struct._image1d_array_t";
417 bool i1db_type = name == "struct._image1d_buffer_t";
418 bool i2d_type = name == "struct._image2d_t";
419 bool i2da_type = name == "struct._image2d_array_t";
420 bool i3d_type = name == "struct._image3d_t";
421 bool c32_type = name == "struct._counter32_t";
422 bool c64_type = name == "struct._counter64_t";
423 if (i2d_type || i3d_type || i2da_type ||
424 i1d_type || i1db_type || i1da_type) {
425 imageSet.insert(I);
426 uint32_t imageNum = readOnlyImages + writeOnlyImages;
427 if (STM->getGlobalManager()
428 ->isReadOnlyImage(MF.getFunction()->getName(), imageNum)) {
429 if (mDebug) {
430 dbgs() << "Pointer: '" << curArg->getName()
431 << "' is a read only image # " << readOnlyImages << "!\n";
432 }
433 // We store the cbNum along with the image number so that we can
434 // correctly encode the 'info' intrinsics.
435 lookupTable[regNum] = std::make_pair<unsigned, const Value*>
436 ((cbNum << 16 | readOnlyImages++), curArg);
437 } else if (STM->getGlobalManager()
438 ->isWriteOnlyImage(MF.getFunction()->getName(), imageNum)) {
439 if (mDebug) {
440 dbgs() << "Pointer: '" << curArg->getName()
441 << "' is a write only image # " << writeOnlyImages << "!\n";
442 }
443 // We store the cbNum along with the image number so that we can
444 // correctly encode the 'info' intrinsics.
445 lookupTable[regNum] = std::make_pair<unsigned, const Value*>
446 ((cbNum << 16 | writeOnlyImages++), curArg);
447 } else {
448 assert(!"Read/Write images are not supported!");
449 }
450 ++regNum;
451 cbNum += 2;
452 continue;
453 } else if (c32_type || c64_type) {
454 if (mDebug) {
455 dbgs() << "Pointer: '" << curArg->getName()
456 << "' is a " << (c32_type ? "32" : "64")
457 << " bit atomic counter type!\n";
458 }
459 counterSet.push_back(I);
460 }
461 }
462
463 if (STM->device()->isSupported(AMDILDeviceInfo::CachedMem)
464 && GV && GV->hasInitializer()) {
465 const ConstantArray *nameArray
466 = dyn_cast_or_null<ConstantArray>(GV->getInitializer());
467 if (nameArray) {
468 for (unsigned x = 0, y = nameArray->getNumOperands(); x < y; ++x) {
469 const GlobalVariable *gV= dyn_cast_or_null<GlobalVariable>(
470 nameArray->getOperand(x)->getOperand(0));
471 const ConstantDataArray *argName =
472 dyn_cast_or_null<ConstantDataArray>(gV->getInitializer());
473 if (!argName) {
474 continue;
475 }
476 std::string argStr = argName->getAsString();
477 std::string curStr = curArg->getName();
478 if (!strcmp(argStr.data(), curStr.data())) {
479 if (mDebug) {
480 dbgs() << "Pointer: '" << curArg->getName()
481 << "' is cacheable!\n";
482 }
483 cacheablePtrs.insert(curArg);
484 }
485 }
486 }
487 }
488 uint32_t as = dyn_cast<PointerType>(curType)->getAddressSpace();
489 // Handle the case where the kernel argument is a pointer
490 if (mDebug) {
491 dbgs() << "Pointer: " << curArg->getName() << " is assigned ";
492 if (as == AMDILAS::GLOBAL_ADDRESS) {
493 dbgs() << "uav " << STM->device()
494 ->getResourceID(AMDILDevice::GLOBAL_ID);
495 } else if (as == AMDILAS::PRIVATE_ADDRESS) {
496 dbgs() << "scratch " << STM->device()
497 ->getResourceID(AMDILDevice::SCRATCH_ID);
498 } else if (as == AMDILAS::LOCAL_ADDRESS) {
499 dbgs() << "lds " << STM->device()
500 ->getResourceID(AMDILDevice::LDS_ID);
501 } else if (as == AMDILAS::CONSTANT_ADDRESS) {
502 dbgs() << "cb " << STM->device()
503 ->getResourceID(AMDILDevice::CONSTANT_ID);
504 } else if (as == AMDILAS::REGION_ADDRESS) {
505 dbgs() << "gds " << STM->device()
506 ->getResourceID(AMDILDevice::GDS_ID);
507 } else {
508 assert(!"Found an address space that we don't support!");
509 }
510 dbgs() << " @ register " << regNum << ". Inst: ";
511 curArg->dump();
512 }
513 switch (as) {
514 default:
515 lookupTable[regNum] = std::make_pair<unsigned, const Value*>
516 (STM->device()->getResourceID(AMDILDevice::GLOBAL_ID), curArg);
517 break;
518 case AMDILAS::LOCAL_ADDRESS:
519 lookupTable[regNum] = std::make_pair<unsigned, const Value*>
520 (STM->device()->getResourceID(AMDILDevice::LDS_ID), curArg);
521 mMFI->setHasLocalArg();
522 break;
523 case AMDILAS::REGION_ADDRESS:
524 lookupTable[regNum] = std::make_pair<unsigned, const Value*>
525 (STM->device()->getResourceID(AMDILDevice::GDS_ID), curArg);
526 mMFI->setHasRegionArg();
527 break;
528 case AMDILAS::CONSTANT_ADDRESS:
529 lookupTable[regNum] = std::make_pair<unsigned, const Value*>
530 (STM->device()->getResourceID(AMDILDevice::CONSTANT_ID), curArg);
531 break;
532 case AMDILAS::PRIVATE_ADDRESS:
533 lookupTable[regNum] = std::make_pair<unsigned, const Value*>
534 (STM->device()->getResourceID(AMDILDevice::SCRATCH_ID), curArg);
535 break;
536 }
537 // In this case we need to increment it once.
538 ++regNum;
539 ++cbNum;
540 } else {
541 // Is anything missing that is legal in CL?
542 assert(0 && "Current type is not supported!");
543 lookupTable[regNum] = std::make_pair<unsigned, const Value*>
544 (STM->device()->getResourceID(AMDILDevice::GLOBAL_ID), curArg);
545 ++regNum;
546 ++cbNum;
547 }
548 }
549 return writeOnlyImages;
550 }
551 // The call stack is interesting in that even in SSA form, it assigns
552 // registers to the same values over and over again. So we need to
553 // ignore the values that are assigned and just deal with the input
554 // and return registers.
555 static void
556 parseCall(
557 const AMDILTargetMachine *ATM,
558 InstPMap &InstToPtrMap,
559 PtrIMap &PtrToInstMap,
560 RVPVec &lookupTable,
561 MachineBasicBlock::iterator &mBegin,
562 MachineBasicBlock::iterator mEnd,
563 bool mDebug)
564 {
565 SmallVector<unsigned, 8> inputRegs;
566 AMDILAS::InstrResEnc curRes;
567 if (mDebug) {
568 dbgs() << "Parsing Call Stack Start.\n";
569 }
570 MachineBasicBlock::iterator callInst = mBegin;
571 MachineInstr *CallMI = callInst;
572 getAsmPrinterFlags(CallMI, curRes);
573 MachineInstr *MI = --mBegin;
574 unsigned reg = AMDIL::R1;
575 // First we need to check the input registers.
576 do {
577 // We stop if we hit the beginning of the call stack
578 // adjustment.
579 if (MI->getOpcode() == AMDIL::ADJCALLSTACKDOWN
580 || MI->getOpcode() == AMDIL::ADJCALLSTACKUP
581 || MI->getNumOperands() != 2
582 || !MI->getOperand(0).isReg()) {
583 break;
584 }
585 reg = MI->getOperand(0).getReg();
586 if (MI->getOperand(1).isReg()) {
587 unsigned reg1 = MI->getOperand(1).getReg();
588 inputRegs.push_back(reg1);
589 if (lookupTable[reg1].second) {
590 curRes.bits.PointerPath = 1;
591 }
592 }
593 lookupTable.erase(reg);
594 if ((signed)reg < 0
595 || mBegin == CallMI->getParent()->begin()) {
596 break;
597 }
598 MI = --mBegin;
599 } while (1);
600 mBegin = callInst;
601 MI = ++mBegin;
602 // If the next instruction's operand 1 is not a register or that register
603 // is not R1, then we don't have any return values.
604 if (MI->getNumOperands() == 2
605 && MI->getOperand(1).isReg()
606 && MI->getOperand(1).getReg() == AMDIL::R1) {
607 // Next we check the output register.
608 reg = MI->getOperand(0).getReg();
609 // Now we link the inputs to the output.
610 for (unsigned x = 0; x < inputRegs.size(); ++x) {
611 if (lookupTable[inputRegs[x]].second) {
612 curRes.bits.PointerPath = 1;
613 lookupTable[reg] = lookupTable[inputRegs[x]];
614 InstToPtrMap[CallMI].insert(
615 lookupTable[reg].second);
616 break;
617 }
618 }
619 lookupTable.erase(MI->getOperand(1).getReg());
620 }
621 setAsmPrinterFlags(CallMI, curRes);
622 if (mDebug) {
623 dbgs() << "Parsing Call Stack End.\n";
624 }
625 return;
626 }
627
628 // Detect if the current instruction conflicts with another instruction
629 // and add the instruction to the correct location accordingly.
630 static void
631 detectConflictInst(
632 MachineInstr *MI,
633 AMDILAS::InstrResEnc &curRes,
634 RVPVec &lookupTable,
635 InstPMap &InstToPtrMap,
636 bool isLoadStore,
637 unsigned reg,
638 unsigned dstReg,
639 bool mDebug)
640 {
641 // If the instruction does not have a pointer path flag
642 // associated with it, then we know that no other pointer
643 // hits this instruction.
644 if (!curRes.bits.PointerPath) {
645 if (dyn_cast<PointerType>(lookupTable[reg].second->getType())) {
646 curRes.bits.PointerPath = 1;
647 }
648 // We don't want to transfer the pointer to the destination register
649 // between load/store because the load dest can be on a completely
650 // different pointer path and the store doesn't have a real
651 // destination register.
652 if (!isLoadStore) {
653 if (mDebug) {
654 if (dyn_cast<PointerType>(lookupTable[reg].second->getType())) {
655 dbgs() << "Pointer: " << lookupTable[reg].second->getName();
656 assert(dyn_cast<PointerType>(lookupTable[reg].second->getType())
657 && "Must be a pointer type for an instruction!");
658 switch (dyn_cast<PointerType>(
659 lookupTable[reg].second->getType())->getAddressSpace())
660 {
661 case AMDILAS::GLOBAL_ADDRESS: dbgs() << " UAV: "; break;
662 case AMDILAS::LOCAL_ADDRESS: dbgs() << " LDS: "; break;
663 case AMDILAS::REGION_ADDRESS: dbgs() << " GDS: "; break;
664 case AMDILAS::PRIVATE_ADDRESS: dbgs() << " SCRATCH: "; break;
665 case AMDILAS::CONSTANT_ADDRESS: dbgs() << " CB: "; break;
666
667 }
668 dbgs() << lookupTable[reg].first << " Reg: " << reg
669 << " assigned to reg " << dstReg << ". Inst: ";
670 MI->dump();
671 }
672 }
673 // We don't want to do any copies if the register is not virtual
674 // as it is the result of a CALL. parseCall handles the
675 // case where the input and output need to be linked up
676 // if it occurs. The easiest way to check for virtual
677 // is to check the top bit.
678 lookupTable[dstReg] = lookupTable[reg];
679 }
680 } else {
681 if (dyn_cast<PointerType>(lookupTable[reg].second->getType())) {
682 // Otherwise we have a conflict between two pointers somehow.
683 curRes.bits.ConflictPtr = 1;
684 if (mDebug) {
685 dbgs() << "Pointer: " << lookupTable[reg].second->getName();
686 assert(dyn_cast<PointerType>(lookupTable[reg].second->getType())
687 && "Must be a pointer type for a conflict instruction!");
688 switch (dyn_cast<PointerType>(
689 lookupTable[reg].second->getType())->getAddressSpace())
690 {
691 case AMDILAS::GLOBAL_ADDRESS: dbgs() << " UAV: "; break;
692 case AMDILAS::LOCAL_ADDRESS: dbgs() << " LDS: "; break;
693 case AMDILAS::REGION_ADDRESS: dbgs() << " GDS: "; break;
694 case AMDILAS::PRIVATE_ADDRESS: dbgs() << " SCRATCH: "; break;
695 case AMDILAS::CONSTANT_ADDRESS: dbgs() << " CB: "; break;
696
697 }
698 dbgs() << lookupTable[reg].first << " Reg: " << reg;
699 if (InstToPtrMap[MI].size() > 1) {
700 dbgs() << " conflicts with:\n ";
701 for (PtrSet::iterator psib = InstToPtrMap[MI].begin(),
702 psie = InstToPtrMap[MI].end(); psib != psie; ++psib) {
703 dbgs() << "\t\tPointer: " << (*psib)->getName() << " ";
704 assert(dyn_cast<PointerType>((*psib)->getType())
705 && "Must be a pointer type for a conflict instruction!");
706 (*psib)->dump();
707 }
708 } else {
709 dbgs() << ".";
710 }
711 dbgs() << " Inst: ";
712 MI->dump();
713 }
714 }
715 // Add the conflicting values to the pointer set for the instruction
716 InstToPtrMap[MI].insert(lookupTable[reg].second);
717 // We don't want to add the destination register if
718 // we are a load or store.
719 if (!isLoadStore) {
720 InstToPtrMap[MI].insert(lookupTable[dstReg].second);
721 }
722 }
723 setAsmPrinterFlags(MI, curRes);
724 }
725
726 // In this case we want to handle a load instruction.
727 static void
728 parseLoadInst(
729 const AMDILTargetMachine *ATM,
730 InstPMap &InstToPtrMap,
731 PtrIMap &PtrToInstMap,
732 FIPMap &FIToPtrMap,
733 RVPVec &lookupTable,
734 CPoolSet &cpool,
735 BlockCacheableInfo &bci,
736 MachineInstr *MI,
737 bool mDebug)
738 {
739 assert(isLoadInst(ATM->getInstrInfo(), MI) && "Only a load instruction can be parsed by "
740 "the parseLoadInst function.");
741 AMDILAS::InstrResEnc curRes;
742 getAsmPrinterFlags(MI, curRes);
743 unsigned dstReg = MI->getOperand(0).getReg();
744 unsigned idx = 0;
745 const Value *basePtr = NULL;
746 if (MI->getOperand(1).isReg()) {
747 idx = MI->getOperand(1).getReg();
748 basePtr = lookupTable[idx].second;
749 // If we don't know what value the register
750 // is assigned to, then we need to special case
751 // this instruction.
752 } else if (MI->getOperand(1).isFI()) {
753 idx = MI->getOperand(1).getIndex();
754 lookupTable[dstReg] = FIToPtrMap[idx];
755 } else if (MI->getOperand(1).isCPI()) {
756 cpool.insert(MI);
757 }
758 // If we are a hardware local, then we don't need to track it, as there
759 // is only one resource ID that we need to know about, so we
760 // map it using allocateDefaultID, which maps it to the default.
761 // This is also the case for REGION_ADDRESS and PRIVATE_ADDRESS.
762 if (isLRPInst(MI, ATM) || !basePtr) {
763 allocateDefaultID(ATM, curRes, MI, mDebug);
764 return;
765 }
766 // We have a load instruction so we map this instruction
767 // to the pointer and insert it into the set of known
768 // load instructions.
769 InstToPtrMap[MI].insert(basePtr);
770 PtrToInstMap[basePtr].push_back(MI);
771
772 if (isGlobalInst(ATM->getInstrInfo(), MI)) {
773 // Add to the cacheable set for the block. If there was a store earlier
774 // in the block, this call won't actually add it to the cacheable set.
775 bci.addPossiblyCacheableInst(ATM, MI);
776 }
777
778 if (mDebug) {
779 dbgs() << "Assigning instruction to pointer ";
780 dbgs() << basePtr->getName() << ". Inst: ";
781 MI->dump();
782 }
783 detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, true,
784 idx, dstReg, mDebug);
785 }
786
787 // In this case we want to handle a store instruction.
788 static void
789 parseStoreInst(
790 const AMDILTargetMachine *ATM,
791 InstPMap &InstToPtrMap,
792 PtrIMap &PtrToInstMap,
793 FIPMap &FIToPtrMap,
794 RVPVec &lookupTable,
795 CPoolSet &cpool,
796 BlockCacheableInfo &bci,
797 MachineInstr *MI,
798 ByteSet &bytePtrs,
799 ConflictSet &conflictPtrs,
800 bool mDebug)
801 {
802 assert(isStoreInst(ATM->getInstrInfo(), MI) && "Only a store instruction can be parsed by "
803 "the parseStoreInst function.");
804 AMDILAS::InstrResEnc curRes;
805 getAsmPrinterFlags(MI, curRes);
806 unsigned dstReg = MI->getOperand(0).getReg();
807
808 // If the data part of the store instruction is known to
809 // be a pointer, then we need to mark this pointer as being
810 // a byte pointer. This is the conservative case that needs
811 // to be handled correctly.
812 if (lookupTable[dstReg].second && lookupTable[dstReg].first != ~0U) {
813 curRes.bits.ConflictPtr = 1;
814 if (mDebug) {
815 dbgs() << "Found a case where the pointer is being stored!\n";
816 MI->dump();
817 dbgs() << "Pointer is ";
818 lookupTable[dstReg].second->print(dbgs());
819 dbgs() << "\n";
820 }
821 //PtrToInstMap[lookupTable[dstReg].second].push_back(MI);
822 if (lookupTable[dstReg].second->getType()->isPointerTy()) {
823 conflictPtrs.insert(lookupTable[dstReg].second);
824 }
825 }
826
827 // Before we go through the special cases, for the cacheable information
828 // all we care about is whether the store is global or not.
829 if (!isLRPInst(MI, ATM)) {
830 bci.setReachesExit();
831 }
832
833 // If the address is not a register address,
834 // then we need to lower it as an unknown id.
835 if (!MI->getOperand(1).isReg()) {
836 if (MI->getOperand(1).isCPI()) {
837 if (mDebug) {
838 dbgs() << "Found an instruction with a CPI index #"
839 << MI->getOperand(1).getIndex() << "!\n";
840 }
841 cpool.insert(MI);
842 } else if (MI->getOperand(1).isFI()) {
843 if (mDebug) {
844 dbgs() << "Found an instruction with a frame index #"
845 << MI->getOperand(1).getIndex() << "!\n";
846 }
847 // If we are a frame index and we are storing a pointer there, let's
848 // go ahead and assign the pointer to the location within the frame
849 // index map so that we can get the value out later.
850 FIToPtrMap[MI->getOperand(1).getIndex()] = lookupTable[dstReg];
851 }
852
853 allocateDefaultID(ATM, curRes, MI, mDebug);
854 return;
855 }
856 unsigned reg = MI->getOperand(1).getReg();
857 // If we don't know what value the register
858 // is assigned to, then we need to special case
859 // this instruction.
860 if (!lookupTable[reg].second) {
861 allocateDefaultID(ATM, curRes, MI, mDebug);
862 return;
863 }
864 // const Value *basePtr = lookupTable[reg].second;
865 // If we are a hardware local, then we don't need to track it, as there
866 // is only one resource ID that we need to know about, so we
867 // map it using allocateDefaultID, which maps it to the default.
868 // This is also the case for REGION_ADDRESS and PRIVATE_ADDRESS.
869 if (isLRPInst(MI, ATM)) {
870 allocateDefaultID(ATM, curRes, MI, mDebug);
871 return;
872 }
873
874 // We have a store instruction so we map this instruction
875 // to the pointer and insert it into the set of known
876 // store instructions.
877 InstToPtrMap[MI].insert(lookupTable[reg].second);
878 PtrToInstMap[lookupTable[reg].second].push_back(MI);
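// Stores whose source register class is sub-dword (i8, v2i8 or i16) and that
// go to global memory are flagged as byte stores below, and their base
// pointer is added to the byte-pointer set.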
879 uint16_t RegClass = MI->getDesc().OpInfo[0].RegClass;
880 switch (RegClass) {
881 default:
882 break;
883 case AMDIL::GPRI8RegClassID:
884 case AMDIL::GPRV2I8RegClassID:
885 case AMDIL::GPRI16RegClassID:
886 if (usesGlobal(ATM, MI)) {
887 if (mDebug) {
888 dbgs() << "Annotating instruction as Byte Store. Inst: ";
889 MI->dump();
890 }
891 curRes.bits.ByteStore = 1;
892 setAsmPrinterFlags(MI, curRes);
893 const PointerType *PT = dyn_cast<PointerType>(
894 lookupTable[reg].second->getType());
895 if (PT) {
896 bytePtrs.insert(lookupTable[reg].second);
897 }
898 }
899 break;
900 };
901 // If we are a truncating store, then we need to determine the
902 // size of the type that we are truncating to, and if it
903 // is less than 32 bits, we need to mark the pointer as a
904 // byte store pointer.
905 switch (MI->getOpcode()) {
906 case AMDIL::GLOBALTRUNCSTORE_i16i8:
907 case AMDIL::GLOBALTRUNCSTORE_v2i16i8:
908 case AMDIL::GLOBALTRUNCSTORE_i32i8:
909 case AMDIL::GLOBALTRUNCSTORE_v2i32i8:
910 case AMDIL::GLOBALTRUNCSTORE_i64i8:
911 case AMDIL::GLOBALTRUNCSTORE_v2i64i8:
912 case AMDIL::GLOBALTRUNCSTORE_i32i16:
913 case AMDIL::GLOBALTRUNCSTORE_i64i16:
914 case AMDIL::GLOBALSTORE_i8:
915 case AMDIL::GLOBALSTORE_i16:
916 curRes.bits.ByteStore = 1;
917 setAsmPrinterFlags(MI, curRes);
918 bytePtrs.insert(lookupTable[reg].second);
919 break;
920 default:
921 break;
922 }
923
924 if (mDebug) {
925 dbgs() << "Assigning instruction to pointer ";
926 dbgs() << lookupTable[reg].second->getName() << ". Inst: ";
927 MI->dump();
928 }
929 detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, true,
930 reg, dstReg, mDebug);
931 }
932
933 // In this case we want to handle an atomic instruction.
934 static void
935 parseAtomicInst(
936 const AMDILTargetMachine *ATM,
937 InstPMap &InstToPtrMap,
938 PtrIMap &PtrToInstMap,
939 RVPVec &lookupTable,
940 BlockCacheableInfo &bci,
941 MachineInstr *MI,
942 ByteSet &bytePtrs,
943 bool mDebug)
944 {
945 assert(isAtomicInst(ATM->getInstrInfo(), MI) && "Only an atomic instruction can be parsed by "
946 "the parseAtomicInst function.");
947 AMDILAS::InstrResEnc curRes;
948 unsigned dstReg = MI->getOperand(0).getReg();
949 unsigned reg = 0;
950 getAsmPrinterFlags(MI, curRes);
951 unsigned numOps = MI->getNumOperands();
952 bool found = false;
953 while (--numOps) {
954 MachineOperand &Op = MI->getOperand(numOps);
955 if (!Op.isReg()) {
956 continue;
957 }
958 reg = Op.getReg();
959 // If the register is not known to be owned by a pointer
960 // then we can ignore it
961 if (!lookupTable[reg].second) {
962 continue;
963 }
964 // if the pointer is known to be local, region or private, then we
965 // can ignore it. Although there are no private atomics, we still
966 // do this check so we don't have to write a new function to check
967 // for only local and region.
968 if (isLRPInst(MI, ATM)) {
969 continue;
970 }
971 found = true;
972 InstToPtrMap[MI].insert(lookupTable[reg].second);
973 PtrToInstMap[lookupTable[reg].second].push_back(MI);
974
975 // We now know we have an atomic operation on global memory.
976 // This is a store so must update the cacheable information.
977 bci.setReachesExit();
978
979 // Only do this if we have an SC with the arena atomic bug fix (EPR 326883).
980 // TODO: enable once SC with EPR 326883 has been promoted to CAL.
981 if (ATM->getSubtargetImpl()->calVersion() >= CAL_VERSION_SC_150) {
982 // Force pointers that are used by atomics to be in the arena.
983 // If they were allowed to be accessed as RAW they would cause
984 // all access to use the slow complete path.
985 if (mDebug) {
986 dbgs() << __LINE__ << ": Setting byte store bit on atomic instruction: ";
987 MI->dump();
988 }
989 curRes.bits.ByteStore = 1;
990 bytePtrs.insert(lookupTable[reg].second);
991 }
992
993 if (mDebug) {
994 dbgs() << "Assigning instruction to pointer ";
995 dbgs() << lookupTable[reg].second->getName() << ". Inst: ";
996 MI->dump();
997 }
998 detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, true,
999 reg, dstReg, mDebug);
1000 }
1001 if (!found) {
1002 allocateDefaultID(ATM, curRes, MI, mDebug);
1003 }
1004 }
1005 // In this case we want to handle a counter instruction.
1006 static void
1007 parseAppendInst(
1008 const AMDILTargetMachine *ATM,
1009 InstPMap &InstToPtrMap,
1010 PtrIMap &PtrToInstMap,
1011 RVPVec &lookupTable,
1012 MachineInstr *MI,
1013 bool mDebug)
1014 {
1015 assert(isAppendInst(ATM->getInstrInfo(), MI) && "Only an atomic counter instruction can be "
1016 "parsed by the parseAppendInst function.");
1017 AMDILAS::InstrResEnc curRes;
1018 unsigned dstReg = MI->getOperand(0).getReg();
1019 unsigned reg = MI->getOperand(1).getReg();
1020 getAsmPrinterFlags(MI, curRes);
1021 // If the register is not known to be owned by a pointer
1022 // then we set it to the default
1023 if (!lookupTable[reg].second) {
1024 allocateDefaultID(ATM, curRes, MI, mDebug);
1025 return;
1026 }
1027 InstToPtrMap[MI].insert(lookupTable[reg].second);
1028 PtrToInstMap[lookupTable[reg].second].push_back(MI);
1029 if (mDebug) {
1030 dbgs() << "Assigning instruction to pointer ";
1031 dbgs() << lookupTable[reg].second->getName() << ". Inst: ";
1032 MI->dump();
1033 }
1034 detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, true,
1035 reg, dstReg, mDebug);
1036 }
1037 // In this case we want to handle an Image instruction.
1038 static void
1039 parseImageInst(
1040 const AMDILTargetMachine *ATM,
1041 InstPMap &InstToPtrMap,
1042 PtrIMap &PtrToInstMap,
1043 FIPMap &FIToPtrMap,
1044 RVPVec &lookupTable,
1045 MachineInstr *MI,
1046 bool mDebug)
1047 {
1048 assert(isImageInst(ATM->getInstrInfo(), MI) && "Only an image instruction can be "
1049 "parsed by the parseImageInst function.");
1050 AMDILAS::InstrResEnc curRes;
1051 getAsmPrinterFlags(MI, curRes);
1052 // AMDILKernelManager *km =
1053 // (AMDILKernelManager *)ATM->getSubtargetImpl()->getKernelManager();
1054 AMDILMachineFunctionInfo *mMFI = MI->getParent()->getParent()
1055 ->getInfo<AMDILMachineFunctionInfo>();
1056 if (MI->getOpcode() == AMDIL::IMAGE2D_WRITE
1057 || MI->getOpcode() == AMDIL::IMAGE3D_WRITE) {
1058 unsigned dstReg = MI->getOperand(0).getReg();
1059 curRes.bits.ResourceID = lookupTable[dstReg].first & 0xFFFF;
1060 curRes.bits.isImage = 1;
1061 InstToPtrMap[MI].insert(lookupTable[dstReg].second);
1062 PtrToInstMap[lookupTable[dstReg].second].push_back(MI);
1063 if (mDebug) {
1064 dbgs() << "Assigning instruction to pointer ";
1065 dbgs() << lookupTable[dstReg].second->getName() << ". Inst: ";
1066 MI->dump();
1067 }
1068 } else {
1069 // unsigned dstReg = MI->getOperand(0).getReg();
1070 unsigned reg = MI->getOperand(1).getReg();
1071
1072 // If the register is not known to be owned by a pointer
1073 // then we set it to the default
1074 if (!lookupTable[reg].second) {
1075 assert(!"This should not happen for images!");
1076 allocateDefaultID(ATM, curRes, MI, mDebug);
1077 return;
1078 }
1079 InstToPtrMap[MI].insert(lookupTable[reg].second);
1080 PtrToInstMap[lookupTable[reg].second].push_back(MI);
1081 if (mDebug) {
1082 dbgs() << "Assigning instruction to pointer ";
1083 dbgs() << lookupTable[reg].second->getName() << ". Inst: ";
1084 MI->dump();
1085 }
1086 switch (MI->getOpcode()) {
1087 case AMDIL::IMAGE2D_READ:
1088 case AMDIL::IMAGE2D_READ_UNNORM:
1089 case AMDIL::IMAGE3D_READ:
1090 case AMDIL::IMAGE3D_READ_UNNORM:
1091 curRes.bits.ResourceID = lookupTable[reg].first & 0xFFFF;
1092 if (MI->getOperand(3).isReg()) {
1093 // Our sampler is not a literal value.
1094 char buffer[256];
1095 memset(buffer, 0, sizeof(buffer));
1096 std::string sampler_name = "";
1097 unsigned reg = MI->getOperand(3).getReg();
1098 if (lookupTable[reg].second) {
1099 sampler_name = lookupTable[reg].second->getName();
1100 }
1101 if (sampler_name.empty()) {
1102 sampler_name = findSamplerName(MI, lookupTable, FIToPtrMap, ATM);
1103 }
1104 uint32_t val = mMFI->addSampler(sampler_name, ~0U);
1105 if (mDebug) {
1106 dbgs() << "Mapping kernel sampler " << sampler_name
1107 << " to sampler number " << val << " for Inst:\n";
1108 MI->dump();
1109 }
1110 MI->getOperand(3).ChangeToImmediate(val);
1111 } else {
1112 // Our sampler is given as a literal immediate, so let's make sure
1113 // that the metadata for it is known.
1114 char buffer[256];
1115 memset(buffer, 0, sizeof(buffer));
1116 sprintf(buffer,"_%d", (int32_t)MI->getOperand(3).getImm());
1117 std::string sampler_name = std::string("unknown") + std::string(buffer);
1118 uint32_t val = mMFI->addSampler(sampler_name, MI->getOperand(3).getImm());
1119 if (mDebug) {
1120 dbgs() << "Mapping internal sampler " << sampler_name
1121 << " to sampler number " << val << " for Inst:\n";
1122 MI->dump();
1123 }
1124 MI->getOperand(3).setImm(val);
1125 }
1126 break;
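// For the image info opcodes, the upper 16 bits of the lookup value hold the
// constant buffer entry (cbNum) recorded in parseArguments; shift it down to
// recover the resource ID used to encode the 'info' intrinsics.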
1127 case AMDIL::IMAGE2D_INFO0:
1128 case AMDIL::IMAGE3D_INFO0:
1129 curRes.bits.ResourceID = lookupTable[reg].first >> 16;
1130 break;
1131 case AMDIL::IMAGE2D_INFO1:
1132 case AMDIL::IMAGE2DA_INFO1:
1133 curRes.bits.ResourceID = (lookupTable[reg].first >> 16) + 1;
1134 break;
1135 };
1136 curRes.bits.isImage = 1;
1137 }
1138 setAsmPrinterFlags(MI, curRes);
1139 }
1140 // This case handles the rest of the instructions
1141 static void
1142 parseInstruction(
1143 const AMDILTargetMachine *ATM,
1144 InstPMap &InstToPtrMap,
1145 PtrIMap &PtrToInstMap,
1146 RVPVec &lookupTable,
1147 CPoolSet &cpool,
1148 MachineInstr *MI,
1149 bool mDebug)
1150 {
1151 assert(!isAtomicInst(ATM->getInstrInfo(), MI) && !isStoreInst(ATM->getInstrInfo(), MI) && !isLoadInst(ATM->getInstrInfo(), MI) &&
1152 !isAppendInst(ATM->getInstrInfo(), MI) && !isImageInst(ATM->getInstrInfo(), MI) &&
1153 "Atomic/Load/Store/Append/Image insts should not be handled here!");
1154 unsigned numOps = MI->getNumOperands();
1155 // If we don't have any operands, we can skip this instruction
1156 if (!numOps) {
1157 return;
1158 }
1159 // if the dst operand is not a register, then we can skip
1160 // this instruction. That is because we are probably a branch
1161 // or jump instruction.
1162 if (!MI->getOperand(0).isReg()) {
1163 return;
1164 }
1165 // If we are a LOADCONST_i32, we might be a sampler, so we need
1166 // to propagate the LOADCONST to IMAGE[2|3]D_READ instructions.
1167 if (MI->getOpcode() == AMDIL::LOADCONST_i32) {
1168 uint32_t val = MI->getOperand(1).getImm();
1169 MachineOperand* oldPtr = &MI->getOperand(0);
1170 MachineOperand* moPtr = oldPtr->getNextOperandForReg();
1171 while (moPtr) {
1172 oldPtr = moPtr;
1173 moPtr = oldPtr->getNextOperandForReg();
1174 switch (oldPtr->getParent()->getOpcode()) {
1175 default:
1176 break;
1177 case AMDIL::IMAGE2D_READ:
1178 case AMDIL::IMAGE2D_READ_UNNORM:
1179 case AMDIL::IMAGE3D_READ:
1180 case AMDIL::IMAGE3D_READ_UNNORM:
1181 if (mDebug) {
1182 dbgs() << "Found a constant sampler for image read inst: ";
1183 oldPtr->getParent()->print(dbgs());
1184 }
1185 oldPtr->ChangeToImmediate(val);
1186 break;
1187 }
1188 }
1189 }
1190 AMDILAS::InstrResEnc curRes;
1191 getAsmPrinterFlags(MI, curRes);
1192 unsigned dstReg = MI->getOperand(0).getReg();
1193 unsigned reg = 0;
1194 while (--numOps) {
1195 MachineOperand &Op = MI->getOperand(numOps);
1196 // if the operand is not a register, then we can ignore it
1197 if (!Op.isReg()) {
1198 if (Op.isCPI()) {
1199 cpool.insert(MI);
1200 }
1201 continue;
1202 }
1203 reg = Op.getReg();
1204 // If the register is not known to be owned by a pointer
1205 // then we can ignore it
1206 if (!lookupTable[reg].second) {
1207 continue;
1208 }
1209 detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, false,
1210 reg, dstReg, mDebug);
1211
1212 }
1213 }
1214
1215 // This function parses the basic block and based on the instruction type,
1216 // calls the function to finish parsing the instruction.
1217 static void
1218 parseBasicBlock(
1219 const AMDILTargetMachine *ATM,
1220 MachineBasicBlock *MB,
1221 InstPMap &InstToPtrMap,
1222 PtrIMap &PtrToInstMap,
1223 FIPMap &FIToPtrMap,
1224 RVPVec &lookupTable,
1225 ByteSet &bytePtrs,
1226 ConflictSet &conflictPtrs,
1227 CPoolSet &cpool,
1228 BlockCacheableInfo &bci,
1229 bool mDebug)
1230 {
1231 for (MachineBasicBlock::iterator mbb = MB->begin(), mbe = MB->end();
1232 mbb != mbe; ++mbb) {
1233 MachineInstr *MI = mbb;
1234 if (MI->getOpcode() == AMDIL::CALL) {
1235 parseCall(ATM, InstToPtrMap, PtrToInstMap, lookupTable,
1236 mbb, mbe, mDebug);
1237 }
1238 else if (isLoadInst(ATM->getInstrInfo(), MI)) {
1239 parseLoadInst(ATM, InstToPtrMap, PtrToInstMap,
1240 FIToPtrMap, lookupTable, cpool, bci, MI, mDebug);
1241 } else if (isStoreInst(ATM->getInstrInfo(), MI)) {
1242 parseStoreInst(ATM, InstToPtrMap, PtrToInstMap,
1243 FIToPtrMap, lookupTable, cpool, bci, MI, bytePtrs, conflictPtrs, mDebug);
1244 } else if (isAtomicInst(ATM->getInstrInfo(), MI)) {
1245 parseAtomicInst(ATM, InstToPtrMap, PtrToInstMap,
1246 lookupTable, bci, MI, bytePtrs, mDebug);
1247 } else if (isAppendInst(ATM->getInstrInfo(), MI)) {
1248 parseAppendInst(ATM, InstToPtrMap, PtrToInstMap,
1249 lookupTable, MI, mDebug);
1250 } else if (isImageInst(ATM->getInstrInfo(), MI)) {
1251 parseImageInst(ATM, InstToPtrMap, PtrToInstMap,
1252 FIToPtrMap, lookupTable, MI, mDebug);
1253 } else {
1254 parseInstruction(ATM, InstToPtrMap, PtrToInstMap,
1255 lookupTable, cpool, MI, mDebug);
1256 }
1257 }
1258 }
1259
1260 // Follows the Reverse Post Order Traversal of the basic blocks to
1261 // determine which order to parse basic blocks in.
1262 void
1263 parseFunction(
1264 const AMDILPointerManager *PM,
1265 const AMDILTargetMachine *ATM,
1266 MachineFunction &MF,
1267 InstPMap &InstToPtrMap,
1268 PtrIMap &PtrToInstMap,
1269 FIPMap &FIToPtrMap,
1270 RVPVec &lookupTable,
1271 ByteSet &bytePtrs,
1272 ConflictSet &conflictPtrs,
1273 CPoolSet &cpool,
1274 MBBCacheableMap &mbbCacheable,
1275 bool mDebug)
1276 {
1277 if (mDebug) {
1278 MachineDominatorTree *dominatorTree = &PM
1279 ->getAnalysis<MachineDominatorTree>();
1280 dominatorTree->dump();
1281 }
1282
1283 std::list<MachineBasicBlock*> prop_worklist;
1284
1285 ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
1286 for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator
1287 curBlock = RPOT.begin(), endBlock = RPOT.end();
1288 curBlock != endBlock; ++curBlock) {
1289 MachineBasicBlock *MB = (*curBlock);
1290 BlockCacheableInfo &bci = mbbCacheable[MB];
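// If any predecessor has a store that reaches its exit, then a store can
// also reach the top of this block, so mark it before parsing the block.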
1291 for (MachineBasicBlock::pred_iterator mbbit = MB->pred_begin(),
1292 mbbitend = MB->pred_end();
1293 mbbit != mbbitend;
1294 mbbit++) {
1295 MBBCacheableMap::const_iterator mbbcmit = mbbCacheable.find(*mbbit);
1296 if (mbbcmit != mbbCacheable.end() &&
1297 mbbcmit->second.storeReachesExit()) {
1298 bci.setReachesTop();
1299 break;
1300 }
1301 }
1302
1303 if (mDebug) {
1304 dbgs() << "[BlockOrdering] Parsing CurrentBlock: "
1305 << MB->getNumber() << "\n";
1306 }
1307 parseBasicBlock(ATM, MB, InstToPtrMap, PtrToInstMap,
1308 FIToPtrMap, lookupTable, bytePtrs, conflictPtrs, cpool, bci, mDebug);
1309
1310 if (bci.storeReachesExit())
1311 prop_worklist.push_back(MB);
1312
1313 if (mDebug) {
1314 dbgs() << "BCI info: Top: " << bci.storeReachesTop() << " Exit: "
1315 << bci.storeReachesExit() << "\n Instructions:\n";
1316 for (CacheableInstrSet::const_iterator cibit = bci.cacheableBegin(),
1317 cibitend = bci.cacheableEnd();
1318 cibit != cibitend;
1319 cibit++)
1320 {
1321 (*cibit)->dump();
1322 }
1323 }
1324 }
1325
1326 // This loop pushes any "storeReachesExit" flags into successor
1327 // blocks until the flags have been fully propagated. This will
1328 // ensure that blocks that have reachable stores due to loops
1329 // are labeled appropriately.
1330 while (!prop_worklist.empty()) {
1331 MachineBasicBlock *wlb = prop_worklist.front();
1332 prop_worklist.pop_front();
1333 for (MachineBasicBlock::succ_iterator mbbit = wlb->succ_begin(),
1334 mbbitend = wlb->succ_end();
1335 mbbit != mbbitend;
1336 mbbit++)
1337 {
1338 BlockCacheableInfo &blockCache = mbbCacheable[*mbbit];
1339 if (!blockCache.storeReachesTop()) {
1340 blockCache.setReachesTop();
1341 prop_worklist.push_back(*mbbit);
1342 }
1343 if (mDebug) {
1344 dbgs() << "BCI Prop info: " << (*mbbit)->getNumber() << " Top: "
1345 << blockCache.storeReachesTop() << " Exit: "
1346 << blockCache.storeReachesExit()
1347 << "\n";
1348 }
1349 }
1350 }
1351 }
1352
1353 // Helper function that dumps to dbgs() information about
1354 // a pointer set.
1355 void
1356 dumpPointers(AppendSet &Ptrs, const char *str)
1357 {
1358 if (Ptrs.empty()) {
1359 return;
1360 }
1361 dbgs() << "[Dump]" << str << " found: " << "\n";
1362 for (AppendSet::iterator sb = Ptrs.begin();
1363 sb != Ptrs.end(); ++sb) {
1364 (*sb)->dump();
1365 }
1366 dbgs() << "\n";
1367 }
1368 // Helper function that dumps to dbgs() information about
1369 // a pointer set.
1370 void
1371 dumpPointers(PtrSet &Ptrs, const char *str)
1372 {
1373 if (Ptrs.empty()) {
1374 return;
1375 }
1376 dbgs() << "[Dump]" << str << " found: " << "\n";
1377 for (PtrSet::iterator sb = Ptrs.begin();
1378 sb != Ptrs.end(); ++sb) {
1379 (*sb)->dump();
1380 }
1381 dbgs() << "\n";
1382 }
1383 // Function that detects all the conflicting pointers and adds
1384 // the pointers that are detected to the conflict set, otherwise
1385 // they are added to the raw or byte set based on their usage.
1386 void
1387 detectConflictingPointers(
1388 const AMDILTargetMachine *ATM,
1389 InstPMap &InstToPtrMap,
1390 ByteSet &bytePtrs,
1391 RawSet &rawPtrs,
1392 ConflictSet &conflictPtrs,
1393 bool mDebug)
1394 {
1395 if (InstToPtrMap.empty()) {
1396 return;
1397 }
1398 PtrSet aliasedPtrs;
1399 const AMDILSubtarget *STM = ATM->getSubtargetImpl();
1400 for (InstPMap::iterator
1401 mapIter = InstToPtrMap.begin(), iterEnd = InstToPtrMap.end();
1402 mapIter != iterEnd; ++mapIter) {
1403 if (mDebug) {
1404 dbgs() << "Instruction: ";
1405 (mapIter)->first->dump();
1406 }
1407 MachineInstr* MI = mapIter->first;
1408 AMDILAS::InstrResEnc curRes;
1409 getAsmPrinterFlags(MI, curRes);
1410 if (curRes.bits.isImage) {
1411 continue;
1412 }
1413 bool byte = false;
1414 // We might have a case where more than one pointer is going to the same
1415 // I/O instruction.
1416 if (mDebug) {
1417 dbgs() << "Base Pointer[s]:\n";
1418 }
1419 for (PtrSet::iterator cfIter = mapIter->second.begin(),
1420 cfEnd = mapIter->second.end(); cfIter != cfEnd; ++cfIter) {
1421 if (mDebug) {
1422 (*cfIter)->dump();
1423 }
1424 if (bytePtrs.count(*cfIter)) {
1425 if (mDebug) {
1426 dbgs() << "Byte pointer found!\n";
1427 }
1428 byte = true;
1429 break;
1430 }
1431 }
1432 if (byte) {
1433 for (PtrSet::iterator cfIter = mapIter->second.begin(),
1434 cfEnd = mapIter->second.end(); cfIter != cfEnd; ++cfIter) {
1435 const Value *ptr = (*cfIter);
1436 if (isLRPInst(mapIter->first, ATM)) {
1437 // We don't need to deal with pointers to local/region/private
1438 // memory regions
1439 continue;
1440 }
1441 if (mDebug) {
1442 dbgs() << "Adding pointer " << (ptr)->getName()
1443 << " to byte set!\n";
1444 }
1445 const PointerType *PT = dyn_cast<PointerType>(ptr->getType());
1446 if (PT) {
1447 bytePtrs.insert(ptr);
1448 }
1449 }
1450 } else {
1451 for (PtrSet::iterator cfIter = mapIter->second.begin(),
1452 cfEnd = mapIter->second.end(); cfIter != cfEnd; ++cfIter) {
1453 const Value *ptr = (*cfIter);
1454 // bool aliased = false;
1455 if (isLRPInst(mapIter->first, ATM)) {
1456 // We don't need to deal with pointers to local/region/private
1457 // memory regions
1458 continue;
1459 }
1460 const Argument *arg = dyn_cast_or_null<Argument>(*cfIter);
1461 if (!arg) {
1462 continue;
1463 }
1464 if (!STM->device()->isSupported(AMDILDeviceInfo::NoAlias)
1465 && !arg->hasNoAliasAttr()) {
1466 if (mDebug) {
1467 dbgs() << "Possible aliased pointer found!\n";
1468 }
1469 aliasedPtrs.insert(ptr);
1470 }
1471 if (mapIter->second.size() > 1) {
1472 if (mDebug) {
1473 dbgs() << "Adding pointer " << ptr->getName()
1474 << " to conflict set!\n";
1475 }
1476 const PointerType *PT = dyn_cast<PointerType>(ptr->getType());
1477 if (PT) {
1478 conflictPtrs.insert(ptr);
1479 }
1480 }
1481 if (mDebug) {
1482 dbgs() << "Adding pointer " << ptr->getName()
1483 << " to raw set!\n";
1484 }
1485 const PointerType *PT = dyn_cast<PointerType>(ptr->getType());
1486 if (PT) {
1487 rawPtrs.insert(ptr);
1488 }
1489 }
1490 }
1491 if (mDebug) {
1492 dbgs() << "\n";
1493 }
1494 }
1495 // If we have any aliased pointers and byte pointers exist,
1496 // then make sure that all of the aliased pointers are
1497 // part of the byte pointer set.
1498 if (!bytePtrs.empty()) {
1499 for (PtrSet::iterator aIter = aliasedPtrs.begin(),
1500 aEnd = aliasedPtrs.end(); aIter != aEnd; ++aIter) {
1501 if (mDebug) {
1502 dbgs() << "Moving " << (*aIter)->getName()
1503 << " from raw to byte.\n";
1504 }
1505 bytePtrs.insert(*aIter);
1506 rawPtrs.erase(*aIter);
1507 }
1508 }
1509 }
1510 // Function that detects aliased constant pool operations.
1511 void
1512 detectAliasedCPoolOps(
1513 TargetMachine &TM,
1514 CPoolSet &cpool,
1515 bool mDebug
1516 )
1517 {
1518 const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>();
1519 if (mDebug && !cpool.empty()) {
1520 dbgs() << "Instructions w/ CPool Ops: \n";
1521 }
1522 // The algorithm for detecting aliased cpool operations is as follows:
1523 // for each instruction that has a cpool argument,
1524 // follow the def-use chain;
1525 // if an instruction is a load and the load is a private load,
1526 // switch it to a constant pool load.
1527 for (CPoolSet::iterator cpb = cpool.begin(), cpe = cpool.end();
1528 cpb != cpe; ++cpb) {
1529 if (mDebug) {
1530 (*cpb)->dump();
1531 }
1532 std::queue<MachineInstr*> queue;
1533 std::set<MachineInstr*> visited;
1534 queue.push(*cpb);
1535 MachineInstr *cur;
1536 while (!queue.empty()) {
1537 cur = queue.front();
1538 queue.pop();
1539 if (visited.count(cur)) {
1540 continue;
1541 }
1542 if (isLoadInst(TM.getInstrInfo(), cur) && isPrivateInst(TM.getInstrInfo(), cur)) {
1543 // If we are a private load and the register is
1544 // used in the address register, we need to
1545 // switch from private to constant pool load.
1546 if (mDebug) {
1547 dbgs() << "Found an instruction that is a private load "
1548 << "but should be a constant pool load.\n";
1549 cur->print(dbgs());
1550 dbgs() << "\n";
1551 }
1552 AMDILAS::InstrResEnc curRes;
1553 getAsmPrinterFlags(cur, curRes);
1554 curRes.bits.ResourceID = STM->device()->getResourceID(AMDILDevice::GLOBAL_ID);
1555 curRes.bits.ConflictPtr = 1;
1556 setAsmPrinterFlags(cur, curRes);
1557 cur->setDesc(TM.getInstrInfo()->get(
1558 (cur->getOpcode() - AMDIL::PRIVATEAEXTLOAD_f32)
1559 + AMDIL::CPOOLAEXTLOAD_f32));
1560 } else {
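// Otherwise keep walking the def-use chain: queue every instruction that
// uses the result register so it is visited on a later iteration.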
1561 if (cur->getOperand(0).isReg()) {
1562 MachineOperand* ptr = cur->getOperand(0).getNextOperandForReg();
1563 while (ptr && !ptr->isDef() && ptr->isReg()) {
1564 queue.push(ptr->getParent());
1565 ptr = ptr->getNextOperandForReg();
1566 }
1567 }
1568 }
1569 visited.insert(cur);
1570 }
1571 }
1572 }
1573 // Function that detects fully cacheable pointers. Fully cacheable pointers
1574 // are pointers that have no writes to them and -fno-alias is specified.
1575 void
1576 detectFullyCacheablePointers(
1577 const AMDILTargetMachine *ATM,
1578 PtrIMap &PtrToInstMap,
1579 RawSet &rawPtrs,
1580 CacheableSet &cacheablePtrs,
1581 ConflictSet &conflictPtrs,
1582 bool mDebug
1583 )
1584 {
1585 if (PtrToInstMap.empty()) {
1586 return;
1587 }
1588 const AMDILSubtarget *STM
1589 = ATM->getSubtargetImpl();
1590 // 4XXX hardware doesn't support cached uav opcodes and we assume
1591 // no aliasing for this to work. Also in debug mode we don't do
1592 // any caching.
1593 if (STM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX
1594 || !STM->device()->isSupported(AMDILDeviceInfo::CachedMem)) {
1595 return;
1596 }
1597 if (STM->device()->isSupported(AMDILDeviceInfo::NoAlias)) {
1598 for (PtrIMap::iterator mapIter = PtrToInstMap.begin(),
1599 iterEnd = PtrToInstMap.end(); mapIter != iterEnd; ++mapIter) {
1600 if (mDebug) {
1601 dbgs() << "Instruction: ";
1602 mapIter->first->dump();
1603 }
1604 // Skip the pointer if we have already detected it.
1605 if (cacheablePtrs.count(mapIter->first)) {
1606 continue;
1607 }
1608 bool cacheable = true;
1609 for (std::vector<MachineInstr*>::iterator
1610 miBegin = mapIter->second.begin(),
1611 miEnd = mapIter->second.end(); miBegin != miEnd; ++miBegin) {
1612 if (isStoreInst(ATM->getInstrInfo(), *miBegin) ||
1613 isImageInst(ATM->getInstrInfo(), *miBegin) ||
1614 isAtomicInst(ATM->getInstrInfo(), *miBegin)) {
1615 cacheable = false;
1616 break;
1617 }
1618 }
1619       // This pointer isn't cacheable, so let's move on to the next pointer.
1620 if (!cacheable) {
1621 continue;
1622 }
1623       // If this pointer is in the conflict set, let's move on to the next pointer.
1624       // FIXME: we need to check whether the pointers that conflict with
1625       // the current pointer are also cacheable. If they are, then add them
1626       // to the cacheable list instead of failing.
1627 if (conflictPtrs.count(mapIter->first)) {
1628 continue;
1629 }
1630 // Otherwise if we have no stores and no conflicting pointers, we can
1631 // be added to the cacheable set.
1632 if (mDebug) {
1633 dbgs() << "Adding pointer " << mapIter->first->getName();
1634         dbgs() << " to the cacheable set!\n";
1635 }
1636 const PointerType *PT = dyn_cast<PointerType>(mapIter->first->getType());
1637 if (PT) {
1638 cacheablePtrs.insert(mapIter->first);
1639 }
1640 }
1641 }
1642 }
1643
1644 // Are any of the pointers in PtrSet also in the BytePtrs or the CachePtrs?
1645 static bool
1646 ptrSetIntersectsByteOrCache(
1647 PtrSet &cacheSet,
1648 ByteSet &bytePtrs,
1649 CacheableSet &cacheablePtrs
1650 )
1651 {
1652 for (PtrSet::const_iterator psit = cacheSet.begin(),
1653 psitend = cacheSet.end();
1654 psit != psitend;
1655 psit++) {
1656 if (bytePtrs.find(*psit) != bytePtrs.end() ||
1657 cacheablePtrs.find(*psit) != cacheablePtrs.end()) {
1658 return true;
1659 }
1660 }
1661 return false;
1662 }
1663
1664 // Function that detects which individual instructions are cacheable even if
1665 // not all instructions of their pointer are cacheable. The resulting
1666 // set of instructions will not reference pointers that are in the cacheable
1667 // pointer set (under the assumption those get marked cacheable already)
1668 // or pointers in the byte set, since those are not cacheable.
1669 void
1670 detectCacheableInstrs(
1671 MBBCacheableMap &bbCacheable,
1672 InstPMap &InstToPtrMap,
1673 CacheableSet &cacheablePtrs,
1674 ByteSet &bytePtrs,
1675 CacheableInstrSet &cacheableSet,
1676 bool mDebug
1677 )
1678
1679 {
1680 for (MBBCacheableMap::const_iterator mbbcit = bbCacheable.begin(),
1681 mbbcitend = bbCacheable.end();
1682 mbbcit != mbbcitend;
1683 mbbcit++) {
1684 for (CacheableInstrSet::const_iterator bciit
1685 = mbbcit->second.cacheableBegin(),
1686 bciitend
1687 = mbbcit->second.cacheableEnd();
1688 bciit != bciitend;
1689 bciit++) {
1690 if (!ptrSetIntersectsByteOrCache(InstToPtrMap[*bciit],
1691 bytePtrs,
1692 cacheablePtrs)) {
1693 cacheableSet.insert(*bciit);
1694 }
1695 }
1696 }
1697 }
1698 // This function annotates the cacheable pointers with the
1699 // CacheableRead bit. The cacheable read bit is set
1700 // when the number of write images is not equal to the max
1701 // or if the default RAW_UAV_ID is equal to 11. The first
1702 // condition means that there is a raw uav between 0 and 7
1703 // that is available for cacheable reads and the second
1704 // condition means that UAV 11 is available for cacheable
1705 // reads.
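// Illustrative effect on the per-instruction flags when the device's
// RAW_UAV_ID is 11 (see the inner loop below):
//   curRes.bits.CacheableRead = 1;
//   curRes.bits.ResourceID    = 11;
// and UAV 11 is recorded in the machine function info via uav_insert().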
1706 void
1707 annotateCacheablePtrs(
1708 TargetMachine &TM,
1709 PtrIMap &PtrToInstMap,
1710 CacheableSet &cacheablePtrs,
1711 ByteSet &bytePtrs,
1712 uint32_t numWriteImages,
1713 bool mDebug)
1714 {
1715 const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>();
1716 // AMDILKernelManager *KM = (AMDILKernelManager*)STM->getKernelManager();
1717 PtrSet::iterator siBegin, siEnd;
1718 std::vector<MachineInstr*>::iterator miBegin, miEnd;
1719 AMDILMachineFunctionInfo *mMFI = NULL;
1720 // First we can check the cacheable pointers
1721 for (siBegin = cacheablePtrs.begin(), siEnd = cacheablePtrs.end();
1722 siBegin != siEnd; ++siBegin) {
1723 assert(!bytePtrs.count(*siBegin) && "Found a cacheable pointer "
1724 "that also exists as a byte pointer!");
1725 for (miBegin = PtrToInstMap[*siBegin].begin(),
1726 miEnd = PtrToInstMap[*siBegin].end();
1727 miBegin != miEnd; ++miBegin) {
1728 if (mDebug) {
1729 dbgs() << "Annotating pointer as cacheable. Inst: ";
1730 (*miBegin)->dump();
1731 }
1732 AMDILAS::InstrResEnc curRes;
1733 getAsmPrinterFlags(*miBegin, curRes);
1734 assert(!curRes.bits.ByteStore && "No cacheable pointers should have the "
1735 "byte Store flag set!");
1736 // If UAV11 is enabled, then we can enable cached reads.
1737 if (STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) == 11) {
1738 curRes.bits.CacheableRead = 1;
1739 curRes.bits.ResourceID = 11;
1740 setAsmPrinterFlags(*miBegin, curRes);
1741 if (!mMFI) {
1742 mMFI = (*miBegin)->getParent()->getParent()
1743 ->getInfo<AMDILMachineFunctionInfo>();
1744 }
1745 mMFI->uav_insert(curRes.bits.ResourceID);
1746 }
1747 }
1748 }
1749 }
1750
1751 // A byte pointer is a pointer that has a byte store assigned to it
1752 // somewhere along its pointer path.
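// In practice this typically means a pointer whose accesses include
// sub-32-bit (byte/short) operations; the loop below routes such accesses
// to the arena UAV unless the address space maps to a hardware resource
// (constant buffer, LDS, GDS, or scratch).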
1753 void
1754 annotateBytePtrs(
1755 TargetMachine &TM,
1756 PtrIMap &PtrToInstMap,
1757 ByteSet &bytePtrs,
1758 RawSet &rawPtrs,
1759 bool mDebug
1760 )
1761 {
1762 const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>();
1763 AMDILKernelManager *KM = STM->getKernelManager();
1764 PtrSet::iterator siBegin, siEnd;
1765 std::vector<MachineInstr*>::iterator miBegin, miEnd;
1766 uint32_t arenaID = STM->device()
1767 ->getResourceID(AMDILDevice::ARENA_UAV_ID);
1768 if (STM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) {
1769 arenaID = ARENA_SEGMENT_RESERVED_UAVS + 1;
1770 }
1771 AMDILMachineFunctionInfo *mMFI = NULL;
1772 for (siBegin = bytePtrs.begin(), siEnd = bytePtrs.end();
1773 siBegin != siEnd; ++siBegin) {
1774 const Value* val = (*siBegin);
1775 const PointerType *PT = dyn_cast<PointerType>(val->getType());
1776 if (!PT) {
1777 continue;
1778 }
1779 const Argument *curArg = dyn_cast<Argument>(val);
1780 assert(!rawPtrs.count(*siBegin) && "Found a byte pointer "
1781 "that also exists as a raw pointer!");
1782 bool arenaInc = false;
1783 for (miBegin = PtrToInstMap[*siBegin].begin(),
1784 miEnd = PtrToInstMap[*siBegin].end();
1785 miBegin != miEnd; ++miBegin) {
1786 if (mDebug) {
1787 dbgs() << "Annotating pointer as arena. Inst: ";
1788 (*miBegin)->dump();
1789 }
1790 AMDILAS::InstrResEnc curRes;
1791 getAsmPrinterFlags(*miBegin, curRes);
1792
1793 if (STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)
1794 && PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS) {
1795 // If hardware constant mem is enabled, then we need to
1796 // get the constant pointer CB number and use that to specify
1797 // the resource ID.
1798 AMDILGlobalManager *GM = STM->getGlobalManager();
1799 const StringRef funcName = (*miBegin)->getParent()->getParent()
1800 ->getFunction()->getName();
1801 if (GM->isKernel(funcName)) {
1802 const kernel &krnl = GM->getKernel(funcName);
1803 curRes.bits.ResourceID = GM->getConstPtrCB(krnl,
1804 (*siBegin)->getName());
1805 curRes.bits.HardwareInst = 1;
1806 } else {
1807 curRes.bits.ResourceID = STM->device()
1808 ->getResourceID(AMDILDevice::CONSTANT_ID);
1809 }
1810 } else if (STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)
1811 && PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS) {
1812 // If hardware local mem is enabled, get the local mem ID from
1813 // the device to use as the ResourceID
1814 curRes.bits.ResourceID = STM->device()
1815 ->getResourceID(AMDILDevice::LDS_ID);
1816 if (isAtomicInst(TM.getInstrInfo(), *miBegin)) {
1817 assert(curRes.bits.ResourceID && "Atomic resource ID "
1818               "cannot be zero!");
1819 (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
1820 .setImm(curRes.bits.ResourceID);
1821 }
1822 } else if (STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)
1823 && PT->getAddressSpace() == AMDILAS::REGION_ADDRESS) {
1824 // If hardware region mem is enabled, get the gds mem ID from
1825 // the device to use as the ResourceID
1826 curRes.bits.ResourceID = STM->device()
1827 ->getResourceID(AMDILDevice::GDS_ID);
1828 if (isAtomicInst(TM.getInstrInfo(), *miBegin)) {
1829 assert(curRes.bits.ResourceID && "Atomic resource ID "
1830               "cannot be zero!");
1831 (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
1832 .setImm(curRes.bits.ResourceID);
1833 }
1834 } else if (STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)
1835 && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
1836 curRes.bits.ResourceID = STM->device()
1837 ->getResourceID(AMDILDevice::SCRATCH_ID);
1838 } else {
1839 if (mDebug) {
1840 dbgs() << __LINE__ << ": Setting byte store bit on instruction: ";
1841 (*miBegin)->print(dbgs());
1842 }
1843 curRes.bits.ByteStore = 1;
1844 curRes.bits.ResourceID = (curArg && curArg->hasNoAliasAttr()) ? arenaID
1845 : STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
1846 if (STM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) {
1847 arenaInc = true;
1848 }
1849 if (isAtomicInst(TM.getInstrInfo(), *miBegin) &&
1850 STM->device()->isSupported(AMDILDeviceInfo::ArenaUAV)) {
1851 (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
1852 .setImm(curRes.bits.ResourceID);
1853 // If we are an arena instruction, we need to switch the atomic opcode
1854 // from the global version to the arena version.
1855 MachineInstr *MI = *miBegin;
1856 MI->setDesc(
1857 TM.getInstrInfo()->get(
1858 (MI->getOpcode() - AMDIL::ATOM_G_ADD) + AMDIL::ATOM_A_ADD));
1859 }
1860 if (mDebug) {
1861 dbgs() << "Annotating pointer as arena. Inst: ";
1862 (*miBegin)->dump();
1863 }
1864 }
1865 setAsmPrinterFlags(*miBegin, curRes);
1866 KM->setUAVID(*siBegin, curRes.bits.ResourceID);
1867 if (!mMFI) {
1868 mMFI = (*miBegin)->getParent()->getParent()
1869 ->getInfo<AMDILMachineFunctionInfo>();
1870 }
1871 mMFI->uav_insert(curRes.bits.ResourceID);
1872 }
1873 if (arenaInc) {
1874 ++arenaID;
1875 }
1876 }
1877 }
1878 // An append pointer is an opaque object that has append instructions
1879 // in its path.
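// Each append pointer is given its own counter index (currentCounter),
// which is written into operand 1 of the APPEND_ALLOC/APPEND_CONSUME
// instructions below; using both alloc and consume on the same counter
// is reported as INCORRECT_COUNTER_USAGE.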
1880 void
1881 annotateAppendPtrs(
1882 TargetMachine &TM,
1883 PtrIMap &PtrToInstMap,
1884 AppendSet &appendPtrs,
1885 bool mDebug)
1886 {
1887 unsigned currentCounter = 0;
1888 // const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>();
1889 // AMDILKernelManager *KM = (AMDILKernelManager*)STM->getKernelManager();
1890 MachineFunction *MF = NULL;
1891 for (AppendSet::iterator asBegin = appendPtrs.begin(),
1892 asEnd = appendPtrs.end(); asBegin != asEnd; ++asBegin)
1893 {
1894 bool usesWrite = false;
1895 bool usesRead = false;
1896 const Value* curVal = *asBegin;
1897 if (mDebug) {
1898 dbgs() << "Counter: " << curVal->getName()
1899 << " assigned the counter " << currentCounter << "\n";
1900 }
1901 for (std::vector<MachineInstr*>::iterator
1902 miBegin = PtrToInstMap[curVal].begin(),
1903 miEnd = PtrToInstMap[curVal].end(); miBegin != miEnd; ++miBegin) {
1904 MachineInstr *MI = *miBegin;
1905 if (!MF) {
1906 MF = MI->getParent()->getParent();
1907 }
1908 unsigned opcode = MI->getOpcode();
1909 switch (opcode) {
1910 default:
1911 if (mDebug) {
1912 dbgs() << "Skipping instruction: ";
1913 MI->dump();
1914 }
1915 break;
1916 case AMDIL::APPEND_ALLOC:
1917 case AMDIL::APPEND_ALLOC_NORET:
1918 usesWrite = true;
1919 MI->getOperand(1).ChangeToImmediate(currentCounter);
1920 if (mDebug) {
1921             dbgs() << "Assigning to counter " << currentCounter << " Inst: ";
1922 MI->dump();
1923 }
1924 break;
1925 case AMDIL::APPEND_CONSUME:
1926 case AMDIL::APPEND_CONSUME_NORET:
1927 usesRead = true;
1928 MI->getOperand(1).ChangeToImmediate(currentCounter);
1929 if (mDebug) {
1930             dbgs() << "Assigning to counter " << currentCounter << " Inst: ";
1931 MI->dump();
1932 }
1933 break;
1934 };
1935 }
1936 if (usesWrite && usesRead && MF) {
1937 MF->getInfo<AMDILMachineFunctionInfo>()->addErrorMsg(
1938 amd::CompilerErrorMessage[INCORRECT_COUNTER_USAGE]);
1939 }
1940 ++currentCounter;
1941 }
1942 }
1943 // A raw pointer is any pointer that does not have a byte store in its path.
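// Raw pointers are routed either to a hardware resource chosen by address
// space (constant buffer, LDS, GDS, or scratch) or to a UAV; which UAV is
// picked depends on the device's RAW/ARENA UAV IDs and on the number of
// write images, as handled case by case below.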
1944 static void
1945 annotateRawPtrs(
1946 TargetMachine &TM,
1947 PtrIMap &PtrToInstMap,
1948 RawSet &rawPtrs,
1949 ByteSet &bytePtrs,
1950 uint32_t numWriteImages,
1951 bool mDebug
1952 )
1953 {
1954 const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>();
1955 AMDILKernelManager *KM = STM->getKernelManager();
1956 PtrSet::iterator siBegin, siEnd;
1957 std::vector<MachineInstr*>::iterator miBegin, miEnd;
1958 AMDILMachineFunctionInfo *mMFI = NULL;
1959
1960 // Now all of the raw pointers will go to the raw uav.
1961 for (siBegin = rawPtrs.begin(), siEnd = rawPtrs.end();
1962 siBegin != siEnd; ++siBegin) {
1963 const PointerType *PT = dyn_cast<PointerType>((*siBegin)->getType());
1964 if (!PT) {
1965 continue;
1966 }
1967 assert(!bytePtrs.count(*siBegin) && "Found a raw pointer "
1968         "that also exists as a byte pointer!");
1969 for (miBegin = PtrToInstMap[*siBegin].begin(),
1970 miEnd = PtrToInstMap[*siBegin].end();
1971 miBegin != miEnd; ++miBegin) {
1972 if (mDebug) {
1973 dbgs() << "Annotating pointer as raw. Inst: ";
1974 (*miBegin)->dump();
1975 }
1976 AMDILAS::InstrResEnc curRes;
1977 getAsmPrinterFlags(*miBegin, curRes);
1978 if (!curRes.bits.ConflictPtr) {
1979 assert(!curRes.bits.ByteStore
1980             && "Found an instruction that is marked as "
1981 "raw but has a byte store bit set!");
1982 } else if (curRes.bits.ConflictPtr) {
1983 if (curRes.bits.ByteStore) {
1984 curRes.bits.ByteStore = 0;
1985 }
1986 }
1987 if (STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)
1988 && PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS) {
1989 // If hardware constant mem is enabled, then we need to
1990 // get the constant pointer CB number and use that to specify
1991 // the resource ID.
1992 AMDILGlobalManager *GM = STM->getGlobalManager();
1993 const StringRef funcName = (*miBegin)->getParent()->getParent()
1994 ->getFunction()->getName();
1995 if (GM->isKernel(funcName)) {
1996 const kernel &krnl = GM->getKernel(funcName);
1997 curRes.bits.ResourceID = GM->getConstPtrCB(krnl,
1998 (*siBegin)->getName());
1999 curRes.bits.HardwareInst = 1;
2000 } else {
2001 curRes.bits.ResourceID = STM->device()
2002 ->getResourceID(AMDILDevice::CONSTANT_ID);
2003 }
2004 } else if (STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)
2005 && PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS) {
2006 // If hardware local mem is enabled, get the local mem ID from
2007 // the device to use as the ResourceID
2008 curRes.bits.ResourceID = STM->device()
2009 ->getResourceID(AMDILDevice::LDS_ID);
2010 if (isAtomicInst(TM.getInstrInfo(), *miBegin)) {
2011 assert(curRes.bits.ResourceID && "Atomic resource ID "
2012                 "cannot be zero!");
2013 (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
2014 .setImm(curRes.bits.ResourceID);
2015 }
2016 } else if (STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)
2017 && PT->getAddressSpace() == AMDILAS::REGION_ADDRESS) {
2018 // If hardware region mem is enabled, get the gds mem ID from
2019 // the device to use as the ResourceID
2020 curRes.bits.ResourceID = STM->device()
2021 ->getResourceID(AMDILDevice::GDS_ID);
2022 if (isAtomicInst(TM.getInstrInfo(), *miBegin)) {
2023 assert(curRes.bits.ResourceID && "Atomic resource ID "
2024                 "cannot be zero!");
2025 (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
2026 .setImm(curRes.bits.ResourceID);
2027 }
2028 } else if (STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)
2029 && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
2030 curRes.bits.ResourceID = STM->device()
2031 ->getResourceID(AMDILDevice::SCRATCH_ID);
2032 } else if (!STM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) {
2033         // If multi uav is not supported, then the resource ID is either
2034         // the number of write images that are available or the device's
2035         // default raw uav id.
2036 if (STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) >
2037 STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
2038 curRes.bits.ResourceID = STM->device()
2039 ->getResourceID(AMDILDevice::RAW_UAV_ID);
2040 } else if (numWriteImages != OPENCL_MAX_WRITE_IMAGES) {
2041 if (STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID)
2042 < numWriteImages) {
2043 curRes.bits.ResourceID = numWriteImages;
2044 } else {
2045 curRes.bits.ResourceID = STM->device()
2046 ->getResourceID(AMDILDevice::RAW_UAV_ID);
2047 }
2048 } else {
2049 if (mDebug) {
2050 dbgs() << __LINE__ << ": Setting byte store bit on instruction: ";
2051 (*miBegin)->print(dbgs());
2052 }
2053 curRes.bits.ByteStore = 1;
2054 curRes.bits.ResourceID = STM->device()
2055 ->getResourceID(AMDILDevice::ARENA_UAV_ID);
2056 }
2057 if (isAtomicInst(TM.getInstrInfo(), *miBegin)) {
2058 (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
2059 .setImm(curRes.bits.ResourceID);
2060 if (curRes.bits.ResourceID
2061 == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
2062 assert(0 && "Found an atomic instruction that has "
2063 "an arena uav id!");
2064 }
2065 }
2066 KM->setUAVID(*siBegin, curRes.bits.ResourceID);
2067 if (!mMFI) {
2068 mMFI = (*miBegin)->getParent()->getParent()
2069 ->getInfo<AMDILMachineFunctionInfo>();
2070 }
2071 mMFI->uav_insert(curRes.bits.ResourceID);
2072 }
2073 setAsmPrinterFlags(*miBegin, curRes);
2074 }
2075 }
2076
2077 }
2078
2079 void
2080 annotateCacheableInstrs(
2081 TargetMachine &TM,
2082 CacheableInstrSet &cacheableSet,
2083 bool mDebug)
2084 {
2085 const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>();
2086 // AMDILKernelManager *KM = (AMDILKernelManager*)STM->getKernelManager();
2087
2088 CacheableInstrSet::iterator miBegin, miEnd;
2089
2090 for (miBegin = cacheableSet.begin(),
2091 miEnd = cacheableSet.end();
2092 miBegin != miEnd; ++miBegin) {
2093 if (mDebug) {
2094 dbgs() << "Annotating instr as cacheable. Inst: ";
2095 (*miBegin)->dump();
2096 }
2097 AMDILAS::InstrResEnc curRes;
2098 getAsmPrinterFlags(*miBegin, curRes);
2099 // If UAV11 is enabled, then we can enable cached reads.
2100 if (STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) == 11) {
2101 curRes.bits.CacheableRead = 1;
2102 curRes.bits.ResourceID = 11;
2103 setAsmPrinterFlags(*miBegin, curRes);
2104 }
2105 }
2106 }
2107
2108 // Annotate the instructions along various pointer paths. The paths that
2109 // are handled are the raw, byte and cacheable pointer paths.
2110 static void
2111 annotatePtrPath(
2112 TargetMachine &TM,
2113 PtrIMap &PtrToInstMap,
2114 RawSet &rawPtrs,
2115 ByteSet &bytePtrs,
2116 CacheableSet &cacheablePtrs,
2117 uint32_t numWriteImages,
2118 bool mDebug
2119 )
2120 {
2121 if (PtrToInstMap.empty()) {
2122 return;
2123 }
2124 // First we can check the cacheable pointers
2125 annotateCacheablePtrs(TM, PtrToInstMap, cacheablePtrs,
2126 bytePtrs, numWriteImages, mDebug);
2127
2128 // Next we annotate the byte pointers
2129 annotateBytePtrs(TM, PtrToInstMap, bytePtrs, rawPtrs, mDebug);
2130
2131 // Next we annotate the raw pointers
2132 annotateRawPtrs(TM, PtrToInstMap, rawPtrs, bytePtrs,
2133 numWriteImages, mDebug);
2134 }
2135 // Allocate MultiUAV pointer IDs for the raw/conflict pointers.
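// Rough sketch of the policy implemented below: curUAV starts at
// numWriteImages (or at the device's RAW_UAV_ID if all write-image slots are
// taken) and is bumped once per raw pointer while it stays below
// OPENCL_MAX_WRITE_IMAGES - 1; once that limit is hit it falls back to the
// default RAW_UAV_ID, and the conflict pointers all share the final curUAV.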
2136 static void
2137 allocateMultiUAVPointers(
2138 MachineFunction &MF,
2139 const AMDILTargetMachine *ATM,
2140 PtrIMap &PtrToInstMap,
2141 RawSet &rawPtrs,
2142 ConflictSet &conflictPtrs,
2143 CacheableSet &cacheablePtrs,
2144 uint32_t numWriteImages,
2145 bool mDebug)
2146 {
2147 if (PtrToInstMap.empty()) {
2148 return;
2149 }
2150 AMDILMachineFunctionInfo *mMFI = MF.getInfo<AMDILMachineFunctionInfo>();
2151 uint32_t curUAV = numWriteImages;
2152 bool increment = true;
2153 const AMDILSubtarget *STM
2154 = ATM->getSubtargetImpl();
2155 // If the RAW_UAV_ID is a value that is larger than the max number of write
2156 // images, then we use that UAV ID.
2157 if (numWriteImages >= OPENCL_MAX_WRITE_IMAGES) {
2158 curUAV = STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
2159 increment = false;
2160 }
2161 AMDILKernelManager *KM = STM->getKernelManager();
2162 PtrSet::iterator siBegin, siEnd;
2163 std::vector<MachineInstr*>::iterator miBegin, miEnd;
2164   // First let's handle the raw pointers.
2165 for (siBegin = rawPtrs.begin(), siEnd = rawPtrs.end();
2166 siBegin != siEnd; ++siBegin) {
2167 assert((*siBegin)->getType()->isPointerTy() && "We must be a pointer type "
2168 "to be processed at this point!");
2169 const PointerType *PT = dyn_cast<PointerType>((*siBegin)->getType());
2170 if (conflictPtrs.count(*siBegin) || !PT) {
2171 continue;
2172 }
2173 // We only want to process global address space pointers
2174 if (PT->getAddressSpace() != AMDILAS::GLOBAL_ADDRESS) {
2175 if ((PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS
2176 && STM->device()->usesSoftware(AMDILDeviceInfo::LocalMem))
2177 || (PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS
2178 && STM->device()->usesSoftware(AMDILDeviceInfo::ConstantMem))
2179 || (PT->getAddressSpace() == AMDILAS::REGION_ADDRESS
2180 && STM->device()->usesSoftware(AMDILDeviceInfo::RegionMem))) {
2181 // If we are using software emulated hardware features, then
2182 // we need to specify that they use the raw uav and not
2183 // zero-copy uav. The easiest way to do this is to assume they
2184 // conflict with another pointer. Any pointer that conflicts
2185 // with another pointer is assigned to the raw uav or the
2186 // arena uav if no raw uav exists.
2187 const PointerType *PT = dyn_cast<PointerType>((*siBegin)->getType());
2188 if (PT) {
2189 conflictPtrs.insert(*siBegin);
2190 }
2191 }
2192 if (PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
2193 if (STM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem)) {
2194 const PointerType *PT = dyn_cast<PointerType>((*siBegin)->getType());
2195 if (PT) {
2196 conflictPtrs.insert(*siBegin);
2197 }
2198 } else {
2199 if (mDebug) {
2200 dbgs() << "Scratch Pointer '" << (*siBegin)->getName()
2201 << "' being assigned uav "<<
2202 STM->device()->getResourceID(AMDILDevice::SCRATCH_ID) << "\n";
2203 }
2204 for (miBegin = PtrToInstMap[*siBegin].begin(),
2205 miEnd = PtrToInstMap[*siBegin].end();
2206 miBegin != miEnd; ++miBegin) {
2207 AMDILAS::InstrResEnc curRes;
2208 getAsmPrinterFlags(*miBegin, curRes);
2209 curRes.bits.ResourceID = STM->device()
2210 ->getResourceID(AMDILDevice::SCRATCH_ID);
2211 if (mDebug) {
2212 dbgs() << "Updated instruction to bitmask ";
2213 dbgs().write_hex(curRes.u16all);
2214 dbgs() << " with ResID " << curRes.bits.ResourceID;
2215 dbgs() << ". Inst: ";
2216 (*miBegin)->dump();
2217 }
2218 setAsmPrinterFlags((*miBegin), curRes);
2219 KM->setUAVID(*siBegin, curRes.bits.ResourceID);
2220 mMFI->uav_insert(curRes.bits.ResourceID);
2221 }
2222 }
2223 }
2224 continue;
2225 }
2226 // If more than just UAV 11 is cacheable, then we can remove
2227 // this check.
2228 if (cacheablePtrs.count(*siBegin)) {
2229 if (mDebug) {
2230 dbgs() << "Raw Pointer '" << (*siBegin)->getName()
2231 << "' is cacheable, not allocating a multi-uav for it!\n";
2232 }
2233 continue;
2234 }
2235 if (mDebug) {
2236 dbgs() << "Raw Pointer '" << (*siBegin)->getName()
2237 << "' being assigned uav " << curUAV << "\n";
2238 }
2239 if (PtrToInstMap[*siBegin].empty()) {
2240 KM->setUAVID(*siBegin, curUAV);
2241 mMFI->uav_insert(curUAV);
2242 }
2243 // For all instructions here, we are going to set the new UAV to the curUAV
2244 // number and not the value that it currently is set to.
2245 for (miBegin = PtrToInstMap[*siBegin].begin(),
2246 miEnd = PtrToInstMap[*siBegin].end();
2247 miBegin != miEnd; ++miBegin) {
2248 AMDILAS::InstrResEnc curRes;
2249 getAsmPrinterFlags(*miBegin, curRes);
2250 curRes.bits.ResourceID = curUAV;
2251 if (isAtomicInst(ATM->getInstrInfo(), *miBegin)) {
2252 (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
2253 .setImm(curRes.bits.ResourceID);
2254 if (curRes.bits.ResourceID
2255 == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
2256 assert(0 && "Found an atomic instruction that has "
2257 "an arena uav id!");
2258 }
2259 }
2260 if (curUAV == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
2261 if (mDebug) {
2262 dbgs() << __LINE__ << ": Setting byte store bit on instruction: ";
2263 (*miBegin)->print(dbgs());
2264 }
2265 curRes.bits.ByteStore = 1;
2266 curRes.bits.CacheableRead = 0;
2267 }
2268 if (mDebug) {
2269 dbgs() << "Updated instruction to bitmask ";
2270 dbgs().write_hex(curRes.u16all);
2271 dbgs() << " with ResID " << curRes.bits.ResourceID;
2272 dbgs() << ". Inst: ";
2273 (*miBegin)->dump();
2274 }
2275 setAsmPrinterFlags(*miBegin, curRes);
2276 KM->setUAVID(*siBegin, curRes.bits.ResourceID);
2277 mMFI->uav_insert(curRes.bits.ResourceID);
2278 }
2279 // If we make it here, we can increment the uav counter if we are less
2280 // than the max write image count. Otherwise we set it to the default
2281 // UAV and leave it.
2282 if (increment && curUAV < (OPENCL_MAX_WRITE_IMAGES - 1)) {
2283 ++curUAV;
2284 } else {
2285 curUAV = STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
2286 increment = false;
2287 }
2288 }
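  // The hard-coded 8 below presumably mirrors OPENCL_MAX_WRITE_IMAGES: if
  // every write-image slot is taken, fall back to the default raw UAV before
  // handling the conflict pointers.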
2289 if (numWriteImages == 8) {
2290 curUAV = STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
2291 }
2292   // Now let's handle the conflict pointers.
2293 for (siBegin = conflictPtrs.begin(), siEnd = conflictPtrs.end();
2294 siBegin != siEnd; ++siBegin) {
2295 assert((*siBegin)->getType()->isPointerTy() && "We must be a pointer type "
2296 "to be processed at this point!");
2297 const PointerType *PT = dyn_cast<PointerType>((*siBegin)->getType());
2298 // We only want to process global address space pointers
2299 if (!PT || PT->getAddressSpace() != AMDILAS::GLOBAL_ADDRESS) {
2300 continue;
2301 }
2302 if (mDebug) {
2303 dbgs() << "Conflict Pointer '" << (*siBegin)->getName()
2304 << "' being assigned uav " << curUAV << "\n";
2305 }
2306 if (PtrToInstMap[*siBegin].empty()) {
2307 KM->setUAVID(*siBegin, curUAV);
2308 mMFI->uav_insert(curUAV);
2309 }
2310 for (miBegin = PtrToInstMap[*siBegin].begin(),
2311 miEnd = PtrToInstMap[*siBegin].end();
2312 miBegin != miEnd; ++miBegin) {
2313 AMDILAS::InstrResEnc curRes;
2314 getAsmPrinterFlags(*miBegin, curRes);
2315 curRes.bits.ResourceID = curUAV;
2316 if (isAtomicInst(ATM->getInstrInfo(), *miBegin)) {
2317 (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
2318 .setImm(curRes.bits.ResourceID);
2319 if (curRes.bits.ResourceID
2320 == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
2321 assert(0 && "Found an atomic instruction that has "
2322 "an arena uav id!");
2323 }
2324 }
2325 if (curUAV == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
2326 if (mDebug) {
2327 dbgs() << __LINE__ << ": Setting byte store bit on instruction: ";
2328 (*miBegin)->print(dbgs());
2329 }
2330 curRes.bits.ByteStore = 1;
2331 }
2332 if (mDebug) {
2333 dbgs() << "Updated instruction to bitmask ";
2334 dbgs().write_hex(curRes.u16all);
2335 dbgs() << " with ResID " << curRes.bits.ResourceID;
2336 dbgs() << ". Inst: ";
2337 (*miBegin)->dump();
2338 }
2339 setAsmPrinterFlags(*miBegin, curRes);
2340 KM->setUAVID(*siBegin, curRes.bits.ResourceID);
2341 mMFI->uav_insert(curRes.bits.ResourceID);
2342 }
2343 }
2344 }
2345 // The first thing we should do is to allocate the default
2346 // ID for each load/store/atomic instruction so that every memory
2347 // operation starts with a valid resource ID. Everything else after
2348 // this is just an optimization to allocate resource IDs more
2349 // efficiently.
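// The walk below simply visits every basic block and every instruction in
// the machine function, handing each load, store, and atomic to
// allocateDefaultID to pick its initial resource ID.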
2350 void
2351 allocateDefaultIDs(
2352 const AMDILTargetMachine *ATM,
2353 MachineFunction &MF,
2354 bool mDebug)
2355 {
2356 for (MachineFunction::iterator mfBegin = MF.begin(),
2357 mfEnd = MF.end(); mfBegin != mfEnd; ++mfBegin) {
2358 MachineBasicBlock *MB = mfBegin;
2359 for (MachineBasicBlock::iterator mbb = MB->begin(), mbe = MB->end();
2360 mbb != mbe; ++mbb) {
2361 MachineInstr *MI = mbb;
2362 if (isLoadInst(ATM->getInstrInfo(), MI)
2363 || isStoreInst(ATM->getInstrInfo(), MI)
2364 || isAtomicInst(ATM->getInstrInfo(), MI)) {
2365 AMDILAS::InstrResEnc curRes;
2366 getAsmPrinterFlags(MI, curRes);
2367 allocateDefaultID(ATM, curRes, MI, mDebug);
2368 }
2369 }
2370 }
2371 }
2372
2373 bool
2374 AMDILEGPointerManager::runOnMachineFunction(MachineFunction &MF)
2375 {
2376 bool changed = false;
2377 const AMDILTargetMachine *ATM
2378 = reinterpret_cast<const AMDILTargetMachine*>(&TM);
2379 AMDILMachineFunctionInfo *mMFI =
2380 MF.getInfo<AMDILMachineFunctionInfo>();
2381 if (mDebug) {
2382 dbgs() << getPassName() << "\n";
2383 dbgs() << MF.getFunction()->getName() << "\n";
2384 MF.dump();
2385 }
2386 // Start out by allocating the default ID's to all instructions in the
2387 // function.
2388 allocateDefaultIDs(ATM, MF, mDebug);
2389
2390   // All of the pointers are tracked in this map, and
2391   // if multiple pointers are detected, they go to the same
2392   // set.
2393 PtrIMap PtrToInstMap;
2394
2395   // All of the instructions that are loads, stores or pointer
2396   // conflicts are tracked in this map, together with the set of all
2397   // values that reference the instruction.
2398 InstPMap InstToPtrMap;
2399
2400 // In order to track across stack entries, we need a map between a
2401 // frame index and a pointer. That way when we load from a frame
2402 // index, we know what pointer was stored to the frame index.
2403 FIPMap FIToPtrMap;
2404
2405 // Set of all the pointers that are byte pointers. Byte pointers
2406 // are required to have their instructions go to the arena.
2407 ByteSet bytePtrs;
2408
2409 // Set of all the pointers that are cacheable. All of the cache pointers
2410 // are required to go to a raw uav and cannot go to arena.
2411 CacheableSet cacheablePtrs;
2412
2413 // Set of all the pointers that go into a raw buffer. A pointer can
2414 // exist in either rawPtrs or bytePtrs but not both.
2415 RawSet rawPtrs;
2416
2417 // Set of all the pointers that end up having a conflicting instruction
2418 // somewhere in the pointer path.
2419 ConflictSet conflictPtrs;
2420
2421 // Set of all pointers that are images
2422 ImageSet images;
2423
2424 // Set of all pointers that are counters
2425 AppendSet counters;
2426
2427 // Set of all pointers that load from a constant pool
2428 CPoolSet cpool;
2429
2430   // Mapping from BB to information about the cacheability of the
2431 // global load instructions in it.
2432 MBBCacheableMap bbCacheable;
2433
2434 // A set of load instructions that are cacheable
2435 // even if all the load instructions of the ptr are not.
2436 CacheableInstrSet cacheableSet;
2437
2438 // The lookup table holds all of the registers that
2439 // are used as we assign pointers values to them.
2440 // If two pointers collide on the lookup table, then
2441 // we assign them to the same UAV. If one of the
2442 // pointers is byte addressable, then we assign
2443 // them to arena, otherwise we assign them to raw.
2444 RVPVec lookupTable;
2445
2446 // First we need to go through all of the arguments and assign the
2447   // live-in registers to the lookup table and the pointer mapping.
2448 uint32_t numWriteImages = parseArguments(MF, lookupTable, ATM,
2449 cacheablePtrs, images, counters, mDebug);
2450
2451   // Let's do some error checking on the results of the parsing.
2452 if (counters.size() > OPENCL_MAX_NUM_ATOMIC_COUNTERS) {
2453 mMFI->addErrorMsg(
2454 amd::CompilerErrorMessage[INSUFFICIENT_COUNTER_RESOURCES]);
2455 }
2456 if (numWriteImages > OPENCL_MAX_WRITE_IMAGES
2457 || (images.size() - numWriteImages > OPENCL_MAX_READ_IMAGES)) {
2458 mMFI->addErrorMsg(
2459 amd::CompilerErrorMessage[INSUFFICIENT_IMAGE_RESOURCES]);
2460 }
2461
2462   // Now let's parse all of the instructions and update our
2463 // lookup tables.
2464 parseFunction(this, ATM, MF, InstToPtrMap, PtrToInstMap,
2465 FIToPtrMap, lookupTable, bytePtrs, conflictPtrs, cpool,
2466 bbCacheable, mDebug);
2467
2468 // We need to go over our pointer map and find all the conflicting
2469 // pointers that have byte stores and put them in the bytePtr map.
2470 // All conflicting pointers that don't have byte stores go into
2471 // the rawPtr map.
2472 detectConflictingPointers(ATM, InstToPtrMap, bytePtrs, rawPtrs,
2473 conflictPtrs, mDebug);
2474
2475 // The next step is to detect whether the pointer should be added to
2476 // the fully cacheable set or not. A pointer is marked as cacheable if
2477 // no store instruction exists.
2478 detectFullyCacheablePointers(ATM, PtrToInstMap, rawPtrs,
2479 cacheablePtrs, conflictPtrs, mDebug);
2480
2481 // Disable partially cacheable for now when multiUAV is on.
2482 // SC versions before SC139 have a bug that generates incorrect
2483 // addressing for some cached accesses.
2484 if (!ATM->getSubtargetImpl()
2485 ->device()->isSupported(AMDILDeviceInfo::MultiUAV) &&
2486 ATM->getSubtargetImpl()->calVersion() >= CAL_VERSION_SC_139) {
2487 // Now we take the set of loads that have no reachable stores and
2488 // create a list of additional instructions (those that aren't already
2489 // in a cacheablePtr set) that are safe to mark as cacheable.
2490 detectCacheableInstrs(bbCacheable, InstToPtrMap, cacheablePtrs,
2491 bytePtrs, cacheableSet, mDebug);
2492
2493 // Annotate the additional instructions computed above as cacheable.
2494 // Note that this should not touch any instructions annotated in
2495 // annotatePtrPath.
2496 annotateCacheableInstrs(TM, cacheableSet, mDebug);
2497 }
2498
2499   // Now that we have detected everything we need to detect, let's go through and
2500 // annotate the instructions along the pointer path for each of the
2501 // various pointer types.
2502 annotatePtrPath(TM, PtrToInstMap, rawPtrs, bytePtrs,
2503 cacheablePtrs, numWriteImages, mDebug);
2504
2505 // Annotate the atomic counter path if any exists.
2506 annotateAppendPtrs(TM, PtrToInstMap, counters, mDebug);
2507
2508 // If we support MultiUAV, then we need to determine how
2509 // many write images exist so that way we know how many UAV are
2510 // left to allocate to buffers.
2511 if (ATM->getSubtargetImpl()
2512 ->device()->isSupported(AMDILDeviceInfo::MultiUAV)) {
2513     // We now have (OPENCL_MAX_WRITE_IMAGES - numWriteImages) buffers open for
2514 // multi-uav allocation.
2515 allocateMultiUAVPointers(MF, ATM, PtrToInstMap, rawPtrs,
2516 conflictPtrs, cacheablePtrs, numWriteImages, mDebug);
2517 }
2518
2519 // The last step is to detect if we have any alias constant pool operations.
2520 // This is not likely, but does happen on occasion with double precision
2521 // operations.
2522 detectAliasedCPoolOps(TM, cpool, mDebug);
2523 if (mDebug) {
2524 dumpPointers(bytePtrs, "Byte Store Ptrs");
2525 dumpPointers(rawPtrs, "Raw Ptrs");
2526 dumpPointers(cacheablePtrs, "Cache Load Ptrs");
2527 dumpPointers(counters, "Atomic Counters");
2528 dumpPointers(images, "Images");
2529 }
2530 return changed;
2531 }
2532
2533 // The default pointer manager just assigns the default IDs to
2534 // each load/store/atomic instruction and does nothing else. This is
2535 // the pointer manager for the 7XX series of cards.
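// None of the UAV assignment, cacheability, or multi-UAV analysis performed
// by the AMDILEGPointerManager above applies here; every load and store
// simply keeps the default resource ID chosen by allocateDefaultIDs.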
2536 bool
2537 AMDILPointerManager::runOnMachineFunction(MachineFunction &MF)
2538 {
2539 bool changed = false;
2540 const AMDILTargetMachine *ATM
2541 = reinterpret_cast<const AMDILTargetMachine*>(&TM);
2542 if (mDebug) {
2543 dbgs() << getPassName() << "\n";
2544 dbgs() << MF.getFunction()->getName() << "\n";
2545 MF.dump();
2546 }
2547 // On the 7XX we don't have to do any special processing, so we
2548 // can just allocate the default ID and be done with it.
2549 allocateDefaultIDs(ATM, MF, mDebug);
2550 return changed;
2551 }