1 //===-------- AMDILPointerManager.cpp - Manage Pointers for HW-------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //==-----------------------------------------------------------------------===//
9 // Implementation for the AMDILPointerManager classes. See header file for
10 // more documentation of class.
11 // TODO: This fails when function calls are enabled, must always be inlined
12 //===----------------------------------------------------------------------===//
13 #include "AMDILPointerManager.h"
14 #include "AMDILCompilerErrors.h"
15 #include "AMDILDeviceInfo.h"
16 #include "AMDILGlobalManager.h"
17 #include "AMDILKernelManager.h"
18 #include "AMDILMachineFunctionInfo.h"
19 #include "AMDILTargetMachine.h"
20 #include "AMDILUtilityFunctions.h"
21 #include "llvm/ADT/PostOrderIterator.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/ADT/ValueMap.h"
24 #include "llvm/CodeGen/MachineDominators.h"
25 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
26 #include "llvm/CodeGen/MachineInstr.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/Passes.h"
29 #include "llvm/DerivedTypes.h"
30 #include "llvm/Function.h"
31 #include "llvm/GlobalValue.h"
32 #include "llvm/Instructions.h"
33 #include "llvm/Metadata.h"
34 #include "llvm/Module.h"
35 #include "llvm/Support/FormattedStream.h"
// Static pass-identification member. LLVM's pass infrastructure identifies a
// pass by the *address* of this char, not its value, so 0 is arbitrary.
39 char AMDILPointerManager::ID
= 0;
// Factory entry point for the pass: delegates to the target device object so
// each AMDIL hardware generation can supply its own pointer-manager variant
// (e.g. AMDILEGPointerManager below).
// NOTE(review): the return type and opening brace are not visible in this
// extract -- presumably FunctionPass* per LLVM convention; confirm in repo.
42 createAMDILPointerManager(TargetMachine
&tm AMDIL_OPT_LEVEL_DECL
)
44 return tm
.getSubtarget
<AMDILSubtarget
>()
45 .device()->getPointerManager(tm AMDIL_OPT_LEVEL_VAR
);
// Constructor: chains to MachineFunctionPass with this pass's ID and eagerly
// initializes the MachineDominatorTree pass in the registry, since
// getAnalysisUsage() below declares it as a required analysis.
// NOTE(review): intermediate lines (parameter list details, member inits)
// appear to be elided from this extract -- confirm against the repository.
49 AMDILPointerManager::AMDILPointerManager(
51 AMDIL_OPT_LEVEL_DECL
) :
52 MachineFunctionPass(ID
),
56 initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
// Destructor. NOTE(review): body not visible in this extract; presumably empty.
59 AMDILPointerManager::~AMDILPointerManager()
// Human-readable pass name reported by the LLVM pass framework (e.g. in
// -debug-pass output and crash diagnostics).
64 AMDILPointerManager::getPassName() const
66 return "AMD IL Default Pointer Manager Pass";
// Declares this pass's analysis dependencies: it requires the machine
// dominator tree, then delegates to the base class for the defaults.
70 AMDILPointerManager::getAnalysisUsage(AnalysisUsage
&AU
) const
73 AU
.addRequiredID(MachineDominatorsID
);
74 MachineFunctionPass::getAnalysisUsage(AU
);
// Evergreen-generation variant constructor: simply chains to the base
// AMDILPointerManager constructor with the same target machine / opt level.
77 AMDILEGPointerManager::AMDILEGPointerManager(
79 AMDIL_OPT_LEVEL_DECL
) :
80 AMDILPointerManager(tm AMDIL_OPT_LEVEL_VAR
),
// Destructor. NOTE(review): body not visible in this extract; presumably empty.
85 AMDILEGPointerManager::~AMDILEGPointerManager()
// Resolves the textual name of the sampler consumed by an image-read
// instruction. Operand #3 of the image read must hold the sampler register.
// Resolution order (per the visible code): (1) the register maps directly to
// a kernel Argument in lookupTable -> return the argument's name;
// (2) the register is defined by a private load from a frame index whose
// frame slot maps to an Argument (the unoptimized / no-mem2reg path) ->
// return that argument's name; anything else asserts, because the sampler
// value must be known at compile time.
// NOTE(review): several interior lines (assert message tail, closing braces,
// return paths) are elided from this extract; the "®Info" token below is
// mojibake for "&regInfo" -- confirm against the repository before editing.
89 findSamplerName(MachineInstr
* MI
,
92 const TargetMachine
*TM
)
94 std::string sampler
= "unknown";
95 assert(MI
->getNumOperands() == 5 && "Only an "
96 "image read instruction with 5 arguments can "
98 assert(MI
->getOperand(3).isReg() &&
99 "Argument 3 must be a register to call this function");
100 unsigned reg
= MI
->getOperand(3).getReg();
101 // If this register points to an argument, then
102 // we can return the argument name.
103 if (lookupTable
[reg
].second
&& dyn_cast
<Argument
>(lookupTable
[reg
].second
)) {
104 return lookupTable
[reg
].second
->getName();
106 // Otherwise the sampler is coming from memory somewhere.
107 // If the sampler memory location can be tracked, then
108 // we ascertain the sampler name that way.
109 // The most common case is when optimizations are disabled
110 // or mem2reg is not enabled, then the sampler when it is
111 // an argument is passed through the frame index.
113 // In the optimized case, the instruction that defined
114 // register from operand #3 is a private load.
115 MachineRegisterInfo
®Info
= MI
->getParent()->getParent()->getRegInfo();
116 assert(!regInfo
.def_empty(reg
)
117 && "We don't have any defs of this register, but we aren't an argument!");
118 MachineOperand
*defOp
= regInfo
.getRegUseDefListHead(reg
);
119 MachineInstr
*defMI
= defOp
->getParent();
120 if (isPrivateInst(TM
->getInstrInfo(), defMI
) && isLoadInst(TM
->getInstrInfo(), defMI
)) {
121 if (defMI
->getOperand(1).isFI()) {
122 RegValPair
&fiRVP
= FIToPtrMap
[reg
];
123 if (fiRVP
.second
&& dyn_cast
<Argument
>(fiRVP
.second
)) {
124 return fiRVP
.second
->getName();
126 // FIXME: Fix the case where the value stored is not a kernel argument.
127 assert(!"Found a private load of a sampler where the value isn't an argument!");
130 // FIXME: Fix the case where someone dynamically loads a sampler value
131 // from private memory. This is problematic because we need to know the
132 // sampler value at compile time and if it is dynamically loaded, we won't
133 // know what sampler value to use.
134 assert(!"Found a private load of a sampler that isn't from a frame index!");
137 // FIXME: Handle the case where the def is neither a private instruction
138 // and not a load instruction. This shouldn't occur, but putting an assertion
139 // just to make sure that it doesn't.
140 assert(!"Found a case which we don't handle.");
// Human-readable name for the Evergreen-generation variant of the pass.
146 AMDILEGPointerManager::getPassName() const
148 return "AMD IL EG Pointer Manager Pass";
151 // Helper function to determine if the current pointer is from the
152 // local, region or private address spaces.
// Returns true when the instruction targets a Local/Region/Private address
// space AND the device implements that memory space in hardware -- such
// accesses have a single fixed resource ID and need no pointer tracking.
// NOTE(review): the return type, body-open brace, and the branch bodies /
// closing braces are elided from this extract -- presumably returns bool.
154 isLRPInst(MachineInstr
*MI
,
155 const AMDILTargetMachine
*ATM
)
157 const AMDILSubtarget
*STM
158 = ATM
->getSubtargetImpl();
162 if ((isRegionInst(ATM
->getInstrInfo(), MI
)
163 && STM
->device()->usesHardware(AMDILDeviceInfo::RegionMem
))
164 || (isLocalInst(ATM
->getInstrInfo(), MI
)
165 && STM
->device()->usesHardware(AMDILDeviceInfo::LocalMem
))
166 || (isPrivateInst(ATM
->getInstrInfo(), MI
)
167 && STM
->device()->usesHardware(AMDILDeviceInfo::PrivateMem
))) {
173 /// Helper function to determine if the I/O instruction uses
174 /// global device memory or not.
// Dispatches on the opcode (ExpandCaseToAllTypes/ExpandCaseToAllTruncTypes
// are case-label macros covering every type variant of each mnemonic):
// GLOBAL* ops use global memory; REGION*/LOCAL*/CPOOL-CONSTANT*/PRIVATE* ops
// use global memory only when the device does NOT implement that space in
// hardware (software emulation falls back to global memory).
// NOTE(review): the signature's first parameter (MachineInstr *MI), the
// GLOBAL-case return, and the default case are elided from this extract.
177 const AMDILTargetMachine
*ATM
,
179 const AMDILSubtarget
*STM
180 = ATM
->getSubtargetImpl();
181 switch(MI
->getOpcode()) {
182 ExpandCaseToAllTypes(AMDIL::GLOBALSTORE
);
183 ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE
);
184 ExpandCaseToAllTypes(AMDIL::GLOBALLOAD
);
185 ExpandCaseToAllTypes(AMDIL::GLOBALSEXTLOAD
);
186 ExpandCaseToAllTypes(AMDIL::GLOBALZEXTLOAD
);
187 ExpandCaseToAllTypes(AMDIL::GLOBALAEXTLOAD
);
189 ExpandCaseToAllTypes(AMDIL::REGIONLOAD
);
190 ExpandCaseToAllTypes(AMDIL::REGIONSEXTLOAD
);
191 ExpandCaseToAllTypes(AMDIL::REGIONZEXTLOAD
);
192 ExpandCaseToAllTypes(AMDIL::REGIONAEXTLOAD
);
193 ExpandCaseToAllTypes(AMDIL::REGIONSTORE
);
194 ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE
);
// Region ops hit global memory only when RegionMem is not in hardware.
195 return !STM
->device()->usesHardware(AMDILDeviceInfo::RegionMem
);
196 ExpandCaseToAllTypes(AMDIL::LOCALLOAD
);
197 ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD
);
198 ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD
);
199 ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD
);
200 ExpandCaseToAllTypes(AMDIL::LOCALSTORE
);
201 ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE
);
202 return !STM
->device()->usesHardware(AMDILDeviceInfo::LocalMem
);
203 ExpandCaseToAllTypes(AMDIL::CPOOLLOAD
);
204 ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD
);
205 ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD
);
206 ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD
);
207 ExpandCaseToAllTypes(AMDIL::CONSTANTLOAD
);
208 ExpandCaseToAllTypes(AMDIL::CONSTANTSEXTLOAD
);
209 ExpandCaseToAllTypes(AMDIL::CONSTANTAEXTLOAD
);
210 ExpandCaseToAllTypes(AMDIL::CONSTANTZEXTLOAD
);
211 return !STM
->device()->usesHardware(AMDILDeviceInfo::ConstantMem
);
212 ExpandCaseToAllTypes(AMDIL::PRIVATELOAD
);
213 ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD
);
214 ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD
);
215 ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD
);
216 ExpandCaseToAllTypes(AMDIL::PRIVATESTORE
);
217 ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE
);
218 return !STM
->device()->usesHardware(AMDILDeviceInfo::PrivateMem
);
225 // Helper function that allocates the default resource ID for the
226 // respective I/O types.
// For an instruction whose pointer could not be tracked to a specific
// resource, picks the device-default resource ID for its memory class
// (GLOBAL/SCRATCH/LDS/GDS/CONSTANT/append-counter), patches atomic
// instructions' trailing immediate operand with that ID, records UAV /
// local / region usage on the machine-function info, and finally writes the
// encoding back into the instruction's AsmPrinter flags.
// NOTE(review): parameters MI and mDebug, several closing braces, and the
// DEBUG-guarded dump lines are elided from this extract.
229 const AMDILTargetMachine
*ATM
,
230 AMDILAS::InstrResEnc
&curRes
,
234 AMDILMachineFunctionInfo
*mMFI
=
235 MI
->getParent()->getParent()->getInfo
<AMDILMachineFunctionInfo
>();
236 const AMDILSubtarget
*STM
237 = ATM
->getSubtargetImpl();
239 dbgs() << "Assigning instruction to default ID. Inst:";
242 // If we use global memory, lets set the Operand to
244 if (usesGlobal(ATM
, MI
)) {
245 curRes
.bits
.ResourceID
=
246 STM
->device()->getResourceID(AMDILDevice::GLOBAL_ID
);
// Atomics carry the resource ID as their last (immediate) operand.
247 if (isAtomicInst(ATM
->getInstrInfo(), MI
)) {
248 MI
->getOperand(MI
->getNumOperands()-1)
249 .setImm(curRes
.bits
.ResourceID
)
251 AMDILKernelManager
*KM
= STM
->getKernelManager();
// Resource ID 8 without arena-segment support needs explicit UAV bookkeeping.
252 if (curRes
.bits
.ResourceID
== 8
253 && !STM
->device()->isSupported(AMDILDeviceInfo::ArenaSegment
)) {
254 KM
->setUAVID(NULL
, curRes
.bits
.ResourceID
);
255 mMFI
->uav_insert(curRes
.bits
.ResourceID
);
257 } else if (isPrivateInst(ATM
->getInstrInfo(), MI
)) {
258 curRes
.bits
.ResourceID
=
259 STM
->device()->getResourceID(AMDILDevice::SCRATCH_ID
);
260 } else if (isLocalInst(ATM
->getInstrInfo(), MI
) || isLocalAtomic(ATM
->getInstrInfo(), MI
)) {
261 curRes
.bits
.ResourceID
=
262 STM
->device()->getResourceID(AMDILDevice::LDS_ID
);
263 AMDILMachineFunctionInfo
*mMFI
=
264 MI
->getParent()->getParent()->getInfo
<AMDILMachineFunctionInfo
>();
265 mMFI
->setUsesLocal();
266 if (isAtomicInst(ATM
->getInstrInfo(), MI
)) {
267 assert(curRes
.bits
.ResourceID
&& "Atomic resource ID "
269 MI
->getOperand(MI
->getNumOperands()-1)
270 .setImm(curRes
.bits
.ResourceID
);
272 } else if (isRegionInst(ATM
->getInstrInfo(), MI
) || isRegionAtomic(ATM
->getInstrInfo(), MI
)) {
273 curRes
.bits
.ResourceID
=
274 STM
->device()->getResourceID(AMDILDevice::GDS_ID
);
275 AMDILMachineFunctionInfo
*mMFI
=
276 MI
->getParent()->getParent()->getInfo
<AMDILMachineFunctionInfo
>();
277 mMFI
->setUsesRegion();
278 if (isAtomicInst(ATM
->getInstrInfo(), MI
)) {
279 assert(curRes
.bits
.ResourceID
&& "Atomic resource ID "
281 (MI
)->getOperand((MI
)->getNumOperands()-1)
282 .setImm(curRes
.bits
.ResourceID
);
284 } else if (isConstantInst(ATM
->getInstrInfo(), MI
)) {
285 // If we are unknown constant instruction and the base pointer is known.
286 // Set the resource ID accordingly, otherwise use the default constant ID.
287 // FIXME: this should not require the base pointer to know what constant
289 AMDILGlobalManager
*GM
= STM
->getGlobalManager();
290 MachineFunction
*MF
= MI
->getParent()->getParent();
291 if (GM
->isKernel(MF
->getFunction()->getName())) {
292 const kernel
&krnl
= GM
->getKernel(MF
->getFunction()->getName());
293 const Value
*V
= getBasePointerValue(MI
);
// Non-alloca base pointer: ask the global manager which constant buffer
// holds it and mark the access as hardware-handled.
294 if (V
&& !dyn_cast
<AllocaInst
>(V
)) {
295 curRes
.bits
.ResourceID
= GM
->getConstPtrCB(krnl
, V
->getName());
296 curRes
.bits
.HardwareInst
= 1;
297 } else if (V
&& dyn_cast
<AllocaInst
>(V
)) {
298 // FIXME: Need a better way to fix this. Requires a rewrite of how
299 // we lower global addresses to various address spaces.
300 // So for now, lets assume that there is only a single
301 // constant buffer that can be accessed from a load instruction
302 // that is derived from an alloca instruction.
303 curRes
.bits
.ResourceID
= 2;
304 curRes
.bits
.HardwareInst
= 1;
306 if (isStoreInst(ATM
->getInstrInfo(), MI
)) {
308 dbgs() << __LINE__
<< ": Setting byte store bit on instruction: ";
311 curRes
.bits
.ByteStore
= 1;
313 curRes
.bits
.ResourceID
= STM
->device()->getResourceID(AMDILDevice::CONSTANT_ID
);
316 if (isStoreInst(ATM
->getInstrInfo(), MI
)) {
318 dbgs() << __LINE__
<< ": Setting byte store bit on instruction: ";
321 curRes
.bits
.ByteStore
= 1;
323 curRes
.bits
.ResourceID
= STM
->device()->getResourceID(AMDILDevice::GLOBAL_ID
);
324 AMDILKernelManager
*KM
= STM
->getKernelManager();
325 KM
->setUAVID(NULL
, curRes
.bits
.ResourceID
);
326 mMFI
->uav_insert(curRes
.bits
.ResourceID
);
// Append/consume counter instructions: ALLOC variants use resource 1,
// the others use resource 2 (per the visible branches).
328 } else if (isAppendInst(ATM
->getInstrInfo(), MI
)) {
329 unsigned opcode
= MI
->getOpcode();
330 if (opcode
== AMDIL::APPEND_ALLOC
331 || opcode
== AMDIL::APPEND_ALLOC_NORET
) {
332 curRes
.bits
.ResourceID
= 1;
334 curRes
.bits
.ResourceID
= 2;
// Persist the chosen encoding on the instruction for the asm printer.
337 setAsmPrinterFlags(MI
, curRes
);
340 // Function that parses the arguments and updates the lookupTable with the
341 // pointer -> register mapping. This function also checks for cacheable
342 // pointers and updates the CacheableSet with the arguments that
343 // can be cached based on the readonlypointer annotation. The final
344 // purpose of this function is to update the imageSet and counterSet
345 // with all pointers that are either images or atomic counters.
// Walks the kernel's formal arguments in ABI order, assigning each the
// machine register(s) starting at AMDIL::R1, and classifies each argument:
// scalar/vector by-value, opaque image struct (read-only vs write-only via
// the global manager), 32/64-bit atomic counter, or pointer by address
// space. Returns the number of write-only images seen.
// NOTE(review): the return-type line, regNum increments, switch header for
// the address-space dispatch, break statements, and many closing braces are
// elided from this extract; cbNum is referenced but its declaration is not
// visible here.
347 parseArguments(MachineFunction
&MF
,
349 const AMDILTargetMachine
*ATM
,
350 CacheableSet
&cacheablePtrs
,
352 AppendSet
&counterSet
,
355 const AMDILSubtarget
*STM
356 = ATM
->getSubtargetImpl();
357 uint32_t writeOnlyImages
= 0;
358 uint32_t readOnlyImages
= 0;
// Per-kernel annotation global naming the read-only (cacheable) pointers.
359 std::string cachedKernelName
= "llvm.readonlypointer.annotations.";
360 cachedKernelName
.append(MF
.getFunction()->getName());
361 GlobalVariable
*GV
= MF
.getFunction()->getParent()
362 ->getGlobalVariable(cachedKernelName
);
364 unsigned regNum
= AMDIL::R1
;
365 AMDILMachineFunctionInfo
*mMFI
= MF
.getInfo
<AMDILMachineFunctionInfo
>();
366 for (Function::const_arg_iterator I
= MF
.getFunction()->arg_begin(),
367 E
= MF
.getFunction()->arg_end(); I
!= E
; ++I
) {
368 const Argument
*curArg
= I
;
370 dbgs() << "Argument: ";
373 Type
*curType
= curArg
->getType();
374 // We are either a scalar or vector type that
375 // is passed by value that is not an opaque/struct
376 // type. We just need to increment regNum
377 // the correct number of times to match the number
378 // of registers that it takes up.
379 if (curType
->isFPOrFPVectorTy() ||
380 curType
->isIntOrIntVectorTy()) {
381 // We are scalar, so increment once and
// ~0U marks "no resource ID" for plain by-value arguments.
383 if (!curType
->isVectorTy()) {
384 lookupTable
[regNum
] = std::make_pair
<unsigned, const Value
*>(~0U, curArg
);
389 VectorType
*VT
= dyn_cast
<VectorType
>(curType
);
390 // We are a vector type. If we are 64bit type, then
391 // we increment length / 2 times, otherwise we
392 // increment length / 4 times. The only corner case
393 // is with vec3 where the vector gets scalarized and
394 // therefore we need a loop count of 3.
395 size_t loopCount
= VT
->getNumElements();
396 if (loopCount
!= 3) {
397 if (VT
->getScalarSizeInBits() == 64) {
398 loopCount
= loopCount
>> 1;
400 loopCount
= (loopCount
+ 2) >> 2;
406 while (loopCount
--) {
407 lookupTable
[regNum
] = std::make_pair
<unsigned, const Value
*>(~0U, curArg
);
410 } else if (curType
->isPointerTy()) {
411 Type
*CT
= dyn_cast
<PointerType
>(curType
)->getElementType();
412 const StructType
*ST
= dyn_cast
<StructType
>(CT
);
// Opaque struct pointees encode OpenCL image and counter types by name.
413 if (ST
&& ST
->isOpaque()) {
414 StringRef name
= ST
->getName();
415 bool i1d_type
= name
== "struct._image1d_t";
416 bool i1da_type
= name
== "struct._image1d_array_t";
417 bool i1db_type
= name
== "struct._image1d_buffer_t";
418 bool i2d_type
= name
== "struct._image2d_t";
419 bool i2da_type
= name
== "struct._image2d_array_t";
420 bool i3d_type
= name
== "struct._image3d_t";
421 bool c32_type
= name
== "struct._counter32_t";
422 bool c64_type
= name
== "struct._counter64_t";
423 if (i2d_type
|| i3d_type
|| i2da_type
||
424 i1d_type
|| i1db_type
|| i1da_type
) {
426 uint32_t imageNum
= readOnlyImages
+ writeOnlyImages
;
427 if (STM
->getGlobalManager()
428 ->isReadOnlyImage(MF
.getFunction()->getName(), imageNum
)) {
430 dbgs() << "Pointer: '" << curArg
->getName()
431 << "' is a read only image # " << readOnlyImages
<< "!\n";
433 // We store the cbNum along with the image number so that we can
434 // correctly encode the 'info' intrinsics.
435 lookupTable
[regNum
] = std::make_pair
<unsigned, const Value
*>
436 ((cbNum
<< 16 | readOnlyImages
++), curArg
);
437 } else if (STM
->getGlobalManager()
438 ->isWriteOnlyImage(MF
.getFunction()->getName(), imageNum
)) {
440 dbgs() << "Pointer: '" << curArg
->getName()
441 << "' is a write only image # " << writeOnlyImages
<< "!\n";
443 // We store the cbNum along with the image number so that we can
444 // correctly encode the 'info' intrinsics.
445 lookupTable
[regNum
] = std::make_pair
<unsigned, const Value
*>
446 ((cbNum
<< 16 | writeOnlyImages
++), curArg
);
448 assert(!"Read/Write images are not supported!");
453 } else if (c32_type
|| c64_type
) {
455 dbgs() << "Pointer: '" << curArg
->getName()
456 << "' is a " << (c32_type
? "32" : "64")
457 << " bit atomic counter type!\n";
459 counterSet
.push_back(I
);
// On cached-memory devices, cross-check the argument's name against the
// readonlypointer annotation array to decide cacheability.
463 if (STM
->device()->isSupported(AMDILDeviceInfo::CachedMem
)
464 && GV
&& GV
->hasInitializer()) {
465 const ConstantArray
*nameArray
466 = dyn_cast_or_null
<ConstantArray
>(GV
->getInitializer());
468 for (unsigned x
= 0, y
= nameArray
->getNumOperands(); x
< y
; ++x
) {
469 const GlobalVariable
*gV
= dyn_cast_or_null
<GlobalVariable
>(
470 nameArray
->getOperand(x
)->getOperand(0));
471 const ConstantDataArray
*argName
=
472 dyn_cast_or_null
<ConstantDataArray
>(gV
->getInitializer());
476 std::string argStr
= argName
->getAsString();
477 std::string curStr
= curArg
->getName();
478 if (!strcmp(argStr
.data(), curStr
.data())) {
480 dbgs() << "Pointer: '" << curArg
->getName()
481 << "' is cacheable!\n";
483 cacheablePtrs
.insert(curArg
);
488 uint32_t as
= dyn_cast
<PointerType
>(curType
)->getAddressSpace();
489 // Handle the case where the kernel argument is a pointer
491 dbgs() << "Pointer: " << curArg
->getName() << " is assigned ";
492 if (as
== AMDILAS::GLOBAL_ADDRESS
) {
493 dbgs() << "uav " << STM
->device()
494 ->getResourceID(AMDILDevice::GLOBAL_ID
);
495 } else if (as
== AMDILAS::PRIVATE_ADDRESS
) {
496 dbgs() << "scratch " << STM
->device()
497 ->getResourceID(AMDILDevice::SCRATCH_ID
);
498 } else if (as
== AMDILAS::LOCAL_ADDRESS
) {
499 dbgs() << "lds " << STM
->device()
500 ->getResourceID(AMDILDevice::LDS_ID
);
501 } else if (as
== AMDILAS::CONSTANT_ADDRESS
) {
502 dbgs() << "cb " << STM
->device()
503 ->getResourceID(AMDILDevice::CONSTANT_ID
);
504 } else if (as
== AMDILAS::REGION_ADDRESS
) {
505 dbgs() << "gds " << STM
->device()
506 ->getResourceID(AMDILDevice::GDS_ID
);
508 assert(!"Found an address space that we don't support!");
510 dbgs() << " @ register " << regNum
<< ". Inst: ";
// Record (default resource ID, argument) per address space; LOCAL/REGION
// additionally flag the machine-function info.
515 lookupTable
[regNum
] = std::make_pair
<unsigned, const Value
*>
516 (STM
->device()->getResourceID(AMDILDevice::GLOBAL_ID
), curArg
);
518 case AMDILAS::LOCAL_ADDRESS
:
519 lookupTable
[regNum
] = std::make_pair
<unsigned, const Value
*>
520 (STM
->device()->getResourceID(AMDILDevice::LDS_ID
), curArg
);
521 mMFI
->setHasLocalArg();
523 case AMDILAS::REGION_ADDRESS
:
524 lookupTable
[regNum
] = std::make_pair
<unsigned, const Value
*>
525 (STM
->device()->getResourceID(AMDILDevice::GDS_ID
), curArg
);
526 mMFI
->setHasRegionArg();
528 case AMDILAS::CONSTANT_ADDRESS
:
529 lookupTable
[regNum
] = std::make_pair
<unsigned, const Value
*>
530 (STM
->device()->getResourceID(AMDILDevice::CONSTANT_ID
), curArg
);
532 case AMDILAS::PRIVATE_ADDRESS
:
533 lookupTable
[regNum
] = std::make_pair
<unsigned, const Value
*>
534 (STM
->device()->getResourceID(AMDILDevice::SCRATCH_ID
), curArg
);
537 // In this case we need to increment it once.
541 // Is anything missing that is legal in CL?
542 assert(0 && "Current type is not supported!");
543 lookupTable
[regNum
] = std::make_pair
<unsigned, const Value
*>
544 (STM
->device()->getResourceID(AMDILDevice::GLOBAL_ID
), curArg
);
549 return writeOnlyImages
;
551 // The call stack is interesting in that even in SSA form, it assigns
552 // registers to the same value's over and over again. So we need to
553 // ignore the values that are assigned and just deal with the input
554 // and return registers.
// Starting at the CALL instruction (mBegin), scans backwards over the
// argument-setup copies collecting input registers whose lookupTable entry
// names a pointer, erases the transient copies from lookupTable, then scans
// the R1-based return-value copy and links every pointer-carrying input to
// the call's output register so the pointer propagates through the call.
// NOTE(review): the function signature head, the surrounding loop headers,
// and several closing braces are elided from this extract.
557 const AMDILTargetMachine
*ATM
,
558 InstPMap
&InstToPtrMap
,
559 PtrIMap
&PtrToInstMap
,
561 MachineBasicBlock::iterator
&mBegin
,
562 MachineBasicBlock::iterator mEnd
,
565 SmallVector
<unsigned, 8> inputRegs
;
566 AMDILAS::InstrResEnc curRes
;
568 dbgs() << "Parsing Call Stack Start.\n";
570 MachineBasicBlock::iterator callInst
= mBegin
;
571 MachineInstr
*CallMI
= callInst
;
572 getAsmPrinterFlags(CallMI
, curRes
);
573 MachineInstr
*MI
= --mBegin
;
574 unsigned reg
= AMDIL::R1
;
575 // First we need to check the input registers.
577 // We stop if we hit the beginning of the call stack
// ADJCALLSTACKDOWN/UP delimit the call sequence; a non-2-operand or
// non-register-def instruction also terminates the backward scan.
579 if (MI
->getOpcode() == AMDIL::ADJCALLSTACKDOWN
580 || MI
->getOpcode() == AMDIL::ADJCALLSTACKUP
581 || MI
->getNumOperands() != 2
582 || !MI
->getOperand(0).isReg()) {
585 reg
= MI
->getOperand(0).getReg();
586 if (MI
->getOperand(1).isReg()) {
587 unsigned reg1
= MI
->getOperand(1).getReg();
588 inputRegs
.push_back(reg1
);
589 if (lookupTable
[reg1
].second
) {
590 curRes
.bits
.PointerPath
= 1;
// Drop the transient argument-copy registers from the tracking table.
593 lookupTable
.erase(reg
);
595 || mBegin
== CallMI
->getParent()->begin()) {
602 // If the next registers operand 1 is not a register or that register
603 // is not R1, then we don't have any return values.
604 if (MI
->getNumOperands() == 2
605 && MI
->getOperand(1).isReg()
606 && MI
->getOperand(1).getReg() == AMDIL::R1
) {
607 // Next we check the output register.
608 reg
= MI
->getOperand(0).getReg();
609 // Now we link the inputs to the output.
610 for (unsigned x
= 0; x
< inputRegs
.size(); ++x
) {
611 if (lookupTable
[inputRegs
[x
]].second
) {
612 curRes
.bits
.PointerPath
= 1;
613 lookupTable
[reg
] = lookupTable
[inputRegs
[x
]];
614 InstToPtrMap
[CallMI
].insert(
615 lookupTable
[reg
].second
);
619 lookupTable
.erase(MI
->getOperand(1).getReg());
621 setAsmPrinterFlags(CallMI
, curRes
);
623 dbgs() << "Parsing Call Stack End.\n";
628 // Detect if the current instruction conflicts with another instruction
629 // and add the instruction to the correct location accordingly.
// Given source register `reg` and destination `dstReg`: if no pointer path
// was flagged yet, mark one (and for non-load/store, propagate the pointer's
// lookupTable entry to dstReg); otherwise flag a pointer conflict, dump the
// competing pointers under debug, and record the pointer(s) in the
// instruction's pointer set. The final encoding is written back via
// setAsmPrinterFlags.
// NOTE(review): the function signature head (MI, reg, dstReg, mDebug
// parameters), DEBUG guards, and many closing braces are elided from this
// extract.
633 AMDILAS::InstrResEnc
&curRes
,
635 InstPMap
&InstToPtrMap
,
641 // If the instruction does not have a pointer path flag
642 // associated with it, then we know that no other pointer
643 // hits this instruction.
644 if (!curRes
.bits
.PointerPath
) {
645 if (dyn_cast
<PointerType
>(lookupTable
[reg
].second
->getType())) {
646 curRes
.bits
.PointerPath
= 1;
648 // We don't want to transfer to the register number
649 // between load/store because the load dest can be completely
650 // different pointer path and the store doesn't have a real
651 // destination register.
654 if (dyn_cast
<PointerType
>(lookupTable
[reg
].second
->getType())) {
655 dbgs() << "Pointer: " << lookupTable
[reg
].second
->getName();
656 assert(dyn_cast
<PointerType
>(lookupTable
[reg
].second
->getType())
657 && "Must be a pointer type for an instruction!");
658 switch (dyn_cast
<PointerType
>(
659 lookupTable
[reg
].second
->getType())->getAddressSpace())
661 case AMDILAS::GLOBAL_ADDRESS
: dbgs() << " UAV: "; break;
662 case AMDILAS::LOCAL_ADDRESS
: dbgs() << " LDS: "; break;
663 case AMDILAS::REGION_ADDRESS
: dbgs() << " GDS: "; break;
664 case AMDILAS::PRIVATE_ADDRESS
: dbgs() << " SCRATCH: "; break;
665 case AMDILAS::CONSTANT_ADDRESS
: dbgs() << " CB: "; break;
668 dbgs() << lookupTable
[reg
].first
<< " Reg: " << reg
669 << " assigned to reg " << dstReg
<< ". Inst: ";
673 // We don't want to do any copies if the register is not virtual
674 // as it is the result of a CALL. ParseCallInst handles the
675 // case where the input and output need to be linked up
676 // if it occurs. The easiest way to check for virtual
677 // is to check the top bit.
678 lookupTable
[dstReg
] = lookupTable
[reg
];
681 if (dyn_cast
<PointerType
>(lookupTable
[reg
].second
->getType())) {
682 // Otherwise we have a conflict between two pointers somehow.
683 curRes
.bits
.ConflictPtr
= 1;
685 dbgs() << "Pointer: " << lookupTable
[reg
].second
->getName();
686 assert(dyn_cast
<PointerType
>(lookupTable
[reg
].second
->getType())
687 && "Must be a pointer type for a conflict instruction!");
688 switch (dyn_cast
<PointerType
>(
689 lookupTable
[reg
].second
->getType())->getAddressSpace())
691 case AMDILAS::GLOBAL_ADDRESS
: dbgs() << " UAV: "; break;
692 case AMDILAS::LOCAL_ADDRESS
: dbgs() << " LDS: "; break;
693 case AMDILAS::REGION_ADDRESS
: dbgs() << " GDS: "; break;
694 case AMDILAS::PRIVATE_ADDRESS
: dbgs() << " SCRATCH: "; break;
695 case AMDILAS::CONSTANT_ADDRESS
: dbgs() << " CB: "; break;
698 dbgs() << lookupTable
[reg
].first
<< " Reg: " << reg
;
699 if (InstToPtrMap
[MI
].size() > 1) {
700 dbgs() << " conflicts with:\n ";
701 for (PtrSet::iterator psib
= InstToPtrMap
[MI
].begin(),
702 psie
= InstToPtrMap
[MI
].end(); psib
!= psie
; ++psib
) {
703 dbgs() << "\t\tPointer: " << (*psib
)->getName() << " ";
704 assert(dyn_cast
<PointerType
>((*psib
)->getType())
705 && "Must be a pointer type for a conflict instruction!");
715 // Add the conflicting values to the pointer set for the instruction
716 InstToPtrMap
[MI
].insert(lookupTable
[reg
].second
);
717 // We don't want to add the destination register if
718 // we are a load or store.
720 InstToPtrMap
[MI
].insert(lookupTable
[dstReg
].second
);
723 setAsmPrinterFlags(MI
, curRes
);
726 // In this case we want to handle a load instruction.
// Resolves the load's base pointer from operand 1: a register is looked up
// in lookupTable, a frame index transfers the frame slot's tracked pointer
// to the destination register via FIToPtrMap. Untracked / local-region-
// private loads fall back to allocateDefaultID; otherwise the instruction
// is recorded against its base pointer, global loads are offered to the
// block's cacheable set, and conflicts are detected.
// NOTE(review): the function signature head (MI, lookupTable, FIToPtrMap,
// mDebug parameters), the `idx` declaration, DEBUG guards, and several
// braces/returns are elided from this extract.
729 const AMDILTargetMachine
*ATM
,
730 InstPMap
&InstToPtrMap
,
731 PtrIMap
&PtrToInstMap
,
735 BlockCacheableInfo
&bci
,
739 assert(isLoadInst(ATM
->getInstrInfo(), MI
) && "Only a load instruction can be parsed by "
740 "the parseLoadInst function.");
741 AMDILAS::InstrResEnc curRes
;
742 getAsmPrinterFlags(MI
, curRes
);
743 unsigned dstReg
= MI
->getOperand(0).getReg();
745 const Value
*basePtr
= NULL
;
746 if (MI
->getOperand(1).isReg()) {
747 idx
= MI
->getOperand(1).getReg();
748 basePtr
= lookupTable
[idx
].second
;
749 // If we don't know what value the register
750 // is assigned to, then we need to special case
752 } else if (MI
->getOperand(1).isFI()) {
753 idx
= MI
->getOperand(1).getIndex();
// Loading through a frame slot: forward the pointer previously stored
// there (recorded by parseStoreInst) to the load's destination register.
754 lookupTable
[dstReg
] = FIToPtrMap
[idx
];
755 } else if (MI
->getOperand(1).isCPI()) {
758 // If we are a hardware local, then we don't need to track as there
759 // is only one resource ID that we need to know about, so we
760 // map it using allocateDefaultID, which maps it to the default.
761 // This is also the case for REGION_ADDRESS and PRIVATE_ADDRESS.
762 if (isLRPInst(MI
, ATM
) || !basePtr
) {
763 allocateDefaultID(ATM
, curRes
, MI
, mDebug
);
766 // We have a load instruction so we map this instruction
767 // to the pointer and insert it into the set of known
768 // load instructions.
769 InstToPtrMap
[MI
].insert(basePtr
);
770 PtrToInstMap
[basePtr
].push_back(MI
);
772 if (isGlobalInst(ATM
->getInstrInfo(), MI
)) {
773 // Add to the cacheable set for the block. If there was a store earlier
774 // in the block, this call won't actually add it to the cacheable set.
775 bci
.addPossiblyCacheableInst(ATM
, MI
);
779 dbgs() << "Assigning instruction to pointer ";
780 dbgs() << basePtr
->getName() << ". Inst: ";
783 detectConflictInst(MI
, curRes
, lookupTable
, InstToPtrMap
, true,
784 idx
, dstReg
, mDebug
);
787 // In this case we want to handle a store instruction.
// Handles a store: if the stored *data* is itself a tracked pointer, flags
// a conflict and records it in conflictPtrs; non-register addresses (CPI /
// frame index) are given default IDs, with frame-index stores recording the
// pointer into FIToPtrMap for later loads; register addresses map the store
// to its base pointer and mark sub-32-bit stores (by register class or
// truncating-store opcode) as byte stores / byte pointers before running
// conflict detection.
// NOTE(review): the function signature head (MI, lookupTable, FIToPtrMap,
// bytePtrs, mDebug parameters), the register-class switch header, DEBUG
// guards, returns and many closing braces are elided from this extract.
790 const AMDILTargetMachine
*ATM
,
791 InstPMap
&InstToPtrMap
,
792 PtrIMap
&PtrToInstMap
,
796 BlockCacheableInfo
&bci
,
799 ConflictSet
&conflictPtrs
,
802 assert(isStoreInst(ATM
->getInstrInfo(), MI
) && "Only a store instruction can be parsed by "
803 "the parseStoreInst function.");
804 AMDILAS::InstrResEnc curRes
;
805 getAsmPrinterFlags(MI
, curRes
);
806 unsigned dstReg
= MI
->getOperand(0).getReg();
808 // If the data part of the store instruction is known to
809 // be a pointer, then we need to mark this pointer as being
810 // a byte pointer. This is the conservative case that needs
811 // to be handled correctly.
812 if (lookupTable
[dstReg
].second
&& lookupTable
[dstReg
].first
!= ~0U) {
813 curRes
.bits
.ConflictPtr
= 1;
815 dbgs() << "Found a case where the pointer is being stored!\n";
817 dbgs() << "Pointer is ";
818 lookupTable
[dstReg
].second
->print(dbgs());
821 //PtrToInstMap[lookupTable[dstReg].second].push_back(MI);
822 if (lookupTable
[dstReg
].second
->getType()->isPointerTy()) {
823 conflictPtrs
.insert(lookupTable
[dstReg
].second
);
827 // Before we go through the special cases, for the cacheable information
828 // all we care is if the store is global or not.
829 if (!isLRPInst(MI
, ATM
)) {
830 bci
.setReachesExit();
833 // If the address is not a register address,
834 // then we need to lower it as an unknown id.
835 if (!MI
->getOperand(1).isReg()) {
836 if (MI
->getOperand(1).isCPI()) {
838 dbgs() << "Found an instruction with a CPI index #"
839 << MI
->getOperand(1).getIndex() << "!\n";
842 } else if (MI
->getOperand(1).isFI()) {
844 dbgs() << "Found an instruction with a frame index #"
845 << MI
->getOperand(1).getIndex() << "!\n";
847 // If we are a frame index and we are storing a pointer there, lets
848 // go ahead and assign the pointer to the location within the frame
849 // index map so that we can get the value out later.
850 FIToPtrMap
[MI
->getOperand(1).getIndex()] = lookupTable
[dstReg
];
853 allocateDefaultID(ATM
, curRes
, MI
, mDebug
);
856 unsigned reg
= MI
->getOperand(1).getReg();
857 // If we don't know what value the register
858 // is assigned to, then we need to special case
860 if (!lookupTable
[reg
].second
) {
861 allocateDefaultID(ATM
, curRes
, MI
, mDebug
);
864 // const Value *basePtr = lookupTable[reg].second;
865 // If we are a hardware local, then we don't need to track as there
866 // is only one resource ID that we need to know about, so we
867 // map it using allocateDefaultID, which maps it to the default.
868 // This is also the case for REGION_ADDRESS and PRIVATE_ADDRESS.
869 if (isLRPInst(MI
, ATM
)) {
870 allocateDefaultID(ATM
, curRes
, MI
, mDebug
);
874 // We have a store instruction so we map this instruction
875 // to the pointer and insert it into the set of known
876 // store instructions.
877 InstToPtrMap
[MI
].insert(lookupTable
[reg
].second
);
878 PtrToInstMap
[lookupTable
[reg
].second
].push_back(MI
);
// Stores narrower than 32 bits (i8/v2i8/i16 register classes) to global
// memory must go through the byte-store path.
879 uint16_t RegClass
= MI
->getDesc().OpInfo
[0].RegClass
;
883 case AMDIL::GPRI8RegClassID
:
884 case AMDIL::GPRV2I8RegClassID
:
885 case AMDIL::GPRI16RegClassID
:
886 if (usesGlobal(ATM
, MI
)) {
888 dbgs() << "Annotating instruction as Byte Store. Inst: ";
891 curRes
.bits
.ByteStore
= 1;
892 setAsmPrinterFlags(MI
, curRes
);
893 const PointerType
*PT
= dyn_cast
<PointerType
>(
894 lookupTable
[reg
].second
->getType());
896 bytePtrs
.insert(lookupTable
[reg
].second
);
901 // If we are a truncating store, then we need to determine the
902 // size of the pointer that we are truncating to, and if we
903 // are less than 32 bits, we need to mark the pointer as a
904 // byte store pointer.
905 switch (MI
->getOpcode()) {
906 case AMDIL::GLOBALTRUNCSTORE_i16i8
:
907 case AMDIL::GLOBALTRUNCSTORE_v2i16i8
:
908 case AMDIL::GLOBALTRUNCSTORE_i32i8
:
909 case AMDIL::GLOBALTRUNCSTORE_v2i32i8
:
910 case AMDIL::GLOBALTRUNCSTORE_i64i8
:
911 case AMDIL::GLOBALTRUNCSTORE_v2i64i8
:
912 case AMDIL::GLOBALTRUNCSTORE_i32i16
:
913 case AMDIL::GLOBALTRUNCSTORE_i64i16
:
914 case AMDIL::GLOBALSTORE_i8
:
915 case AMDIL::GLOBALSTORE_i16
:
916 curRes
.bits
.ByteStore
= 1;
917 setAsmPrinterFlags(MI
, curRes
);
918 bytePtrs
.insert(lookupTable
[reg
].second
);
925 dbgs() << "Assigning instruction to pointer ";
926 dbgs() << lookupTable
[reg
].second
->getName() << ". Inst: ";
929 detectConflictInst(MI
, curRes
, lookupTable
, InstToPtrMap
, true,
930 reg
, dstReg
, mDebug
);
933 // In this case we want to handle an atomic instruction.
// Scans the atomic instruction's operands for a register tracked in
// lookupTable; untracked or local/region/private atomics skip tracking
// (falling through to allocateDefaultID at the end). A tracked global
// atomic is recorded against its pointer, marks the block as reaching exit
// (atomics are stores for cacheability), and -- on CAL SC >= 150 -- forces
// the pointer into the arena/byte-store path to avoid the slow RAW
// complete path. Ends with conflict detection.
// NOTE(review): the function signature head (MI, lookupTable, bytePtrs,
// mDebug parameters), the operand loop header, the `reg` extraction from
// Op, DEBUG guards, and several continues/braces are elided from this
// extract.
936 const AMDILTargetMachine
*ATM
,
937 InstPMap
&InstToPtrMap
,
938 PtrIMap
&PtrToInstMap
,
940 BlockCacheableInfo
&bci
,
945 assert(isAtomicInst(ATM
->getInstrInfo(), MI
) && "Only an atomic instruction can be parsed by "
946 "the parseAtomicInst function.");
947 AMDILAS::InstrResEnc curRes
;
948 unsigned dstReg
= MI
->getOperand(0).getReg();
950 getAsmPrinterFlags(MI
, curRes
);
951 unsigned numOps
= MI
->getNumOperands();
954 MachineOperand
&Op
= MI
->getOperand(numOps
);
959 // If the register is not known to be owned by a pointer
960 // then we can ignore it
961 if (!lookupTable
[reg
].second
) {
964 // if the pointer is known to be local, region or private, then we
965 // can ignore it. Although there are no private atomics, we still
966 // do this check so we don't have to write a new function to check
967 // for only local and region.
968 if (isLRPInst(MI
, ATM
)) {
972 InstToPtrMap
[MI
].insert(lookupTable
[reg
].second
);
973 PtrToInstMap
[lookupTable
[reg
].second
].push_back(MI
);
975 // We now know we have an atomic operation on global memory.
976 // This is a store so must update the cacheable information.
977 bci
.setReachesExit();
979 // Only do if have SC with arena atomic bug fix (EPR 326883).
980 // TODO: enable once SC with EPR 326883 has been promoted to CAL.
981 if (ATM
->getSubtargetImpl()->calVersion() >= CAL_VERSION_SC_150
) {
982 // Force pointers that are used by atomics to be in the arena.
983 // If they were allowed to be accessed as RAW they would cause
984 // all access to use the slow complete path.
986 dbgs() << __LINE__
<< ": Setting byte store bit on atomic instruction: ";
989 curRes
.bits
.ByteStore
= 1;
990 bytePtrs
.insert(lookupTable
[reg
].second
);
994 dbgs() << "Assigning instruction to pointer ";
995 dbgs() << lookupTable
[reg
].second
->getName() << ". Inst: ";
998 detectConflictInst(MI
, curRes
, lookupTable
, InstToPtrMap
, true,
999 reg
, dstReg
, mDebug
);
1002 allocateDefaultID(ATM
, curRes
, MI
, mDebug
);
1005 // In this case we want to handle a counter instruction.
1008 const AMDILTargetMachine
*ATM
,
1009 InstPMap
&InstToPtrMap
,
1010 PtrIMap
&PtrToInstMap
,
1011 RVPVec
&lookupTable
,
1015 assert(isAppendInst(ATM
->getInstrInfo(), MI
) && "Only an atomic counter instruction can be "
1016 "parsed by the parseAppendInst function.");
1017 AMDILAS::InstrResEnc curRes
;
1018 unsigned dstReg
= MI
->getOperand(0).getReg();
1019 unsigned reg
= MI
->getOperand(1).getReg();
1020 getAsmPrinterFlags(MI
, curRes
);
1021 // If the register is not known to be owned by a pointer
1022 // then we set it to the default
1023 if (!lookupTable
[reg
].second
) {
1024 allocateDefaultID(ATM
, curRes
, MI
, mDebug
);
1027 InstToPtrMap
[MI
].insert(lookupTable
[reg
].second
);
1028 PtrToInstMap
[lookupTable
[reg
].second
].push_back(MI
);
1030 dbgs() << "Assigning instruction to pointer ";
1031 dbgs() << lookupTable
[reg
].second
->getName() << ". Inst: ";
1034 detectConflictInst(MI
, curRes
, lookupTable
, InstToPtrMap
, true,
1035 reg
, dstReg
, mDebug
);
1037 // In this case we want to handle an Image instruction.
1040 const AMDILTargetMachine
*ATM
,
1041 InstPMap
&InstToPtrMap
,
1042 PtrIMap
&PtrToInstMap
,
1044 RVPVec
&lookupTable
,
1048 assert(isImageInst(ATM
->getInstrInfo(), MI
) && "Only an image instruction can be "
1049 "parsed by the parseImageInst function.");
1050 AMDILAS::InstrResEnc curRes
;
1051 getAsmPrinterFlags(MI
, curRes
);
1052 // AMDILKernelManager *km =
1053 // (AMDILKernelManager *)ATM->getSubtargetImpl()->getKernelManager();
1054 AMDILMachineFunctionInfo
*mMFI
= MI
->getParent()->getParent()
1055 ->getInfo
<AMDILMachineFunctionInfo
>();
1056 if (MI
->getOpcode() == AMDIL::IMAGE2D_WRITE
1057 || MI
->getOpcode() == AMDIL::IMAGE3D_WRITE
) {
1058 unsigned dstReg
= MI
->getOperand(0).getReg();
1059 curRes
.bits
.ResourceID
= lookupTable
[dstReg
].first
& 0xFFFF;
1060 curRes
.bits
.isImage
= 1;
1061 InstToPtrMap
[MI
].insert(lookupTable
[dstReg
].second
);
1062 PtrToInstMap
[lookupTable
[dstReg
].second
].push_back(MI
);
1064 dbgs() << "Assigning instruction to pointer ";
1065 dbgs() << lookupTable
[dstReg
].second
->getName() << ". Inst: ";
1069 // unsigned dstReg = MI->getOperand(0).getReg();
1070 unsigned reg
= MI
->getOperand(1).getReg();
1072 // If the register is not known to be owned by a pointer
1073 // then we set it to the default
1074 if (!lookupTable
[reg
].second
) {
1075 assert(!"This should not happen for images!");
1076 allocateDefaultID(ATM
, curRes
, MI
, mDebug
);
1079 InstToPtrMap
[MI
].insert(lookupTable
[reg
].second
);
1080 PtrToInstMap
[lookupTable
[reg
].second
].push_back(MI
);
1082 dbgs() << "Assigning instruction to pointer ";
1083 dbgs() << lookupTable
[reg
].second
->getName() << ". Inst: ";
1086 switch (MI
->getOpcode()) {
1087 case AMDIL::IMAGE2D_READ
:
1088 case AMDIL::IMAGE2D_READ_UNNORM
:
1089 case AMDIL::IMAGE3D_READ
:
1090 case AMDIL::IMAGE3D_READ_UNNORM
:
1091 curRes
.bits
.ResourceID
= lookupTable
[reg
].first
& 0xFFFF;
1092 if (MI
->getOperand(3).isReg()) {
1093 // Our sampler is not a literal value.
1095 memset(buffer
, 0, sizeof(buffer
));
1096 std::string sampler_name
= "";
1097 unsigned reg
= MI
->getOperand(3).getReg();
1098 if (lookupTable
[reg
].second
) {
1099 sampler_name
= lookupTable
[reg
].second
->getName();
1101 if (sampler_name
.empty()) {
1102 sampler_name
= findSamplerName(MI
, lookupTable
, FIToPtrMap
, ATM
);
1104 uint32_t val
= mMFI
->addSampler(sampler_name
, ~0U);
1106 dbgs() << "Mapping kernel sampler " << sampler_name
1107 << " to sampler number " << val
<< " for Inst:\n";
1110 MI
->getOperand(3).ChangeToImmediate(val
);
1112 // Our sampler is known at runtime as a literal, lets make sure
1113 // that the metadata for it is known.
1115 memset(buffer
, 0, sizeof(buffer
));
1116 sprintf(buffer
,"_%d", (int32_t)MI
->getOperand(3).getImm());
1117 std::string sampler_name
= std::string("unknown") + std::string(buffer
);
1118 uint32_t val
= mMFI
->addSampler(sampler_name
, MI
->getOperand(3).getImm());
1120 dbgs() << "Mapping internal sampler " << sampler_name
1121 << " to sampler number " << val
<< " for Inst:\n";
1124 MI
->getOperand(3).setImm(val
);
1127 case AMDIL::IMAGE2D_INFO0
:
1128 case AMDIL::IMAGE3D_INFO0
:
1129 curRes
.bits
.ResourceID
= lookupTable
[reg
].first
>> 16;
1131 case AMDIL::IMAGE2D_INFO1
:
1132 case AMDIL::IMAGE2DA_INFO1
:
1133 curRes
.bits
.ResourceID
= (lookupTable
[reg
].first
>> 16) + 1;
1136 curRes
.bits
.isImage
= 1;
1138 setAsmPrinterFlags(MI
, curRes
);
1140 // This case handles the rest of the instructions
1143 const AMDILTargetMachine
*ATM
,
1144 InstPMap
&InstToPtrMap
,
1145 PtrIMap
&PtrToInstMap
,
1146 RVPVec
&lookupTable
,
1151 assert(!isAtomicInst(ATM
->getInstrInfo(), MI
) && !isStoreInst(ATM
->getInstrInfo(), MI
) && !isLoadInst(ATM
->getInstrInfo(), MI
) &&
1152 !isAppendInst(ATM
->getInstrInfo(), MI
) && !isImageInst(ATM
->getInstrInfo(), MI
) &&
1153 "Atomic/Load/Store/Append/Image insts should not be handled here!");
1154 unsigned numOps
= MI
->getNumOperands();
1155 // If we don't have any operands, we can skip this instruction
1159 // if the dst operand is not a register, then we can skip
1160 // this instruction. That is because we are probably a branch
1161 // or jump instruction.
1162 if (!MI
->getOperand(0).isReg()) {
1165 // If we are a LOADCONST_i32, we might be a sampler, so we need
1166 // to propogate the LOADCONST to IMAGE[2|3]D_READ instructions.
1167 if (MI
->getOpcode() == AMDIL::LOADCONST_i32
) {
1168 uint32_t val
= MI
->getOperand(1).getImm();
1169 MachineOperand
* oldPtr
= &MI
->getOperand(0);
1170 MachineOperand
* moPtr
= oldPtr
->getNextOperandForReg();
1173 moPtr
= oldPtr
->getNextOperandForReg();
1174 switch (oldPtr
->getParent()->getOpcode()) {
1177 case AMDIL::IMAGE2D_READ
:
1178 case AMDIL::IMAGE2D_READ_UNNORM
:
1179 case AMDIL::IMAGE3D_READ
:
1180 case AMDIL::IMAGE3D_READ_UNNORM
:
1182 dbgs() << "Found a constant sampler for image read inst: ";
1183 oldPtr
->getParent()->print(dbgs());
1185 oldPtr
->ChangeToImmediate(val
);
1190 AMDILAS::InstrResEnc curRes
;
1191 getAsmPrinterFlags(MI
, curRes
);
1192 unsigned dstReg
= MI
->getOperand(0).getReg();
1195 MachineOperand
&Op
= MI
->getOperand(numOps
);
1196 // if the operand is not a register, then we can ignore it
1204 // If the register is not known to be owned by a pointer
1205 // then we can ignore it
1206 if (!lookupTable
[reg
].second
) {
1209 detectConflictInst(MI
, curRes
, lookupTable
, InstToPtrMap
, false,
1210 reg
, dstReg
, mDebug
);
1215 // This function parses the basic block and based on the instruction type,
1216 // calls the function to finish parsing the instruction.
1219 const AMDILTargetMachine
*ATM
,
1220 MachineBasicBlock
*MB
,
1221 InstPMap
&InstToPtrMap
,
1222 PtrIMap
&PtrToInstMap
,
1224 RVPVec
&lookupTable
,
1226 ConflictSet
&conflictPtrs
,
1228 BlockCacheableInfo
&bci
,
1231 for (MachineBasicBlock::iterator mbb
= MB
->begin(), mbe
= MB
->end();
1232 mbb
!= mbe
; ++mbb
) {
1233 MachineInstr
*MI
= mbb
;
1234 if (MI
->getOpcode() == AMDIL::CALL
) {
1235 parseCall(ATM
, InstToPtrMap
, PtrToInstMap
, lookupTable
,
1238 else if (isLoadInst(ATM
->getInstrInfo(), MI
)) {
1239 parseLoadInst(ATM
, InstToPtrMap
, PtrToInstMap
,
1240 FIToPtrMap
, lookupTable
, cpool
, bci
, MI
, mDebug
);
1241 } else if (isStoreInst(ATM
->getInstrInfo(), MI
)) {
1242 parseStoreInst(ATM
, InstToPtrMap
, PtrToInstMap
,
1243 FIToPtrMap
, lookupTable
, cpool
, bci
, MI
, bytePtrs
, conflictPtrs
, mDebug
);
1244 } else if (isAtomicInst(ATM
->getInstrInfo(), MI
)) {
1245 parseAtomicInst(ATM
, InstToPtrMap
, PtrToInstMap
,
1246 lookupTable
, bci
, MI
, bytePtrs
, mDebug
);
1247 } else if (isAppendInst(ATM
->getInstrInfo(), MI
)) {
1248 parseAppendInst(ATM
, InstToPtrMap
, PtrToInstMap
,
1249 lookupTable
, MI
, mDebug
);
1250 } else if (isImageInst(ATM
->getInstrInfo(), MI
)) {
1251 parseImageInst(ATM
, InstToPtrMap
, PtrToInstMap
,
1252 FIToPtrMap
, lookupTable
, MI
, mDebug
);
1254 parseInstruction(ATM
, InstToPtrMap
, PtrToInstMap
,
1255 lookupTable
, cpool
, MI
, mDebug
);
1260 // Follows the Reverse Post Order Traversal of the basic blocks to
1261 // determine which order to parse basic blocks in.
1264 const AMDILPointerManager
*PM
,
1265 const AMDILTargetMachine
*ATM
,
1266 MachineFunction
&MF
,
1267 InstPMap
&InstToPtrMap
,
1268 PtrIMap
&PtrToInstMap
,
1270 RVPVec
&lookupTable
,
1272 ConflictSet
&conflictPtrs
,
1274 MBBCacheableMap
&mbbCacheable
,
1278 MachineDominatorTree
*dominatorTree
= &PM
1279 ->getAnalysis
<MachineDominatorTree
>();
1280 dominatorTree
->dump();
1283 std::list
<MachineBasicBlock
*> prop_worklist
;
1285 ReversePostOrderTraversal
<MachineFunction
*> RPOT(&MF
);
1286 for (ReversePostOrderTraversal
<MachineFunction
*>::rpo_iterator
1287 curBlock
= RPOT
.begin(), endBlock
= RPOT
.end();
1288 curBlock
!= endBlock
; ++curBlock
) {
1289 MachineBasicBlock
*MB
= (*curBlock
);
1290 BlockCacheableInfo
&bci
= mbbCacheable
[MB
];
1291 for (MachineBasicBlock::pred_iterator mbbit
= MB
->pred_begin(),
1292 mbbitend
= MB
->pred_end();
1295 MBBCacheableMap::const_iterator mbbcmit
= mbbCacheable
.find(*mbbit
);
1296 if (mbbcmit
!= mbbCacheable
.end() &&
1297 mbbcmit
->second
.storeReachesExit()) {
1298 bci
.setReachesTop();
1304 dbgs() << "[BlockOrdering] Parsing CurrentBlock: "
1305 << MB
->getNumber() << "\n";
1307 parseBasicBlock(ATM
, MB
, InstToPtrMap
, PtrToInstMap
,
1308 FIToPtrMap
, lookupTable
, bytePtrs
, conflictPtrs
, cpool
, bci
, mDebug
);
1310 if (bci
.storeReachesExit())
1311 prop_worklist
.push_back(MB
);
1314 dbgs() << "BCI info: Top: " << bci
.storeReachesTop() << " Exit: "
1315 << bci
.storeReachesExit() << "\n Instructions:\n";
1316 for (CacheableInstrSet::const_iterator cibit
= bci
.cacheableBegin(),
1317 cibitend
= bci
.cacheableEnd();
1326 // This loop pushes any "storeReachesExit" flags into successor
1327 // blocks until the flags have been fully propagated. This will
1328 // ensure that blocks that have reachable stores due to loops
1329 // are labeled appropriately.
1330 while (!prop_worklist
.empty()) {
1331 MachineBasicBlock
*wlb
= prop_worklist
.front();
1332 prop_worklist
.pop_front();
1333 for (MachineBasicBlock::succ_iterator mbbit
= wlb
->succ_begin(),
1334 mbbitend
= wlb
->succ_end();
1338 BlockCacheableInfo
&blockCache
= mbbCacheable
[*mbbit
];
1339 if (!blockCache
.storeReachesTop()) {
1340 blockCache
.setReachesTop();
1341 prop_worklist
.push_back(*mbbit
);
1344 dbgs() << "BCI Prop info: " << (*mbbit
)->getNumber() << " Top: "
1345 << blockCache
.storeReachesTop() << " Exit: "
1346 << blockCache
.storeReachesExit()
1353 // Helper function that dumps to dbgs() information about
1356 dumpPointers(AppendSet
&Ptrs
, const char *str
)
1361 dbgs() << "[Dump]" << str
<< " found: " << "\n";
1362 for (AppendSet::iterator sb
= Ptrs
.begin();
1363 sb
!= Ptrs
.end(); ++sb
) {
1368 // Helper function that dumps to dbgs() information about
1371 dumpPointers(PtrSet
&Ptrs
, const char *str
)
1376 dbgs() << "[Dump]" << str
<< " found: " << "\n";
1377 for (PtrSet::iterator sb
= Ptrs
.begin();
1378 sb
!= Ptrs
.end(); ++sb
) {
1383 // Function that detects all the conflicting pointers and adds
1384 // the pointers that are detected to the conflict set, otherwise
1385 // they are added to the raw or byte set based on their usage.
1387 detectConflictingPointers(
1388 const AMDILTargetMachine
*ATM
,
1389 InstPMap
&InstToPtrMap
,
1392 ConflictSet
&conflictPtrs
,
1395 if (InstToPtrMap
.empty()) {
1399 const AMDILSubtarget
*STM
= ATM
->getSubtargetImpl();
1400 for (InstPMap::iterator
1401 mapIter
= InstToPtrMap
.begin(), iterEnd
= InstToPtrMap
.end();
1402 mapIter
!= iterEnd
; ++mapIter
) {
1404 dbgs() << "Instruction: ";
1405 (mapIter
)->first
->dump();
1407 MachineInstr
* MI
= mapIter
->first
;
1408 AMDILAS::InstrResEnc curRes
;
1409 getAsmPrinterFlags(MI
, curRes
);
1410 if (curRes
.bits
.isImage
) {
1414 // We might have a case where more than 1 pointers is going to the same
1417 dbgs() << "Base Pointer[s]:\n";
1419 for (PtrSet::iterator cfIter
= mapIter
->second
.begin(),
1420 cfEnd
= mapIter
->second
.end(); cfIter
!= cfEnd
; ++cfIter
) {
1424 if (bytePtrs
.count(*cfIter
)) {
1426 dbgs() << "Byte pointer found!\n";
1433 for (PtrSet::iterator cfIter
= mapIter
->second
.begin(),
1434 cfEnd
= mapIter
->second
.end(); cfIter
!= cfEnd
; ++cfIter
) {
1435 const Value
*ptr
= (*cfIter
);
1436 if (isLRPInst(mapIter
->first
, ATM
)) {
1437 // We don't need to deal with pointers to local/region/private
1442 dbgs() << "Adding pointer " << (ptr
)->getName()
1443 << " to byte set!\n";
1445 const PointerType
*PT
= dyn_cast
<PointerType
>(ptr
->getType());
1447 bytePtrs
.insert(ptr
);
1451 for (PtrSet::iterator cfIter
= mapIter
->second
.begin(),
1452 cfEnd
= mapIter
->second
.end(); cfIter
!= cfEnd
; ++cfIter
) {
1453 const Value
*ptr
= (*cfIter
);
1454 // bool aliased = false;
1455 if (isLRPInst(mapIter
->first
, ATM
)) {
1456 // We don't need to deal with pointers to local/region/private
1460 const Argument
*arg
= dyn_cast_or_null
<Argument
>(*cfIter
);
1464 if (!STM
->device()->isSupported(AMDILDeviceInfo::NoAlias
)
1465 && !arg
->hasNoAliasAttr()) {
1467 dbgs() << "Possible aliased pointer found!\n";
1469 aliasedPtrs
.insert(ptr
);
1471 if (mapIter
->second
.size() > 1) {
1473 dbgs() << "Adding pointer " << ptr
->getName()
1474 << " to conflict set!\n";
1476 const PointerType
*PT
= dyn_cast
<PointerType
>(ptr
->getType());
1478 conflictPtrs
.insert(ptr
);
1482 dbgs() << "Adding pointer " << ptr
->getName()
1483 << " to raw set!\n";
1485 const PointerType
*PT
= dyn_cast
<PointerType
>(ptr
->getType());
1487 rawPtrs
.insert(ptr
);
1495 // If we have any aliased pointers and byte pointers exist,
1496 // then make sure that all of the aliased pointers are
1497 // part of the byte pointer set.
1498 if (!bytePtrs
.empty()) {
1499 for (PtrSet::iterator aIter
= aliasedPtrs
.begin(),
1500 aEnd
= aliasedPtrs
.end(); aIter
!= aEnd
; ++aIter
) {
1502 dbgs() << "Moving " << (*aIter
)->getName()
1503 << " from raw to byte.\n";
1505 bytePtrs
.insert(*aIter
);
1506 rawPtrs
.erase(*aIter
);
1510 // Function that detects aliased constant pool operations.
1512 detectAliasedCPoolOps(
1518 const AMDILSubtarget
*STM
= &TM
.getSubtarget
<AMDILSubtarget
>();
1519 if (mDebug
&& !cpool
.empty()) {
1520 dbgs() << "Instructions w/ CPool Ops: \n";
1522 // The algorithm for detecting aliased cpool is as follows.
1523 // For each instruction that has a cpool argument
1524 // follow def-use chain
1525 // if instruction is a load and load is a private load,
1526 // switch to constant pool load
1527 for (CPoolSet::iterator cpb
= cpool
.begin(), cpe
= cpool
.end();
1528 cpb
!= cpe
; ++cpb
) {
1532 std::queue
<MachineInstr
*> queue
;
1533 std::set
<MachineInstr
*> visited
;
1536 while (!queue
.empty()) {
1537 cur
= queue
.front();
1539 if (visited
.count(cur
)) {
1542 if (isLoadInst(TM
.getInstrInfo(), cur
) && isPrivateInst(TM
.getInstrInfo(), cur
)) {
1543 // If we are a private load and the register is
1544 // used in the address register, we need to
1545 // switch from private to constant pool load.
1547 dbgs() << "Found an instruction that is a private load "
1548 << "but should be a constant pool load.\n";
1552 AMDILAS::InstrResEnc curRes
;
1553 getAsmPrinterFlags(cur
, curRes
);
1554 curRes
.bits
.ResourceID
= STM
->device()->getResourceID(AMDILDevice::GLOBAL_ID
);
1555 curRes
.bits
.ConflictPtr
= 1;
1556 setAsmPrinterFlags(cur
, curRes
);
1557 cur
->setDesc(TM
.getInstrInfo()->get(
1558 (cur
->getOpcode() - AMDIL::PRIVATEAEXTLOAD_f32
)
1559 + AMDIL::CPOOLAEXTLOAD_f32
));
1561 if (cur
->getOperand(0).isReg()) {
1562 MachineOperand
* ptr
= cur
->getOperand(0).getNextOperandForReg();
1563 while (ptr
&& !ptr
->isDef() && ptr
->isReg()) {
1564 queue
.push(ptr
->getParent());
1565 ptr
= ptr
->getNextOperandForReg();
1569 visited
.insert(cur
);
1573 // Function that detects fully cacheable pointers. Fully cacheable pointers
1574 // are pointers that have no writes to them and -fno-alias is specified.
1576 detectFullyCacheablePointers(
1577 const AMDILTargetMachine
*ATM
,
1578 PtrIMap
&PtrToInstMap
,
1580 CacheableSet
&cacheablePtrs
,
1581 ConflictSet
&conflictPtrs
,
1585 if (PtrToInstMap
.empty()) {
1588 const AMDILSubtarget
*STM
1589 = ATM
->getSubtargetImpl();
1590 // 4XXX hardware doesn't support cached uav opcodes and we assume
1591 // no aliasing for this to work. Also in debug mode we don't do
1593 if (STM
->device()->getGeneration() == AMDILDeviceInfo::HD4XXX
1594 || !STM
->device()->isSupported(AMDILDeviceInfo::CachedMem
)) {
1597 if (STM
->device()->isSupported(AMDILDeviceInfo::NoAlias
)) {
1598 for (PtrIMap::iterator mapIter
= PtrToInstMap
.begin(),
1599 iterEnd
= PtrToInstMap
.end(); mapIter
!= iterEnd
; ++mapIter
) {
1601 dbgs() << "Instruction: ";
1602 mapIter
->first
->dump();
1604 // Skip the pointer if we have already detected it.
1605 if (cacheablePtrs
.count(mapIter
->first
)) {
1608 bool cacheable
= true;
1609 for (std::vector
<MachineInstr
*>::iterator
1610 miBegin
= mapIter
->second
.begin(),
1611 miEnd
= mapIter
->second
.end(); miBegin
!= miEnd
; ++miBegin
) {
1612 if (isStoreInst(ATM
->getInstrInfo(), *miBegin
) ||
1613 isImageInst(ATM
->getInstrInfo(), *miBegin
) ||
1614 isAtomicInst(ATM
->getInstrInfo(), *miBegin
)) {
1619 // we aren't cacheable, so lets move on to the next instruction
1623 // If we are in the conflict set, lets move to the next instruction
1624 // FIXME: we need to check to see if the pointers that conflict with
1625 // the current pointer are also cacheable. If they are, then add them
1626 // to the cacheable list and not fail.
1627 if (conflictPtrs
.count(mapIter
->first
)) {
1630 // Otherwise if we have no stores and no conflicting pointers, we can
1631 // be added to the cacheable set.
1633 dbgs() << "Adding pointer " << mapIter
->first
->getName();
1634 dbgs() << " to cached set!\n";
1636 const PointerType
*PT
= dyn_cast
<PointerType
>(mapIter
->first
->getType());
1638 cacheablePtrs
.insert(mapIter
->first
);
1644 // Are any of the pointers in PtrSet also in the BytePtrs or the CachePtrs?
1646 ptrSetIntersectsByteOrCache(
1649 CacheableSet
&cacheablePtrs
1652 for (PtrSet::const_iterator psit
= cacheSet
.begin(),
1653 psitend
= cacheSet
.end();
1656 if (bytePtrs
.find(*psit
) != bytePtrs
.end() ||
1657 cacheablePtrs
.find(*psit
) != cacheablePtrs
.end()) {
1664 // Function that detects which instructions are cacheable even if
1665 // all instructions of the pointer are not cacheable. The resulting
1666 // set of instructions will not contain Ptrs that are in the cacheable
1667 // ptr set (under the assumption they will get marked cacheable already)
1668 // or pointers in the byte set, since they are not cacheable.
1670 detectCacheableInstrs(
1671 MBBCacheableMap
&bbCacheable
,
1672 InstPMap
&InstToPtrMap
,
1673 CacheableSet
&cacheablePtrs
,
1675 CacheableInstrSet
&cacheableSet
,
1680 for (MBBCacheableMap::const_iterator mbbcit
= bbCacheable
.begin(),
1681 mbbcitend
= bbCacheable
.end();
1682 mbbcit
!= mbbcitend
;
1684 for (CacheableInstrSet::const_iterator bciit
1685 = mbbcit
->second
.cacheableBegin(),
1687 = mbbcit
->second
.cacheableEnd();
1690 if (!ptrSetIntersectsByteOrCache(InstToPtrMap
[*bciit
],
1693 cacheableSet
.insert(*bciit
);
1698 // This function annotates the cacheable pointers with the
1699 // CacheableRead bit. The cacheable read bit is set
1700 // when the number of write images is not equal to the max
1701 // or if the default RAW_UAV_ID is equal to 11. The first
1702 // condition means that there is a raw uav between 0 and 7
1703 // that is available for cacheable reads and the second
1704 // condition means that UAV 11 is available for cacheable
1707 annotateCacheablePtrs(
1709 PtrIMap
&PtrToInstMap
,
1710 CacheableSet
&cacheablePtrs
,
1712 uint32_t numWriteImages
,
1715 const AMDILSubtarget
*STM
= &TM
.getSubtarget
<AMDILSubtarget
>();
1716 // AMDILKernelManager *KM = (AMDILKernelManager*)STM->getKernelManager();
1717 PtrSet::iterator siBegin
, siEnd
;
1718 std::vector
<MachineInstr
*>::iterator miBegin
, miEnd
;
1719 AMDILMachineFunctionInfo
*mMFI
= NULL
;
1720 // First we can check the cacheable pointers
1721 for (siBegin
= cacheablePtrs
.begin(), siEnd
= cacheablePtrs
.end();
1722 siBegin
!= siEnd
; ++siBegin
) {
1723 assert(!bytePtrs
.count(*siBegin
) && "Found a cacheable pointer "
1724 "that also exists as a byte pointer!");
1725 for (miBegin
= PtrToInstMap
[*siBegin
].begin(),
1726 miEnd
= PtrToInstMap
[*siBegin
].end();
1727 miBegin
!= miEnd
; ++miBegin
) {
1729 dbgs() << "Annotating pointer as cacheable. Inst: ";
1732 AMDILAS::InstrResEnc curRes
;
1733 getAsmPrinterFlags(*miBegin
, curRes
);
1734 assert(!curRes
.bits
.ByteStore
&& "No cacheable pointers should have the "
1735 "byte Store flag set!");
1736 // If UAV11 is enabled, then we can enable cached reads.
1737 if (STM
->device()->getResourceID(AMDILDevice::RAW_UAV_ID
) == 11) {
1738 curRes
.bits
.CacheableRead
= 1;
1739 curRes
.bits
.ResourceID
= 11;
1740 setAsmPrinterFlags(*miBegin
, curRes
);
1742 mMFI
= (*miBegin
)->getParent()->getParent()
1743 ->getInfo
<AMDILMachineFunctionInfo
>();
1745 mMFI
->uav_insert(curRes
.bits
.ResourceID
);
1751 // A byte pointer is a pointer that along the pointer path has a
1752 // byte store assigned to it.
1756 PtrIMap
&PtrToInstMap
,
1762 const AMDILSubtarget
*STM
= &TM
.getSubtarget
<AMDILSubtarget
>();
1763 AMDILKernelManager
*KM
= STM
->getKernelManager();
1764 PtrSet::iterator siBegin
, siEnd
;
1765 std::vector
<MachineInstr
*>::iterator miBegin
, miEnd
;
1766 uint32_t arenaID
= STM
->device()
1767 ->getResourceID(AMDILDevice::ARENA_UAV_ID
);
1768 if (STM
->device()->isSupported(AMDILDeviceInfo::ArenaSegment
)) {
1769 arenaID
= ARENA_SEGMENT_RESERVED_UAVS
+ 1;
1771 AMDILMachineFunctionInfo
*mMFI
= NULL
;
1772 for (siBegin
= bytePtrs
.begin(), siEnd
= bytePtrs
.end();
1773 siBegin
!= siEnd
; ++siBegin
) {
1774 const Value
* val
= (*siBegin
);
1775 const PointerType
*PT
= dyn_cast
<PointerType
>(val
->getType());
1779 const Argument
*curArg
= dyn_cast
<Argument
>(val
);
1780 assert(!rawPtrs
.count(*siBegin
) && "Found a byte pointer "
1781 "that also exists as a raw pointer!");
1782 bool arenaInc
= false;
1783 for (miBegin
= PtrToInstMap
[*siBegin
].begin(),
1784 miEnd
= PtrToInstMap
[*siBegin
].end();
1785 miBegin
!= miEnd
; ++miBegin
) {
1787 dbgs() << "Annotating pointer as arena. Inst: ";
1790 AMDILAS::InstrResEnc curRes
;
1791 getAsmPrinterFlags(*miBegin
, curRes
);
1793 if (STM
->device()->usesHardware(AMDILDeviceInfo::ConstantMem
)
1794 && PT
->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS
) {
1795 // If hardware constant mem is enabled, then we need to
1796 // get the constant pointer CB number and use that to specify
1798 AMDILGlobalManager
*GM
= STM
->getGlobalManager();
1799 const StringRef funcName
= (*miBegin
)->getParent()->getParent()
1800 ->getFunction()->getName();
1801 if (GM
->isKernel(funcName
)) {
1802 const kernel
&krnl
= GM
->getKernel(funcName
);
1803 curRes
.bits
.ResourceID
= GM
->getConstPtrCB(krnl
,
1804 (*siBegin
)->getName());
1805 curRes
.bits
.HardwareInst
= 1;
1807 curRes
.bits
.ResourceID
= STM
->device()
1808 ->getResourceID(AMDILDevice::CONSTANT_ID
);
1810 } else if (STM
->device()->usesHardware(AMDILDeviceInfo::LocalMem
)
1811 && PT
->getAddressSpace() == AMDILAS::LOCAL_ADDRESS
) {
1812 // If hardware local mem is enabled, get the local mem ID from
1813 // the device to use as the ResourceID
1814 curRes
.bits
.ResourceID
= STM
->device()
1815 ->getResourceID(AMDILDevice::LDS_ID
);
1816 if (isAtomicInst(TM
.getInstrInfo(), *miBegin
)) {
1817 assert(curRes
.bits
.ResourceID
&& "Atomic resource ID "
1818 "cannot be non-zero!");
1819 (*miBegin
)->getOperand((*miBegin
)->getNumOperands()-1)
1820 .setImm(curRes
.bits
.ResourceID
);
1822 } else if (STM
->device()->usesHardware(AMDILDeviceInfo::RegionMem
)
1823 && PT
->getAddressSpace() == AMDILAS::REGION_ADDRESS
) {
1824 // If hardware region mem is enabled, get the gds mem ID from
1825 // the device to use as the ResourceID
1826 curRes
.bits
.ResourceID
= STM
->device()
1827 ->getResourceID(AMDILDevice::GDS_ID
);
1828 if (isAtomicInst(TM
.getInstrInfo(), *miBegin
)) {
1829 assert(curRes
.bits
.ResourceID
&& "Atomic resource ID "
1830 "cannot be non-zero!");
1831 (*miBegin
)->getOperand((*miBegin
)->getNumOperands()-1)
1832 .setImm(curRes
.bits
.ResourceID
);
1834 } else if (STM
->device()->usesHardware(AMDILDeviceInfo::PrivateMem
)
1835 && PT
->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS
) {
1836 curRes
.bits
.ResourceID
= STM
->device()
1837 ->getResourceID(AMDILDevice::SCRATCH_ID
);
1840 dbgs() << __LINE__
<< ": Setting byte store bit on instruction: ";
1841 (*miBegin
)->print(dbgs());
1843 curRes
.bits
.ByteStore
= 1;
1844 curRes
.bits
.ResourceID
= (curArg
&& curArg
->hasNoAliasAttr()) ? arenaID
1845 : STM
->device()->getResourceID(AMDILDevice::ARENA_UAV_ID
);
1846 if (STM
->device()->isSupported(AMDILDeviceInfo::ArenaSegment
)) {
1849 if (isAtomicInst(TM
.getInstrInfo(), *miBegin
) &&
1850 STM
->device()->isSupported(AMDILDeviceInfo::ArenaUAV
)) {
1851 (*miBegin
)->getOperand((*miBegin
)->getNumOperands()-1)
1852 .setImm(curRes
.bits
.ResourceID
);
1853 // If we are an arena instruction, we need to switch the atomic opcode
1854 // from the global version to the arena version.
1855 MachineInstr
*MI
= *miBegin
;
1857 TM
.getInstrInfo()->get(
1858 (MI
->getOpcode() - AMDIL::ATOM_G_ADD
) + AMDIL::ATOM_A_ADD
));
1861 dbgs() << "Annotating pointer as arena. Inst: ";
1865 setAsmPrinterFlags(*miBegin
, curRes
);
1866 KM
->setUAVID(*siBegin
, curRes
.bits
.ResourceID
);
1868 mMFI
= (*miBegin
)->getParent()->getParent()
1869 ->getInfo
<AMDILMachineFunctionInfo
>();
1871 mMFI
->uav_insert(curRes
.bits
.ResourceID
);
1878 // An append pointer is a opaque object that has append instructions
1883 PtrIMap
&PtrToInstMap
,
1884 AppendSet
&appendPtrs
,
1887 unsigned currentCounter
= 0;
1888 // const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>();
1889 // AMDILKernelManager *KM = (AMDILKernelManager*)STM->getKernelManager();
1890 MachineFunction
*MF
= NULL
;
1891 for (AppendSet::iterator asBegin
= appendPtrs
.begin(),
1892 asEnd
= appendPtrs
.end(); asBegin
!= asEnd
; ++asBegin
)
1894 bool usesWrite
= false;
1895 bool usesRead
= false;
1896 const Value
* curVal
= *asBegin
;
1898 dbgs() << "Counter: " << curVal
->getName()
1899 << " assigned the counter " << currentCounter
<< "\n";
1901 for (std::vector
<MachineInstr
*>::iterator
1902 miBegin
= PtrToInstMap
[curVal
].begin(),
1903 miEnd
= PtrToInstMap
[curVal
].end(); miBegin
!= miEnd
; ++miBegin
) {
1904 MachineInstr
*MI
= *miBegin
;
1906 MF
= MI
->getParent()->getParent();
1908 unsigned opcode
= MI
->getOpcode();
1912 dbgs() << "Skipping instruction: ";
1916 case AMDIL::APPEND_ALLOC
:
1917 case AMDIL::APPEND_ALLOC_NORET
:
1919 MI
->getOperand(1).ChangeToImmediate(currentCounter
);
1921 dbgs() << "Assing to counter " << currentCounter
<< " Inst: ";
1925 case AMDIL::APPEND_CONSUME
:
1926 case AMDIL::APPEND_CONSUME_NORET
:
1928 MI
->getOperand(1).ChangeToImmediate(currentCounter
);
1930 dbgs() << "Assing to counter " << currentCounter
<< " Inst: ";
1936 if (usesWrite
&& usesRead
&& MF
) {
1937 MF
->getInfo
<AMDILMachineFunctionInfo
>()->addErrorMsg(
1938 amd::CompilerErrorMessage
[INCORRECT_COUNTER_USAGE
]);
1943 // A raw pointer is any pointer that does not have byte store in its path.
1947 PtrIMap
&PtrToInstMap
,
1950 uint32_t numWriteImages
,
1954 const AMDILSubtarget
*STM
= &TM
.getSubtarget
<AMDILSubtarget
>();
1955 AMDILKernelManager
*KM
= STM
->getKernelManager();
1956 PtrSet::iterator siBegin
, siEnd
;
1957 std::vector
<MachineInstr
*>::iterator miBegin
, miEnd
;
1958 AMDILMachineFunctionInfo
*mMFI
= NULL
;
1960 // Now all of the raw pointers will go to the raw uav.
1961 for (siBegin
= rawPtrs
.begin(), siEnd
= rawPtrs
.end();
1962 siBegin
!= siEnd
; ++siBegin
) {
1963 const PointerType
*PT
= dyn_cast
<PointerType
>((*siBegin
)->getType());
1967 assert(!bytePtrs
.count(*siBegin
) && "Found a raw pointer "
1968 " that also exists as a byte pointers!");
1969 for (miBegin
= PtrToInstMap
[*siBegin
].begin(),
1970 miEnd
= PtrToInstMap
[*siBegin
].end();
1971 miBegin
!= miEnd
; ++miBegin
) {
1973 dbgs() << "Annotating pointer as raw. Inst: ";
1976 AMDILAS::InstrResEnc curRes
;
1977 getAsmPrinterFlags(*miBegin
, curRes
);
1978 if (!curRes
.bits
.ConflictPtr
) {
1979 assert(!curRes
.bits
.ByteStore
1980 && "Found a instruction that is marked as "
1981 "raw but has a byte store bit set!");
1982 } else if (curRes
.bits
.ConflictPtr
) {
1983 if (curRes
.bits
.ByteStore
) {
1984 curRes
.bits
.ByteStore
= 0;
1987 if (STM
->device()->usesHardware(AMDILDeviceInfo::ConstantMem
)
1988 && PT
->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS
) {
1989 // If hardware constant mem is enabled, then we need to
1990 // get the constant pointer CB number and use that to specify
1992 AMDILGlobalManager
*GM
= STM
->getGlobalManager();
1993 const StringRef funcName
= (*miBegin
)->getParent()->getParent()
1994 ->getFunction()->getName();
1995 if (GM
->isKernel(funcName
)) {
1996 const kernel
&krnl
= GM
->getKernel(funcName
);
1997 curRes
.bits
.ResourceID
= GM
->getConstPtrCB(krnl
,
1998 (*siBegin
)->getName());
1999 curRes
.bits
.HardwareInst
= 1;
2001 curRes
.bits
.ResourceID
= STM
->device()
2002 ->getResourceID(AMDILDevice::CONSTANT_ID
);
2004 } else if (STM
->device()->usesHardware(AMDILDeviceInfo::LocalMem
)
2005 && PT
->getAddressSpace() == AMDILAS::LOCAL_ADDRESS
) {
2006 // If hardware local mem is enabled, get the local mem ID from
2007 // the device to use as the ResourceID
2008 curRes
.bits
.ResourceID
= STM
->device()
2009 ->getResourceID(AMDILDevice::LDS_ID
);
2010 if (isAtomicInst(TM
.getInstrInfo(), *miBegin
)) {
2011 assert(curRes
.bits
.ResourceID
&& "Atomic resource ID "
2012 "cannot be non-zero!");
2013 (*miBegin
)->getOperand((*miBegin
)->getNumOperands()-1)
2014 .setImm(curRes
.bits
.ResourceID
);
2016 } else if (STM
->device()->usesHardware(AMDILDeviceInfo::RegionMem
)
2017 && PT
->getAddressSpace() == AMDILAS::REGION_ADDRESS
) {
2018 // If hardware region mem is enabled, get the gds mem ID from
2019 // the device to use as the ResourceID
2020 curRes
.bits
.ResourceID
= STM
->device()
2021 ->getResourceID(AMDILDevice::GDS_ID
);
2022 if (isAtomicInst(TM
.getInstrInfo(), *miBegin
)) {
2023 assert(curRes
.bits
.ResourceID
&& "Atomic resource ID "
2024 "cannot be non-zero!");
2025 (*miBegin
)->getOperand((*miBegin
)->getNumOperands()-1)
2026 .setImm(curRes
.bits
.ResourceID
);
2028 } else if (STM
->device()->usesHardware(AMDILDeviceInfo::PrivateMem
)
2029 && PT
->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS
) {
2030 curRes
.bits
.ResourceID
= STM
->device()
2031 ->getResourceID(AMDILDevice::SCRATCH_ID
);
2032 } else if (!STM
->device()->isSupported(AMDILDeviceInfo::MultiUAV
)) {
2033 // If multi uav is enabled, then the resource ID is either the
2034 // number of write images that are available or the device
2035 // raw uav id if it is 11.
2036 if (STM
->device()->getResourceID(AMDILDevice::RAW_UAV_ID
) >
2037 STM
->device()->getResourceID(AMDILDevice::ARENA_UAV_ID
)) {
2038 curRes
.bits
.ResourceID
= STM
->device()
2039 ->getResourceID(AMDILDevice::RAW_UAV_ID
);
2040 } else if (numWriteImages
!= OPENCL_MAX_WRITE_IMAGES
) {
2041 if (STM
->device()->getResourceID(AMDILDevice::RAW_UAV_ID
)
2043 curRes
.bits
.ResourceID
= numWriteImages
;
2045 curRes
.bits
.ResourceID
= STM
->device()
2046 ->getResourceID(AMDILDevice::RAW_UAV_ID
);
2050 dbgs() << __LINE__
<< ": Setting byte store bit on instruction: ";
2051 (*miBegin
)->print(dbgs());
2053 curRes
.bits
.ByteStore
= 1;
2054 curRes
.bits
.ResourceID
= STM
->device()
2055 ->getResourceID(AMDILDevice::ARENA_UAV_ID
);
2057 if (isAtomicInst(TM
.getInstrInfo(), *miBegin
)) {
2058 (*miBegin
)->getOperand((*miBegin
)->getNumOperands()-1)
2059 .setImm(curRes
.bits
.ResourceID
);
2060 if (curRes
.bits
.ResourceID
2061 == STM
->device()->getResourceID(AMDILDevice::ARENA_UAV_ID
)) {
2062 assert(0 && "Found an atomic instruction that has "
2063 "an arena uav id!");
2066 KM
->setUAVID(*siBegin
, curRes
.bits
.ResourceID
);
2068 mMFI
= (*miBegin
)->getParent()->getParent()
2069 ->getInfo
<AMDILMachineFunctionInfo
>();
2071 mMFI
->uav_insert(curRes
.bits
.ResourceID
);
2073 setAsmPrinterFlags(*miBegin
, curRes
);
// Marks each machine instruction in `cacheableSet` as a cacheable read when
// the device's RAW UAV is UAV 11.
//
// NOTE(review): this extraction is word-wrapped and the embedded original
// line numbers skip (2081, 2083-2084, 2087, 2089, 2093, 2095-2096,
// 2104-2107 are absent), so the return type, remaining parameters (a
// TargetMachine `TM` and a debug flag are referenced below), closing braces
// and any `if (mDebug)` guards around the dbgs() calls are missing from
// view. Comments describe only the statements that are visible.
2080 annotateCacheableInstrs(
// Set of load instructions previously determined safe to cache even though
// their whole pointer path is not cacheable.
2082 CacheableInstrSet
&cacheableSet
,
// Fetch the AMDIL subtarget from `TM` (TM presumably a parameter declared on
// a missing line -- TODO confirm against the full source).
2085 const AMDILSubtarget
*STM
= &TM
.getSubtarget
<AMDILSubtarget
>();
2086 // AMDILKernelManager *KM = (AMDILKernelManager*)STM->getKernelManager();
2088 CacheableInstrSet::iterator miBegin
, miEnd
;
// Walk every instruction in the cacheable set.
2090 for (miBegin
= cacheableSet
.begin(),
2091 miEnd
= cacheableSet
.end();
2092 miBegin
!= miEnd
; ++miBegin
) {
2094 dbgs() << "Annotating instr as cacheable. Inst: ";
// Read the instruction's current resource encoding out of its AsmPrinter
// flags.
2097 AMDILAS::InstrResEnc curRes
;
2098 getAsmPrinterFlags(*miBegin
, curRes
);
2099 // If UAV11 is enabled, then we can enable cached reads.
2100 if (STM
->device()->getResourceID(AMDILDevice::RAW_UAV_ID
) == 11) {
// Set the cacheable-read bit, pin the resource to UAV 11, and write the
// updated encoding back onto the instruction.
2101 curRes
.bits
.CacheableRead
= 1;
2102 curRes
.bits
.ResourceID
= 11;
2103 setAsmPrinterFlags(*miBegin
, curRes
);
2108 // Annotate the instructions along various pointer paths. The paths that
2109 // are handled are the raw, byte and cacheable pointer paths.
//
// NOTE(review): the function-name/signature line is missing from this
// extraction (original lines 2110-2112, 2114-2115, 2118-2120, 2122-2123 are
// absent). The call at original line 2502 below invokes annotatePtrPath(TM,
// PtrToInstMap, rawPtrs, bytePtrs, cacheablePtrs, numWriteImages, mDebug),
// which matches the visible parameters here -- presumably this is that
// function; confirm against the full source. The `rawPtrs`/`bytePtrs`
// parameters and the early-return body of the empty-map check are among the
// missing lines.
2113 PtrIMap
&PtrToInstMap
,
2116 CacheableSet
&cacheablePtrs
,
2117 uint32_t numWriteImages
,
// Nothing to annotate if no pointer maps to any instruction (the body of
// this guard is on a missing line -- presumably an early return).
2121 if (PtrToInstMap
.empty()) {
2124 // First we can check the cacheable pointers
2125 annotateCacheablePtrs(TM
, PtrToInstMap
, cacheablePtrs
,
2126 bytePtrs
, numWriteImages
, mDebug
);
2128 // Next we annotate the byte pointers
2129 annotateBytePtrs(TM
, PtrToInstMap
, bytePtrs
, rawPtrs
, mDebug
);
2131 // Next we annotate the raw pointers
2132 annotateRawPtrs(TM
, PtrToInstMap
, rawPtrs
, bytePtrs
,
2133 numWriteImages
, mDebug
);
2135 // Allocate MultiUAV pointer ID's for the raw/conflict pointers.
// Assigns UAV resource IDs to raw and conflict pointers when the device
// supports MultiUAV: raw global pointers get their own UAV number (starting
// at `numWriteImages`), while software-emulated address spaces and conflict
// pointers fall back to the shared raw/arena UAV.
//
// NOTE(review): this extraction is word-wrapped and many original lines are
// absent (e.g. 2141, 2145-2146, 2148-2149, 2159-2160, 2171-2172, 2188,
// 2190-2191, 2195, 2197-2199, 2203, 2211, 2216-2217, 2221-2235, 2238, 2242,
// 2258-2259, 2261, 2264, 2267-2268, 2273-2274, 2278, 2283-2284, 2286-2288,
// 2291, 2300-2302, 2305, 2309, 2323-2324, 2326, 2329, 2331-2332, 2337-2338),
// so `continue`s, `else` branches, closing braces, the `bytePtrs`/`rawPtrs`
// parameters, and `if (mDebug)` guards around dbgs() calls are missing from
// view. Comments describe only the visible statements.
2137 allocateMultiUAVPointers(
2138 MachineFunction
&MF
,
2139 const AMDILTargetMachine
*ATM
,
// Map from each pointer Value to the machine instructions that use it.
2140 PtrIMap
&PtrToInstMap
,
// Pointers whose paths conflict; these share the raw (or arena) UAV.
2142 ConflictSet
&conflictPtrs
,
// Cacheable pointers are skipped: they must stay on a raw uav.
2143 CacheableSet
&cacheablePtrs
,
2144 uint32_t numWriteImages
,
// Nothing to allocate when no pointer maps to an instruction (guard body is
// on a missing line -- presumably an early return).
2147 if (PtrToInstMap
.empty()) {
2150 AMDILMachineFunctionInfo
*mMFI
= MF
.getInfo
<AMDILMachineFunctionInfo
>();
// Next UAV number to hand out; starts just past the write images.
2151 uint32_t curUAV
= numWriteImages
;
// Whether curUAV should keep advancing per pointer (flipped on a missing
// line -- TODO confirm).
2152 bool increment
= true;
2153 const AMDILSubtarget
*STM
2154 = ATM
->getSubtargetImpl();
2155 // If the RAW_UAV_ID is a value that is larger than the max number of write
2156 // images, then we use that UAV ID.
2157 if (numWriteImages
>= OPENCL_MAX_WRITE_IMAGES
) {
2158 curUAV
= STM
->device()->getResourceID(AMDILDevice::RAW_UAV_ID
);
2161 AMDILKernelManager
*KM
= STM
->getKernelManager();
2162 PtrSet::iterator siBegin
, siEnd
;
2163 std::vector
<MachineInstr
*>::iterator miBegin
, miEnd
;
2164 // First lets handle the raw pointers.
2165 for (siBegin
= rawPtrs
.begin(), siEnd
= rawPtrs
.end();
2166 siBegin
!= siEnd
; ++siBegin
) {
2167 assert((*siBegin
)->getType()->isPointerTy() && "We must be a pointer type "
2168 "to be processed at this point!");
2169 const PointerType
*PT
= dyn_cast
<PointerType
>((*siBegin
)->getType());
// Skip pointers already classified as conflicting, or non-pointer types
// (body of this guard is on a missing line -- presumably `continue`).
2170 if (conflictPtrs
.count(*siBegin
) || !PT
) {
2173 // We only want to process global address space pointers
2174 if (PT
->getAddressSpace() != AMDILAS::GLOBAL_ADDRESS
) {
2175 if ((PT
->getAddressSpace() == AMDILAS::LOCAL_ADDRESS
2176 && STM
->device()->usesSoftware(AMDILDeviceInfo::LocalMem
))
2177 || (PT
->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS
2178 && STM
->device()->usesSoftware(AMDILDeviceInfo::ConstantMem
))
2179 || (PT
->getAddressSpace() == AMDILAS::REGION_ADDRESS
2180 && STM
->device()->usesSoftware(AMDILDeviceInfo::RegionMem
))) {
2181 // If we are using software emulated hardware features, then
2182 // we need to specify that they use the raw uav and not
2183 // zero-copy uav. The easiest way to do this is to assume they
2184 // conflict with another pointer. Any pointer that conflicts
2185 // with another pointer is assigned to the raw uav or the
2186 // arena uav if no raw uav exists.
2187 const PointerType
*PT
= dyn_cast
<PointerType
>((*siBegin
)->getType());
2189 conflictPtrs
.insert(*siBegin
);
// Private (scratch) pointers: software-emulated private memory is likewise
// pushed into the conflict set; otherwise the instructions below are pinned
// to the device's SCRATCH resource ID.
2192 if (PT
->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS
) {
2193 if (STM
->device()->usesSoftware(AMDILDeviceInfo::PrivateMem
)) {
2194 const PointerType
*PT
= dyn_cast
<PointerType
>((*siBegin
)->getType());
2196 conflictPtrs
.insert(*siBegin
);
2200 dbgs() << "Scratch Pointer '" << (*siBegin
)->getName()
2201 << "' being assigned uav "<<
2202 STM
->device()->getResourceID(AMDILDevice::SCRATCH_ID
) << "\n";
// Rewrite every instruction that uses this scratch pointer to the SCRATCH
// resource ID, record the UAV with the kernel manager, and note it in the
// machine-function info.
2204 for (miBegin
= PtrToInstMap
[*siBegin
].begin(),
2205 miEnd
= PtrToInstMap
[*siBegin
].end();
2206 miBegin
!= miEnd
; ++miBegin
) {
2207 AMDILAS::InstrResEnc curRes
;
2208 getAsmPrinterFlags(*miBegin
, curRes
);
2209 curRes
.bits
.ResourceID
= STM
->device()
2210 ->getResourceID(AMDILDevice::SCRATCH_ID
);
2212 dbgs() << "Updated instruction to bitmask ";
2213 dbgs().write_hex(curRes
.u16all
);
2214 dbgs() << " with ResID " << curRes
.bits
.ResourceID
;
2215 dbgs() << ". Inst: ";
2218 setAsmPrinterFlags((*miBegin
), curRes
);
2219 KM
->setUAVID(*siBegin
, curRes
.bits
.ResourceID
);
2220 mMFI
->uav_insert(curRes
.bits
.ResourceID
);
2226 // If more than just UAV 11 is cacheable, then we can remove
// Cacheable raw pointers keep their existing (raw) UAV; no multi-uav slot
// is spent on them (loop-skip body is on a missing line).
2228 if (cacheablePtrs
.count(*siBegin
)) {
2230 dbgs() << "Raw Pointer '" << (*siBegin
)->getName()
2231 << "' is cacheable, not allocating a multi-uav for it!\n";
2236 dbgs() << "Raw Pointer '" << (*siBegin
)->getName()
2237 << "' being assigned uav " << curUAV
<< "\n";
// Even a pointer with no using instructions still gets its UAV recorded.
2239 if (PtrToInstMap
[*siBegin
].empty()) {
2240 KM
->setUAVID(*siBegin
, curUAV
);
2241 mMFI
->uav_insert(curUAV
);
2243 // For all instructions here, we are going to set the new UAV to the curUAV
2244 // number and not the value that it currently is set to.
2245 for (miBegin
= PtrToInstMap
[*siBegin
].begin(),
2246 miEnd
= PtrToInstMap
[*siBegin
].end();
2247 miBegin
!= miEnd
; ++miBegin
) {
2248 AMDILAS::InstrResEnc curRes
;
2249 getAsmPrinterFlags(*miBegin
, curRes
);
2250 curRes
.bits
.ResourceID
= curUAV
;
// Atomics carry their resource ID as the last operand; an arena UAV on an
// atomic is a hard error.
2251 if (isAtomicInst(ATM
->getInstrInfo(), *miBegin
)) {
2252 (*miBegin
)->getOperand((*miBegin
)->getNumOperands()-1)
2253 .setImm(curRes
.bits
.ResourceID
);
2254 if (curRes
.bits
.ResourceID
2255 == STM
->device()->getResourceID(AMDILDevice::ARENA_UAV_ID
)) {
2256 assert(0 && "Found an atomic instruction that has "
2257 "an arena uav id!");
// Arena UAV accesses are byte-addressed and cannot be cached reads.
2260 if (curUAV
== STM
->device()->getResourceID(AMDILDevice::ARENA_UAV_ID
)) {
2262 dbgs() << __LINE__
<< ": Setting byte store bit on instruction: ";
2263 (*miBegin
)->print(dbgs());
2265 curRes
.bits
.ByteStore
= 1;
2266 curRes
.bits
.CacheableRead
= 0;
2269 dbgs() << "Updated instruction to bitmask ";
2270 dbgs().write_hex(curRes
.u16all
);
2271 dbgs() << " with ResID " << curRes
.bits
.ResourceID
;
2272 dbgs() << ". Inst: ";
2275 setAsmPrinterFlags(*miBegin
, curRes
);
2276 KM
->setUAVID(*siBegin
, curRes
.bits
.ResourceID
);
2277 mMFI
->uav_insert(curRes
.bits
.ResourceID
);
2279 // If we make it here, we can increment the uav counter if we are less
2280 // than the max write image count. Otherwise we set it to the default
2281 // UAV and leave it.
// (The increment branch body is on missing lines; only the fallback to the
// device RAW UAV is visible.)
2282 if (increment
&& curUAV
< (OPENCL_MAX_WRITE_IMAGES
- 1)) {
2285 curUAV
= STM
->device()->getResourceID(AMDILDevice::RAW_UAV_ID
);
// With all 8 write images used, conflict pointers share the raw UAV.
2289 if (numWriteImages
== 8) {
2290 curUAV
= STM
->device()->getResourceID(AMDILDevice::RAW_UAV_ID
);
2292 // Now lets handle the conflict pointers
2293 for (siBegin
= conflictPtrs
.begin(), siEnd
= conflictPtrs
.end();
2294 siBegin
!= siEnd
; ++siBegin
) {
2295 assert((*siBegin
)->getType()->isPointerTy() && "We must be a pointer type "
2296 "to be processed at this point!");
2297 const PointerType
*PT
= dyn_cast
<PointerType
>((*siBegin
)->getType());
2298 // We only want to process global address space pointers
2299 if (!PT
|| PT
->getAddressSpace() != AMDILAS::GLOBAL_ADDRESS
) {
2303 dbgs() << "Conflict Pointer '" << (*siBegin
)->getName()
2304 << "' being assigned uav " << curUAV
<< "\n";
// As above: record the UAV even when the pointer has no instructions.
2306 if (PtrToInstMap
[*siBegin
].empty()) {
2307 KM
->setUAVID(*siBegin
, curUAV
);
2308 mMFI
->uav_insert(curUAV
);
// All conflict-pointer instructions share the single `curUAV` resource.
2310 for (miBegin
= PtrToInstMap
[*siBegin
].begin(),
2311 miEnd
= PtrToInstMap
[*siBegin
].end();
2312 miBegin
!= miEnd
; ++miBegin
) {
2313 AMDILAS::InstrResEnc curRes
;
2314 getAsmPrinterFlags(*miBegin
, curRes
);
2315 curRes
.bits
.ResourceID
= curUAV
;
// Same atomic handling as the raw-pointer loop above.
2316 if (isAtomicInst(ATM
->getInstrInfo(), *miBegin
)) {
2317 (*miBegin
)->getOperand((*miBegin
)->getNumOperands()-1)
2318 .setImm(curRes
.bits
.ResourceID
);
2319 if (curRes
.bits
.ResourceID
2320 == STM
->device()->getResourceID(AMDILDevice::ARENA_UAV_ID
)) {
2321 assert(0 && "Found an atomic instruction that has "
2322 "an arena uav id!");
2325 if (curUAV
== STM
->device()->getResourceID(AMDILDevice::ARENA_UAV_ID
)) {
2327 dbgs() << __LINE__
<< ": Setting byte store bit on instruction: ";
2328 (*miBegin
)->print(dbgs());
2330 curRes
.bits
.ByteStore
= 1;
2333 dbgs() << "Updated instruction to bitmask ";
2334 dbgs().write_hex(curRes
.u16all
);
2335 dbgs() << " with ResID " << curRes
.bits
.ResourceID
;
2336 dbgs() << ". Inst: ";
2339 setAsmPrinterFlags(*miBegin
, curRes
);
2340 KM
->setUAVID(*siBegin
, curRes
.bits
.ResourceID
);
2341 mMFI
->uav_insert(curRes
.bits
.ResourceID
);
2345 // The first thing we should do is to allocate the default
2346 // ID for each load/store/atomic instruction so that
2347 // it is correctly allocated. Everything else after this
2348 // is just an optimization to more efficiently allocate
//
// NOTE(review): the function-name/signature line is missing from this
// extraction (original lines 2349-2351 and 2354-2355 are absent). The calls
// at original lines 2388 and 2549 below invoke allocateDefaultIDs(ATM, MF,
// mDebug), matching the visible parameters here -- presumably this is that
// function; confirm against the full source. The `mDebug` parameter and the
// opening brace are among the missing lines.
2352 const AMDILTargetMachine
*ATM
,
2353 MachineFunction
&MF
,
// Visit every basic block of the machine function.
2356 for (MachineFunction::iterator mfBegin
= MF
.begin(),
2357 mfEnd
= MF
.end(); mfBegin
!= mfEnd
; ++mfBegin
) {
2358 MachineBasicBlock
*MB
= mfBegin
;
// Visit every instruction in the block.
2359 for (MachineBasicBlock::iterator mbb
= MB
->begin(), mbe
= MB
->end();
2360 mbb
!= mbe
; ++mbb
) {
2361 MachineInstr
*MI
= mbb
;
// Only memory-touching instructions (loads, stores, atomics) carry a
// resource encoding that needs a default ID.
2362 if (isLoadInst(ATM
->getInstrInfo(), MI
)
2363 || isStoreInst(ATM
->getInstrInfo(), MI
)
2364 || isAtomicInst(ATM
->getInstrInfo(), MI
)) {
// Read the instruction's current encoding and delegate the per-instruction
// default-ID assignment to allocateDefaultID.
2365 AMDILAS::InstrResEnc curRes
;
2366 getAsmPrinterFlags(MI
, curRes
);
2367 allocateDefaultID(ATM
, curRes
, MI
, mDebug
);
// Pass entry point for the EG (Evergreen-class) pointer manager: assigns
// default resource IDs, classifies every pointer in the function (byte, raw,
// cacheable, conflict, image, counter, constant-pool), then annotates the
// instructions along each pointer path and, on MultiUAV devices, allocates
// per-pointer UAVs.
//
// NOTE(review): this extraction is word-wrapped and drops many original
// lines (e.g. 2375, 2381, 2384-2385, 2387, 2389, 2392, 2394, 2399,
// 2403-2404, 2407-2408, 2412, 2415-2416, 2420, 2422-2423, 2425-2426,
// 2428-2429, 2433, 2437, 2444-2445, 2450, 2453, 2455, 2458, 2460-2461,
// 2463, 2467, 2471, 2474, 2480, 2492, 2495, 2497-2498, 2504, 2507,
// 2517-2518, 2521, 2523, 2529+). Declarations for FIToPtrMap, bytePtrs,
// rawPtrs, images, counters, cpool and lookupTable, the `if (mDebug)`
// guards, the error-reporting calls around CompilerErrorMessage, and the
// final `return changed;` are therefore missing from view.
2374 AMDILEGPointerManager::runOnMachineFunction(MachineFunction
&MF
)
2376 bool changed
= false;
// The generic TargetMachine member is known to be an AMDILTargetMachine in
// this backend, hence the cast.
2377 const AMDILTargetMachine
*ATM
2378 = reinterpret_cast<const AMDILTargetMachine
*>(&TM
);
2379 AMDILMachineFunctionInfo
*mMFI
=
2380 MF
.getInfo
<AMDILMachineFunctionInfo
>();
2382 dbgs() << getPassName() << "\n";
2383 dbgs() << MF
.getFunction()->getName() << "\n";
2386 // Start out by allocating the default ID's to all instructions in the
2388 allocateDefaultIDs(ATM
, MF
, mDebug
);
2390 // A set of all pointers are tracked in this map and
2391 // if multiple pointers are detected, they go to the same
2393 PtrIMap PtrToInstMap
;
2395 // All of the instructions that are loads, stores or pointer
2396 // conflicts are tracked in the map with a set of all values
2397 // that reference the instruction stored.
2398 InstPMap InstToPtrMap
;
2400 // In order to track across stack entries, we need a map between a
2401 // frame index and a pointer. That way when we load from a frame
2402 // index, we know what pointer was stored to the frame index.
2405 // Set of all the pointers that are byte pointers. Byte pointers
2406 // are required to have their instructions go to the arena.
2409 // Set of all the pointers that are cacheable. All of the cache pointers
2410 // are required to go to a raw uav and cannot go to arena.
2411 CacheableSet cacheablePtrs
;
2413 // Set of all the pointers that go into a raw buffer. A pointer can
2414 // exist in either rawPtrs or bytePtrs but not both.
2417 // Set of all the pointers that end up having a conflicting instruction
2418 // somewhere in the pointer path.
2419 ConflictSet conflictPtrs
;
2421 // Set of all pointers that are images
2424 // Set of all pointers that are counters
2427 // Set of all pointers that load from a constant pool
2430 // Mapping from BB to infomation about the cacheability of the
2431 // global load instructions in it.
2432 MBBCacheableMap bbCacheable
;
2434 // A set of load instructions that are cacheable
2435 // even if all the load instructions of the ptr are not.
2436 CacheableInstrSet cacheableSet
;
2438 // The lookup table holds all of the registers that
2439 // are used as we assign pointers values to them.
2440 // If two pointers collide on the lookup table, then
2441 // we assign them to the same UAV. If one of the
2442 // pointers is byte addressable, then we assign
2443 // them to arena, otherwise we assign them to raw.
2446 // First we need to go through all of the arguments and assign the
2447 // live in registers to the lookup table and the pointer mapping.
2448 uint32_t numWriteImages
= parseArguments(MF
, lookupTable
, ATM
,
2449 cacheablePtrs
, images
, counters
, mDebug
);
2451 // Lets do some error checking on the results of the parsing.
// Too many atomic counters or images is a compile error (the surrounding
// error-reporting call is on missing lines).
2452 if (counters
.size() > OPENCL_MAX_NUM_ATOMIC_COUNTERS
) {
2454 amd::CompilerErrorMessage
[INSUFFICIENT_COUNTER_RESOURCES
]);
2456 if (numWriteImages
> OPENCL_MAX_WRITE_IMAGES
2457 || (images
.size() - numWriteImages
> OPENCL_MAX_READ_IMAGES
)) {
2459 amd::CompilerErrorMessage
[INSUFFICIENT_IMAGE_RESOURCES
]);
2462 // Now lets parse all of the instructions and update our
2464 parseFunction(this, ATM
, MF
, InstToPtrMap
, PtrToInstMap
,
2465 FIToPtrMap
, lookupTable
, bytePtrs
, conflictPtrs
, cpool
,
2466 bbCacheable
, mDebug
);
2468 // We need to go over our pointer map and find all the conflicting
2469 // pointers that have byte stores and put them in the bytePtr map.
2470 // All conflicting pointers that don't have byte stores go into
2472 detectConflictingPointers(ATM
, InstToPtrMap
, bytePtrs
, rawPtrs
,
2473 conflictPtrs
, mDebug
);
2475 // The next step is to detect whether the pointer should be added to
2476 // the fully cacheable set or not. A pointer is marked as cacheable if
2477 // no store instruction exists.
2478 detectFullyCacheablePointers(ATM
, PtrToInstMap
, rawPtrs
,
2479 cacheablePtrs
, conflictPtrs
, mDebug
);
2481 // Disable partially cacheable for now when multiUAV is on.
2482 // SC versions before SC139 have a bug that generates incorrect
2483 // addressing for some cached accesses.
2484 if (!ATM
->getSubtargetImpl()
2485 ->device()->isSupported(AMDILDeviceInfo::MultiUAV
) &&
2486 ATM
->getSubtargetImpl()->calVersion() >= CAL_VERSION_SC_139
) {
2487 // Now we take the set of loads that have no reachable stores and
2488 // create a list of additional instructions (those that aren't already
2489 // in a cacheablePtr set) that are safe to mark as cacheable.
2490 detectCacheableInstrs(bbCacheable
, InstToPtrMap
, cacheablePtrs
,
2491 bytePtrs
, cacheableSet
, mDebug
);
2493 // Annotate the additional instructions computed above as cacheable.
2494 // Note that this should not touch any instructions annotated in
2496 annotateCacheableInstrs(TM
, cacheableSet
, mDebug
);
2499 // Now that we have detected everything we need to detect, lets go through an
2500 // annotate the instructions along the pointer path for each of the
2501 // various pointer types.
2502 annotatePtrPath(TM
, PtrToInstMap
, rawPtrs
, bytePtrs
,
2503 cacheablePtrs
, numWriteImages
, mDebug
);
2505 // Annotate the atomic counter path if any exists.
2506 annotateAppendPtrs(TM
, PtrToInstMap
, counters
, mDebug
);
2508 // If we support MultiUAV, then we need to determine how
2509 // many write images exist so that way we know how many UAV are
2510 // left to allocate to buffers.
2511 if (ATM
->getSubtargetImpl()
2512 ->device()->isSupported(AMDILDeviceInfo::MultiUAV
)) {
2513 // We now have (OPENCL_MAX_WRITE_IMAGES - numPtrs) buffers open for
2514 // multi-uav allocation.
2515 allocateMultiUAVPointers(MF
, ATM
, PtrToInstMap
, rawPtrs
,
2516 conflictPtrs
, cacheablePtrs
, numWriteImages
, mDebug
);
2519 // The last step is to detect if we have any alias constant pool operations.
2520 // This is not likely, but does happen on occasion with double precision
2522 detectAliasedCPoolOps(TM
, cpool
, mDebug
);
// Debug dumps of each pointer classification (presumably guarded by mDebug
// on a missing line -- TODO confirm).
2524 dumpPointers(bytePtrs
, "Byte Store Ptrs");
2525 dumpPointers(rawPtrs
, "Raw Ptrs");
2526 dumpPointers(cacheablePtrs
, "Cache Load Ptrs");
2527 dumpPointers(counters
, "Atomic Counters");
2528 dumpPointers(images
, "Images");
2533 // The default pointer manager just assigns the default ID's to
2534 // each load/store instruction and does nothing else. This is
2535 // the pointer manager for the 7XX series of cards.
2537 AMDILPointerManager::runOnMachineFunction(MachineFunction
&MF
)
2539 bool changed
= false;
2540 const AMDILTargetMachine
*ATM
2541 = reinterpret_cast<const AMDILTargetMachine
*>(&TM
);
2543 dbgs() << getPassName() << "\n";
2544 dbgs() << MF
.getFunction()->getName() << "\n";
2547 // On the 7XX we don't have to do any special processing, so we
2548 // can just allocate the default ID and be done with it.
2549 allocateDefaultIDs(ATM
, MF
, mDebug
);