/*
 * Copyright (c) 2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __GPU_STATIC_INST_HH__
#define __GPU_STATIC_INST_HH__

/**
 * @file gpu_static_inst.hh
 *
 * Defines the base class representing static instructions for the GPU. The
 * instructions are "static" because they contain no dynamic instruction
 * information. GPUStaticInst corresponds to the StaticInst class for the CPU
 * models.
 */
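
/*
 * Concrete instructions derive from GPUStaticInst, mark their properties
 * via setFlag() in their constructors, and implement the pure virtual
 * interface declared below; KernelLaunchStaticInst at the end of this file
 * is a small, complete example. At run time the timing model wraps the
 * static instruction in a GPUDynInst (see gpu-compute/gpu_dyn_inst.hh) and
 * drives it through execute() and, for memory operations,
 * initiateAcc()/completeAcc() with that dynamic instance.
 */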

#include <bitset>
#include <cstdint>
#include <string>

#include "enums/GPUStaticInstFlags.hh"
#include "enums/StorageClassType.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/misc.hh"

class BaseOperand;
class BaseRegOperand;
class Wavefront;

class GPUStaticInst : public GPUStaticInstFlags
{
  public:
    GPUStaticInst(const std::string &opcode);
    void instAddr(int inst_addr) { _instAddr = inst_addr; }
    int instAddr() const { return _instAddr; }
    int nextInstAddr() const { return _instAddr + instSize(); }

    void instNum(int num) { _instNum = num; }

    int instNum() { return _instNum; }

    void ipdInstNum(int num) { _ipdInstNum = num; }

    int ipdInstNum() const { return _ipdInstNum; }

    virtual void execute(GPUDynInstPtr gpuDynInst) = 0;
    virtual void generateDisassembly() = 0;
    const std::string& disassemble();
    virtual int getNumOperands() = 0;
    virtual bool isCondRegister(int operandIndex) = 0;
    virtual bool isScalarRegister(int operandIndex) = 0;
    virtual bool isVectorRegister(int operandIndex) = 0;
    virtual bool isSrcOperand(int operandIndex) = 0;
    virtual bool isDstOperand(int operandIndex) = 0;
    virtual int getOperandSize(int operandIndex) = 0;

    virtual int getRegisterIndex(int operandIndex,
                                 GPUDynInstPtr gpuDynInst) = 0;

    virtual int numDstRegOperands() = 0;
    virtual int numSrcRegOperands() = 0;

    virtual bool isValid() const = 0;

    bool isALU() const { return _flags[ALU]; }
    bool isBranch() const { return _flags[Branch]; }
    bool isNop() const { return _flags[Nop]; }
    bool isReturn() const { return _flags[Return]; }

    bool
    isUnconditionalJump() const
    {
        return _flags[UnconditionalJump];
    }

    bool isSpecialOp() const { return _flags[SpecialOp]; }
    bool isWaitcnt() const { return _flags[Waitcnt]; }

    bool isBarrier() const { return _flags[MemBarrier]; }
    bool isMemFence() const { return _flags[MemFence]; }
    bool isMemRef() const { return _flags[MemoryRef]; }
    bool isFlat() const { return _flags[Flat]; }
    bool isLoad() const { return _flags[Load]; }
    bool isStore() const { return _flags[Store]; }

    bool
    isAtomic() const
    {
        return _flags[AtomicReturn] || _flags[AtomicNoReturn];
    }

    bool isAtomicNoRet() const { return _flags[AtomicNoReturn]; }
    bool isAtomicRet() const { return _flags[AtomicReturn]; }

    bool isScalar() const { return _flags[Scalar]; }
    bool readsSCC() const { return _flags[ReadsSCC]; }
    bool writesSCC() const { return _flags[WritesSCC]; }
    bool readsVCC() const { return _flags[ReadsVCC]; }
    bool writesVCC() const { return _flags[WritesVCC]; }

    bool isAtomicAnd() const { return _flags[AtomicAnd]; }
    bool isAtomicOr() const { return _flags[AtomicOr]; }
    bool isAtomicXor() const { return _flags[AtomicXor]; }
    bool isAtomicCAS() const { return _flags[AtomicCAS]; }
    bool isAtomicExch() const { return _flags[AtomicExch]; }
    bool isAtomicAdd() const { return _flags[AtomicAdd]; }
    bool isAtomicSub() const { return _flags[AtomicSub]; }
    bool isAtomicInc() const { return _flags[AtomicInc]; }
    bool isAtomicDec() const { return _flags[AtomicDec]; }
    bool isAtomicMax() const { return _flags[AtomicMax]; }
    bool isAtomicMin() const { return _flags[AtomicMin]; }

    bool
    isArgLoad() const
    {
        return (_flags[KernArgSegment] || _flags[ArgSegment]) && _flags[Load];
    }

    bool
    isGlobalMem() const
    {
        return _flags[MemoryRef] && (_flags[GlobalSegment] ||
               _flags[PrivateSegment] || _flags[ReadOnlySegment] ||
               _flags[SpillSegment]);
    }

    bool
    isLocalMem() const
    {
        return _flags[MemoryRef] && _flags[GroupSegment];
    }

    bool isArgSeg() const { return _flags[ArgSegment]; }
    bool isGlobalSeg() const { return _flags[GlobalSegment]; }
    bool isGroupSeg() const { return _flags[GroupSegment]; }
    bool isKernArgSeg() const { return _flags[KernArgSegment]; }
    bool isPrivateSeg() const { return _flags[PrivateSegment]; }
    bool isReadOnlySeg() const { return _flags[ReadOnlySegment]; }
    bool isSpillSeg() const { return _flags[SpillSegment]; }

    bool isWorkitemScope() const { return _flags[WorkitemScope]; }
    bool isWavefrontScope() const { return _flags[WavefrontScope]; }
    bool isWorkgroupScope() const { return _flags[WorkgroupScope]; }
    bool isDeviceScope() const { return _flags[DeviceScope]; }
    bool isSystemScope() const { return _flags[SystemScope]; }
    bool isNoScope() const { return _flags[NoScope]; }

    bool isRelaxedOrder() const { return _flags[RelaxedOrder]; }
    bool isAcquire() const { return _flags[Acquire]; }
    bool isRelease() const { return _flags[Release]; }
    bool isAcquireRelease() const { return _flags[AcquireRelease]; }
    bool isNoOrder() const { return _flags[NoOrder]; }

    /**
     * Coherence domain of a memory instruction. Only valid for
     * machine ISA. The coherence domain specifies where it is
     * possible to perform memory synchronization, e.g., acquire
     * or release, from the shader kernel.
     *
     * isGloballyCoherent(): returns true if the kernel is sharing memory
     * with other work-items on the same device (GPU).
     *
     * isSystemCoherent(): returns true if the kernel is sharing memory
     * with other work-items on a different device (GPU) or the host (CPU).
     */
    bool isGloballyCoherent() const { return _flags[GloballyCoherent]; }
    bool isSystemCoherent() const { return _flags[SystemCoherent]; }
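
    /*
     * For example, a memory pipeline can combine the segment and coherence
     * predicates above to steer and scope an access. A rough sketch (the
     * surrounding pipeline code is hypothetical, not part of this class):
     *
     *   if (inst->isLocalMem()) {
     *       // group-segment access: goes to the LDS / local-memory pipeline
     *   } else if (inst->isGlobalMem()) {
     *       // global/private/readonly/spill access: goes to the global
     *       // memory pipeline; isGloballyCoherent()/isSystemCoherent()
     *       // indicate how far the access must be made visible
     *   }
     */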

    virtual int instSize() const = 0;

    // only used for memory instructions
    virtual void
    initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        fatal("calling initiateAcc() on a non-memory instruction.\n");
    }

    // only used for memory instructions
    virtual void
    completeAcc(GPUDynInstPtr gpuDynInst)
    {
        fatal("calling completeAcc() on a non-memory instruction.\n");
    }
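
    /*
     * Memory instructions are handled in two phases: initiateAcc() injects
     * the access into the memory system and completeAcc() runs once the
     * response returns (e.g., writing load data back to the destination
     * registers). Non-memory instructions keep the fatal() defaults above.
     * A hypothetical load override might look roughly like this (the class
     * below is illustrative only):
     *
     *   void
     *   ExampleLoadInst::initiateAcc(GPUDynInstPtr gpuDynInst)
     *   {
     *       // build and send one read request per active lane
     *   }
     *
     *   void
     *   ExampleLoadInst::completeAcc(GPUDynInstPtr gpuDynInst)
     *   {
     *       // copy the returned bytes into the destination vector register
     *   }
     */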

    virtual uint32_t getTargetPc() { return 0; }

    static uint64_t dynamic_id_count;

    // For flat memory accesses
    Enums::StorageClassType executed_as;

    void setFlag(Flags flag) { _flags[flag] = true; }

    virtual void
    execLdAcq(GPUDynInstPtr gpuDynInst)
    {
        fatal("calling execLdAcq() on a non-load instruction.\n");
    }

    virtual void
    execSt(GPUDynInstPtr gpuDynInst)
    {
        fatal("calling execSt() on a non-store instruction.\n");
    }

    virtual void
    execAtomic(GPUDynInstPtr gpuDynInst)
    {
        fatal("calling execAtomic() on a non-atomic instruction.\n");
    }

    virtual void
    execAtomicAcq(GPUDynInstPtr gpuDynInst)
    {
        fatal("calling execAtomicAcq() on a non-atomic instruction.\n");
    }

  protected:
    const std::string opcode;
    std::string disassembly;
    int _instNum;
    int _instAddr;
    /**
     * Identifier of the immediate post-dominator instruction.
     */
    int _ipdInstNum;

    std::bitset<Num_Flags> _flags;
};

class KernelLaunchStaticInst : public GPUStaticInst
{
  public:
    KernelLaunchStaticInst() : GPUStaticInst("kernel_launch")
    {
        setFlag(Nop);
        setFlag(Scalar);
        setFlag(Acquire);
        setFlag(SystemScope);
        setFlag(GlobalSegment);
    }

    void
    execute(GPUDynInstPtr gpuDynInst) override
    {
        fatal("kernel launch instruction should not be executed\n");
    }

    void
    generateDisassembly() override
    {
        disassembly = opcode;
    }

    int getNumOperands() override { return 0; }
    bool isCondRegister(int operandIndex) override { return false; }
    bool isScalarRegister(int operandIndex) override { return false; }
    bool isVectorRegister(int operandIndex) override { return false; }
    bool isSrcOperand(int operandIndex) override { return false; }
    bool isDstOperand(int operandIndex) override { return false; }
    int getOperandSize(int operandIndex) override { return 0; }

    int
    getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
    {
        return 0;
    }

    int numDstRegOperands() override { return 0; }
    int numSrcRegOperands() override { return 0; }
    bool isValid() const override { return true; }
    int instSize() const override { return 0; }
};

#endif // __GPU_STATIC_INST_HH__