79603f408f4a6f2d20f500d181355e0e5ad8bfe7
[gem5.git] / src / arch / hsail / insts / branch.hh
1 /*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36 #ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__
37 #define __ARCH_HSAIL_INSTS_BRANCH_HH__
38
39 #include "arch/hsail/insts/gpu_static_inst.hh"
40 #include "arch/hsail/operand.hh"
41 #include "gpu-compute/gpu_dyn_inst.hh"
42 #include "gpu-compute/wavefront.hh"
43
44 namespace HsailISA
45 {
46
47 // The main difference between a direct branch and an indirect branch
48 // is whether the target is a register or a label, so we can share a
49 // lot of code if we template the base implementation on that type.
50 template<typename TargetType>
51 class BrnInstBase : public HsailGPUStaticInst
52 {
53 public:
54 void generateDisassembly() override;
55
56 Brig::BrigWidth8_t width;
57 TargetType target;
58
59 BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
60 : HsailGPUStaticInst(obj, "brn")
61 {
62 setFlag(Branch);
63 setFlag(UnconditionalJump);
64 width = ((Brig::BrigInstBr*)ib)->width;
65 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
66 target.init(op_offs, obj);
67 }
68
69 uint32_t getTargetPc() override { return target.getTarget(0, 0); }
70
71 bool isVectorRegister(int operandIndex) override {
72 assert(operandIndex >= 0 && operandIndex < getNumOperands());
73 return target.isVectorRegister();
74 }
75 bool isCondRegister(int operandIndex) override {
76 assert(operandIndex >= 0 && operandIndex < getNumOperands());
77 return target.isCondRegister();
78 }
79 bool isScalarRegister(int operandIndex) override {
80 assert(operandIndex >= 0 && operandIndex < getNumOperands());
81 return target.isScalarRegister();
82 }
83
84 bool isSrcOperand(int operandIndex) override {
85 assert(operandIndex >= 0 && operandIndex < getNumOperands());
86 return true;
87 }
88
89 bool isDstOperand(int operandIndex) override {
90 return false;
91 }
92
93 int getOperandSize(int operandIndex) override {
94 assert(operandIndex >= 0 && operandIndex < getNumOperands());
95 return target.opSize();
96 }
97
98 int
99 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
100 {
101 assert(operandIndex >= 0 && operandIndex < getNumOperands());
102 return target.regIndex();
103 }
104
105 int getNumOperands() override {
106 return 1;
107 }
108
109 void execute(GPUDynInstPtr gpuDynInst) override;
110 };
111
112 template<typename TargetType>
113 void
114 BrnInstBase<TargetType>::generateDisassembly()
115 {
116 std::string widthClause;
117
118 if (width != 1) {
119 widthClause = csprintf("_width(%d)", width);
120 }
121
122 disassembly = csprintf("%s%s %s", opcode, widthClause,
123 target.disassemble());
124 }
125
126 template<typename TargetType>
127 void
128 BrnInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
129 {
130 Wavefront *w = gpuDynInst->wavefront();
131
132 if (getTargetPc() == w->rpc()) {
133 w->popFromReconvergenceStack();
134 } else {
135 // Rpc and execution mask remain the same
136 w->pc(getTargetPc());
137 }
138 }
139
140 class BrnDirectInst : public BrnInstBase<LabelOperand>
141 {
142 public:
143 BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
144 : BrnInstBase<LabelOperand>(ib, obj)
145 {
146 }
147 int numSrcRegOperands() { return 0; }
148 int numDstRegOperands() { return 0; }
149 };
150
151 class BrnIndirectInst : public BrnInstBase<SRegOperand>
152 {
153 public:
154 BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
155 : BrnInstBase<SRegOperand>(ib, obj)
156 {
157 }
158 int numSrcRegOperands() { return target.isVectorRegister(); }
159 int numDstRegOperands() { return 0; }
160 };
161
162 GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib,
163 const BrigObject *obj);
164
165 template<typename TargetType>
166 class CbrInstBase : public HsailGPUStaticInst
167 {
168 public:
169 void generateDisassembly() override;
170
171 Brig::BrigWidth8_t width;
172 CRegOperand cond;
173 TargetType target;
174
175 CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
176 : HsailGPUStaticInst(obj, "cbr")
177 {
178 setFlag(Branch);
179 width = ((Brig::BrigInstBr *)ib)->width;
180 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
181 cond.init(op_offs, obj);
182 op_offs = obj->getOperandPtr(ib->operands, 1);
183 target.init(op_offs, obj);
184 }
185
186 uint32_t getTargetPc() override { return target.getTarget(0, 0); }
187
188 void execute(GPUDynInstPtr gpuDynInst) override;
189 // Assumption: Target is operand 0, Condition Register is operand 1
190 bool isVectorRegister(int operandIndex) override {
191 assert(operandIndex >= 0 && operandIndex < getNumOperands());
192 if (!operandIndex)
193 return target.isVectorRegister();
194 else
195 return false;
196 }
197 bool isCondRegister(int operandIndex) override {
198 assert(operandIndex >= 0 && operandIndex < getNumOperands());
199 if (!operandIndex)
200 return target.isCondRegister();
201 else
202 return true;
203 }
204 bool isScalarRegister(int operandIndex) override {
205 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
206 if (!operandIndex)
207 return target.isScalarRegister();
208 else
209 return false;
210 }
211 bool isSrcOperand(int operandIndex) override {
212 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
213 if (operandIndex == 0)
214 return true;
215 return false;
216 }
217 // both Condition Register and Target are source operands
218 bool isDstOperand(int operandIndex) override {
219 return false;
220 }
221 int getOperandSize(int operandIndex) override {
222 assert(operandIndex >= 0 && operandIndex < getNumOperands());
223 if (!operandIndex)
224 return target.opSize();
225 else
226 return 1;
227 }
228 int
229 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
230 {
231 assert(operandIndex >= 0 && operandIndex < getNumOperands());
232 if (!operandIndex)
233 return target.regIndex();
234 else
235 return -1;
236 }
237
238 // Operands = Target, Condition Register
239 int getNumOperands() override {
240 return 2;
241 }
242 };
243
244 template<typename TargetType>
245 void
246 CbrInstBase<TargetType>::generateDisassembly()
247 {
248 std::string widthClause;
249
250 if (width != 1) {
251 widthClause = csprintf("_width(%d)", width);
252 }
253
254 disassembly = csprintf("%s%s %s,%s", opcode, widthClause,
255 cond.disassemble(), target.disassemble());
256 }
257
258 template<typename TargetType>
259 void
260 CbrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
261 {
262 Wavefront *w = gpuDynInst->wavefront();
263
264 const uint32_t curr_pc M5_VAR_USED = w->pc();
265 const uint32_t curr_rpc = w->rpc();
266 const VectorMask curr_mask = w->execMask();
267
268 /**
269 * TODO: can we move this pop outside the instruction, and
270 * into the wavefront?
271 */
272 w->popFromReconvergenceStack();
273
274 // immediate post-dominator instruction
275 const uint32_t rpc = static_cast<uint32_t>(ipdInstNum());
276 if (curr_rpc != rpc) {
277 w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask);
278 }
279
280 // taken branch
281 const uint32_t true_pc = getTargetPc();
282 VectorMask true_mask;
283 for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
284 true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane];
285 }
286
287 // not taken branch
288 const uint32_t false_pc = nextInstAddr();
289 assert(true_pc != false_pc);
290 if (false_pc != rpc && true_mask.count() < curr_mask.count()) {
291 VectorMask false_mask = curr_mask & ~true_mask;
292 w->pushToReconvergenceStack(false_pc, rpc, false_mask);
293 }
294
295 if (true_pc != rpc && true_mask.count()) {
296 w->pushToReconvergenceStack(true_pc, rpc, true_mask);
297 }
298 assert(w->pc() != curr_pc);
299 }
300
301
302 class CbrDirectInst : public CbrInstBase<LabelOperand>
303 {
304 public:
305 CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
306 : CbrInstBase<LabelOperand>(ib, obj)
307 {
308 }
309 // the source operand of a conditional branch is a Condition
310 // Register which is not stored in the VRF
311 // so we do not count it as a source-register operand
312 // even though, formally, it is one.
313 int numSrcRegOperands() { return 0; }
314 int numDstRegOperands() { return 0; }
315 };
316
317 class CbrIndirectInst : public CbrInstBase<SRegOperand>
318 {
319 public:
320 CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
321 : CbrInstBase<SRegOperand>(ib, obj)
322 {
323 }
324 // one source operand of the conditional indirect branch is a Condition
325 // register which is not stored in the VRF so we do not count it
326 // as a source-register operand even though, formally, it is one.
327 int numSrcRegOperands() { return target.isVectorRegister(); }
328 int numDstRegOperands() { return 0; }
329 };
330
331 GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib,
332 const BrigObject *obj);
333
334 template<typename TargetType>
335 class BrInstBase : public HsailGPUStaticInst
336 {
337 public:
338 void generateDisassembly() override;
339
340 ImmOperand<uint32_t> width;
341 TargetType target;
342
343 BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
344 : HsailGPUStaticInst(obj, "br")
345 {
346 setFlag(Branch);
347 setFlag(UnconditionalJump);
348 width.init(((Brig::BrigInstBr *)ib)->width, obj);
349 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
350 target.init(op_offs, obj);
351 }
352
353 uint32_t getTargetPc() override { return target.getTarget(0, 0); }
354
355 void execute(GPUDynInstPtr gpuDynInst) override;
356 bool isVectorRegister(int operandIndex) override {
357 assert(operandIndex >= 0 && operandIndex < getNumOperands());
358 return target.isVectorRegister();
359 }
360 bool isCondRegister(int operandIndex) override {
361 assert(operandIndex >= 0 && operandIndex < getNumOperands());
362 return target.isCondRegister();
363 }
364 bool isScalarRegister(int operandIndex) override {
365 assert(operandIndex >= 0 && operandIndex < getNumOperands());
366 return target.isScalarRegister();
367 }
368 bool isSrcOperand(int operandIndex) override {
369 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
370 return true;
371 }
372 bool isDstOperand(int operandIndex) override { return false; }
373 int getOperandSize(int operandIndex) override {
374 assert(operandIndex >= 0 && operandIndex < getNumOperands());
375 return target.opSize();
376 }
377 int
378 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
379 {
380 assert(operandIndex >= 0 && operandIndex < getNumOperands());
381 return target.regIndex();
382 }
383 int getNumOperands() override { return 1; }
384 };
385
386 template<typename TargetType>
387 void
388 BrInstBase<TargetType>::generateDisassembly()
389 {
390 std::string widthClause;
391
392 if (width.bits != 1) {
393 widthClause = csprintf("_width(%d)", width.bits);
394 }
395
396 disassembly = csprintf("%s%s %s", opcode, widthClause,
397 target.disassemble());
398 }
399
400 template<typename TargetType>
401 void
402 BrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
403 {
404 Wavefront *w = gpuDynInst->wavefront();
405
406 if (getTargetPc() == w->rpc()) {
407 w->popFromReconvergenceStack();
408 } else {
409 // Rpc and execution mask remain the same
410 w->pc(getTargetPc());
411 }
412 }
413
414 class BrDirectInst : public BrInstBase<LabelOperand>
415 {
416 public:
417 BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
418 : BrInstBase<LabelOperand>(ib, obj)
419 {
420 }
421
422 int numSrcRegOperands() { return 0; }
423 int numDstRegOperands() { return 0; }
424 };
425
426 class BrIndirectInst : public BrInstBase<SRegOperand>
427 {
428 public:
429 BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
430 : BrInstBase<SRegOperand>(ib, obj)
431 {
432 }
433 int numSrcRegOperands() { return target.isVectorRegister(); }
434 int numDstRegOperands() { return 0; }
435 };
436
437 GPUStaticInst* decodeBr(const Brig::BrigInstBase *ib,
438 const BrigObject *obj);
439 } // namespace HsailISA
440
441 #endif // __ARCH_HSAIL_INSTS_BRANCH_HH__