/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Steve Reinhardt
 */
#ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__
#define __ARCH_HSAIL_INSTS_BRANCH_HH__

#include "arch/hsail/insts/gpu_static_inst.hh"
#include "arch/hsail/operand.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/wavefront.hh"

namespace HsailISA
{
// The main difference between a direct branch and an indirect branch
// is whether the target is a register or a label, so we can share a
// lot of code if we template the base implementation on that type.
50 template<typename TargetType>
51 class BrnInstBase : public HsailGPUStaticInst
54 void generateDisassembly() override;
56 Brig::BrigWidth8_t width;
59 BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
60 : HsailGPUStaticInst(obj, "brn")
63 setFlag(UnconditionalJump);
64 width = ((Brig::BrigInstBr*)ib)->width;
65 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
66 target.init(op_offs, obj);
69 uint32_t getTargetPc() override { return target.getTarget(0, 0); }
71 bool isVectorRegister(int operandIndex) override {
72 assert(operandIndex >= 0 && operandIndex < getNumOperands());
73 return target.isVectorRegister();
75 bool isCondRegister(int operandIndex) override {
76 assert(operandIndex >= 0 && operandIndex < getNumOperands());
77 return target.isCondRegister();
79 bool isScalarRegister(int operandIndex) override {
80 assert(operandIndex >= 0 && operandIndex < getNumOperands());
81 return target.isScalarRegister();
84 bool isSrcOperand(int operandIndex) override {
85 assert(operandIndex >= 0 && operandIndex < getNumOperands());
89 bool isDstOperand(int operandIndex) override {
93 int getOperandSize(int operandIndex) override {
94 assert(operandIndex >= 0 && operandIndex < getNumOperands());
95 return target.opSize();
99 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
101 assert(operandIndex >= 0 && operandIndex < getNumOperands());
102 return target.regIndex();
105 int getNumOperands() override {
109 void execute(GPUDynInstPtr gpuDynInst) override;
112 template<typename TargetType>
114 BrnInstBase<TargetType>::generateDisassembly()
116 std::string widthClause;
119 widthClause = csprintf("_width(%d)", width);
122 disassembly = csprintf("%s%s %s", opcode, widthClause,
123 target.disassemble());
126 template<typename TargetType>
128 BrnInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
130 Wavefront *w = gpuDynInst->wavefront();
132 if (getTargetPc() == w->rpc()) {
133 w->popFromReconvergenceStack();
135 // Rpc and execution mask remain the same
136 w->pc(getTargetPc());
140 class BrnDirectInst : public BrnInstBase<LabelOperand>
143 BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
144 : BrnInstBase<LabelOperand>(ib, obj)
147 int numSrcRegOperands() { return 0; }
148 int numDstRegOperands() { return 0; }
151 class BrnIndirectInst : public BrnInstBase<SRegOperand>
154 BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
155 : BrnInstBase<SRegOperand>(ib, obj)
158 int numSrcRegOperands() { return target.isVectorRegister(); }
159 int numDstRegOperands() { return 0; }
162 GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib,
163 const BrigObject *obj);
165 template<typename TargetType>
166 class CbrInstBase : public HsailGPUStaticInst
169 void generateDisassembly() override;
171 Brig::BrigWidth8_t width;
175 CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
176 : HsailGPUStaticInst(obj, "cbr")
179 width = ((Brig::BrigInstBr *)ib)->width;
180 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
181 cond.init(op_offs, obj);
182 op_offs = obj->getOperandPtr(ib->operands, 1);
183 target.init(op_offs, obj);
186 uint32_t getTargetPc() override { return target.getTarget(0, 0); }
188 void execute(GPUDynInstPtr gpuDynInst) override;
189 // Assumption: Target is operand 0, Condition Register is operand 1
190 bool isVectorRegister(int operandIndex) override {
191 assert(operandIndex >= 0 && operandIndex < getNumOperands());
193 return target.isVectorRegister();
197 bool isCondRegister(int operandIndex) override {
198 assert(operandIndex >= 0 && operandIndex < getNumOperands());
200 return target.isCondRegister();
204 bool isScalarRegister(int operandIndex) override {
205 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
207 return target.isScalarRegister();
211 bool isSrcOperand(int operandIndex) override {
212 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
213 if (operandIndex == 0)
217 // both Condition Register and Target are source operands
218 bool isDstOperand(int operandIndex) override {
221 int getOperandSize(int operandIndex) override {
222 assert(operandIndex >= 0 && operandIndex < getNumOperands());
224 return target.opSize();
229 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
231 assert(operandIndex >= 0 && operandIndex < getNumOperands());
233 return target.regIndex();
238 // Operands = Target, Condition Register
239 int getNumOperands() override {
244 template<typename TargetType>
246 CbrInstBase<TargetType>::generateDisassembly()
248 std::string widthClause;
251 widthClause = csprintf("_width(%d)", width);
254 disassembly = csprintf("%s%s %s,%s", opcode, widthClause,
255 cond.disassemble(), target.disassemble());
258 template<typename TargetType>
260 CbrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
262 Wavefront *w = gpuDynInst->wavefront();
264 const uint32_t curr_pc M5_VAR_USED = w->pc();
265 const uint32_t curr_rpc = w->rpc();
266 const VectorMask curr_mask = w->execMask();
269 * TODO: can we move this pop outside the instruction, and
270 * into the wavefront?
272 w->popFromReconvergenceStack();
274 // immediate post-dominator instruction
275 const uint32_t rpc = static_cast<uint32_t>(ipdInstNum());
276 if (curr_rpc != rpc) {
277 w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask);
281 const uint32_t true_pc = getTargetPc();
282 VectorMask true_mask;
283 for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
284 true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane];
288 const uint32_t false_pc = nextInstAddr();
289 assert(true_pc != false_pc);
290 if (false_pc != rpc && true_mask.count() < curr_mask.count()) {
291 VectorMask false_mask = curr_mask & ~true_mask;
292 w->pushToReconvergenceStack(false_pc, rpc, false_mask);
295 if (true_pc != rpc && true_mask.count()) {
296 w->pushToReconvergenceStack(true_pc, rpc, true_mask);
298 assert(w->pc() != curr_pc);
302 class CbrDirectInst : public CbrInstBase<LabelOperand>
305 CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
306 : CbrInstBase<LabelOperand>(ib, obj)
309 // the source operand of a conditional branch is a Condition
310 // Register which is not stored in the VRF
311 // so we do not count it as a source-register operand
312 // even though, formally, it is one.
313 int numSrcRegOperands() { return 0; }
314 int numDstRegOperands() { return 0; }
317 class CbrIndirectInst : public CbrInstBase<SRegOperand>
320 CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
321 : CbrInstBase<SRegOperand>(ib, obj)
324 // one source operand of the conditional indirect branch is a Condition
325 // register which is not stored in the VRF so we do not count it
326 // as a source-register operand even though, formally, it is one.
327 int numSrcRegOperands() { return target.isVectorRegister(); }
328 int numDstRegOperands() { return 0; }
331 GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib,
332 const BrigObject *obj);
334 template<typename TargetType>
335 class BrInstBase : public HsailGPUStaticInst
338 void generateDisassembly() override;
340 ImmOperand<uint32_t> width;
343 BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
344 : HsailGPUStaticInst(obj, "br")
347 setFlag(UnconditionalJump);
348 width.init(((Brig::BrigInstBr *)ib)->width, obj);
349 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
350 target.init(op_offs, obj);
353 uint32_t getTargetPc() override { return target.getTarget(0, 0); }
355 void execute(GPUDynInstPtr gpuDynInst) override;
356 bool isVectorRegister(int operandIndex) override {
357 assert(operandIndex >= 0 && operandIndex < getNumOperands());
358 return target.isVectorRegister();
360 bool isCondRegister(int operandIndex) override {
361 assert(operandIndex >= 0 && operandIndex < getNumOperands());
362 return target.isCondRegister();
364 bool isScalarRegister(int operandIndex) override {
365 assert(operandIndex >= 0 && operandIndex < getNumOperands());
366 return target.isScalarRegister();
368 bool isSrcOperand(int operandIndex) override {
369 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
372 bool isDstOperand(int operandIndex) override { return false; }
373 int getOperandSize(int operandIndex) override {
374 assert(operandIndex >= 0 && operandIndex < getNumOperands());
375 return target.opSize();
378 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
380 assert(operandIndex >= 0 && operandIndex < getNumOperands());
381 return target.regIndex();
383 int getNumOperands() override { return 1; }
386 template<typename TargetType>
388 BrInstBase<TargetType>::generateDisassembly()
390 std::string widthClause;
392 if (width.bits != 1) {
393 widthClause = csprintf("_width(%d)", width.bits);
396 disassembly = csprintf("%s%s %s", opcode, widthClause,
397 target.disassemble());
400 template<typename TargetType>
402 BrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
404 Wavefront *w = gpuDynInst->wavefront();
406 if (getTargetPc() == w->rpc()) {
407 w->popFromReconvergenceStack();
409 // Rpc and execution mask remain the same
410 w->pc(getTargetPc());
414 class BrDirectInst : public BrInstBase<LabelOperand>
417 BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
418 : BrInstBase<LabelOperand>(ib, obj)
422 int numSrcRegOperands() { return 0; }
423 int numDstRegOperands() { return 0; }
426 class BrIndirectInst : public BrInstBase<SRegOperand>
429 BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
430 : BrInstBase<SRegOperand>(ib, obj)
433 int numSrcRegOperands() { return target.isVectorRegister(); }
434 int numDstRegOperands() { return 0; }
437 GPUStaticInst* decodeBr(const Brig::BrigInstBase *ib,
438 const BrigObject *obj);
} // namespace HsailISA

#endif // __ARCH_HSAIL_INSTS_BRANCH_HH__