// gem5.git: src/arch/hsail/insts/mem.hh
// gpu-compute: AMD's baseline GPU model
/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Steve Reinhardt
 */

#ifndef __ARCH_HSAIL_INSTS_MEM_HH__
#define __ARCH_HSAIL_INSTS_MEM_HH__

#include "arch/hsail/insts/decl.hh"
#include "arch/hsail/insts/gpu_static_inst.hh"
#include "arch/hsail/operand.hh"

namespace HsailISA
{
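    // Lightweight mix-in that records the size (in bytes) of a memory
    // operand, derived from its HSAIL MemType, and keeps a pointer to the
    // instruction's address operand for the memory pipeline to query.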
    class MemInst
    {
      public:
        MemInst() : size(0), addr_operand(nullptr) { }

        MemInst(Enums::MemType m_type)
        {
            if (m_type == Enums::M_U64 ||
                m_type == Enums::M_S64 ||
                m_type == Enums::M_F64) {
                size = 8;
            } else if (m_type == Enums::M_U32 ||
                       m_type == Enums::M_S32 ||
                       m_type == Enums::M_F32) {
                size = 4;
            } else if (m_type == Enums::M_U16 ||
                       m_type == Enums::M_S16 ||
                       m_type == Enums::M_F16) {
                size = 2;
            } else {
                size = 1;
            }

            addr_operand = nullptr;
        }

        void
        init_addr(AddrOperandBase *_addr_operand)
        {
            addr_operand = _addr_operand;
        }

      private:
        int size;
        AddrOperandBase *addr_operand;

      public:
        int getMemOperandSize() { return size; }
        AddrOperandBase *getAddressOperand() { return addr_operand; }
    };

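    // Base class for lda (load address) instructions: decodes the
    // destination register and address operand and implements the generic
    // operand-query interface used by the timing model.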
    template<typename DestOperandType, typename AddrOperandType>
    class LdaInstBase : public HsailGPUStaticInst
    {
      public:
        typename DestOperandType::DestOperand dest;
        AddrOperandType addr;

        LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                    const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            dest.init(op_offs, obj);
            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        int numSrcRegOperands() { return(this->addr.isVectorRegister()); }
        int numDstRegOperands() { return dest.isVectorRegister(); }
        bool isVectorRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isVectorRegister() :
                   this->addr.isVectorRegister());
        }
        bool isCondRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isCondRegister() :
                   this->addr.isCondRegister());
        }
        bool isScalarRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isScalarRegister() :
                   this->addr.isScalarRegister());
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex > 0)
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return(operandIndex == 0);
        }
        int getOperandSize(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.opSize() :
                   this->addr.opSize());
        }
        int getRegisterIndex(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.regIndex() :
                   this->addr.regIndex());
        }
        int getNumOperands()
        {
            if (this->addr.isVectorRegister())
                return 2;
            return 1;
        }
    };

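    // Concrete lda instruction; wires the address operand into the MemInst
    // mix-in so the memory pipeline can query it.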
    template<typename DestDataType, typename AddrOperandType>
    class LdaInst :
        public LdaInstBase<typename DestDataType::OperandType, AddrOperandType>,
        public MemInst
    {
      public:
        void generateDisassembly();

        LdaInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                const char *_opcode)
            : LdaInstBase<typename DestDataType::OperandType,
                          AddrOperandType>(ib, obj, _opcode)
        {
            init_addr(&this->addr);
        }

        void execute(GPUDynInstPtr gpuDynInst);
    };

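    // Decode an lda instruction: pick the address-operand flavor (no
    // register, single register, or double register) from the BRIG operand
    // kind.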
    template<typename DataType>
    GPUStaticInst*
    decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
        BrigRegOperandInfo regDataType = findRegDataType(op_offs, obj);

        if (regDataType.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new LdaInst<DataType, NoRegAddrOperand>(ib, obj, "ldas");
        } else if (regDataType.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (regDataType.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new LdaInst<DataType, SRegAddrOperand>(ib, obj, "ldas");
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new LdaInst<DataType, DRegAddrOperand>(ib, obj, "ldas");
              default:
                fatal("Bad ldas register operand type %d\n", regDataType.type);
            }
        } else {
            fatal("Bad ldas register operand kind %d\n", regDataType.kind);
        }
    }

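    // Base class for ld instructions (and atomic loads). Decodes the
    // segment, memory order/scope, equivalence class, width, destination,
    // and address, and classifies the operation (o_type) by segment.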
    template<typename MemOperandType, typename DestOperandType,
             typename AddrOperandType>
    class LdInstBase : public HsailGPUStaticInst
    {
      public:
        Brig::BrigWidth8_t width;
        typename DestOperandType::DestOperand dest;
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryOrder memoryOrder;
        Brig::BrigMemoryScope memoryScope;
        unsigned int equivClass;
        bool isArgLoad()
        {
            return segment == Brig::BRIG_SEGMENT_KERNARG ||
                   segment == Brig::BRIG_SEGMENT_ARG;
        }
        void
        initLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode)
        {
            using namespace Brig;

            const BrigInstMem *ldst = (const BrigInstMem*)ib;

            segment = (BrigSegment)ldst->segment;
            memoryOrder = BRIG_MEMORY_ORDER_NONE;
            memoryScope = BRIG_MEMORY_SCOPE_NONE;
            equivClass = ldst->equivClass;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_READ;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_READ;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_READ;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_READ;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_READ;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_READ;
                break;

              case BRIG_SEGMENT_KERNARG:
                o_type = Enums::OT_KERN_READ;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("Ld: segment %d not supported\n", segment);
            }

            width = ldst->width;
            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
            if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
                dest.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        void
        initAtomicLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
                     const char *_opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            equivClass = 0;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_READ;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_READ;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_READ;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_READ;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_READ;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_READ;
                break;

              case BRIG_SEGMENT_KERNARG:
                o_type = Enums::OT_KERN_READ;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("Ld: segment %d not supported\n", segment);
            }

            width = BRIG_WIDTH_1;
            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);

            if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
                dest.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            if (ib->opcode == BRIG_OPCODE_LD) {
                initLd(ib, obj, _opcode);
            } else {
                initAtomicLd(ib, obj, _opcode);
            }
        }

        int numSrcRegOperands() { return(this->addr.isVectorRegister()); }
        int numDstRegOperands() { return dest.isVectorRegister(); }
        int getNumOperands()
        {
            if (this->addr.isVectorRegister())
                return 2;
            else
                return 1;
        }
        bool isVectorRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isVectorRegister() :
                   this->addr.isVectorRegister());
        }
        bool isCondRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isCondRegister() :
                   this->addr.isCondRegister());
        }
        bool isScalarRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isScalarRegister() :
                   this->addr.isScalarRegister());
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex > 0)
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return(operandIndex == 0);
        }
        int getOperandSize(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.opSize() :
                   this->addr.opSize());
        }
        int getRegisterIndex(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.regIndex() :
                   this->addr.regIndex());
        }
    };

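    // Concrete ld instruction. Handles vector destinations (up to four
    // registers for ld_v2/ld_v4) and issues the per-lane memory requests in
    // initiateAcc(), with an optional acquire continuation.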
    template<typename MemDataType, typename DestDataType,
             typename AddrOperandType>
    class LdInst :
        public LdInstBase<typename MemDataType::CType,
                          typename DestDataType::OperandType, AddrOperandType>,
        public MemInst
    {
        typename DestDataType::OperandType::DestOperand dest_vect[4];
        uint16_t num_dest_operands;
        void generateDisassembly();

      public:
        LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode)
            : LdInstBase<typename MemDataType::CType,
                         typename DestDataType::OperandType,
                         AddrOperandType>(ib, obj, _opcode),
              MemInst(MemDataType::memType)
        {
            init_addr(&this->addr);

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);

            if (brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
                const Brig::BrigOperandOperandList *brigRegVecOp =
                    (const Brig::BrigOperandOperandList*)brigOp;

                num_dest_operands =
                    *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;

                assert(num_dest_operands <= 4);
            } else {
                num_dest_operands = 1;
            }

            if (num_dest_operands > 1) {
                assert(brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);

                for (int i = 0; i < num_dest_operands; ++i) {
                    dest_vect[i].init_from_vect(op_offs, obj, i);
                }
            }
        }

        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            typedef typename MemDataType::CType c0;

            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            if (num_dest_operands > 1) {
                for (int i = 0; i < VSZ; ++i)
                    if (gpuDynInst->exec_mask[i])
                        gpuDynInst->statusVector.push_back(num_dest_operands);
                    else
                        gpuDynInst->statusVector.push_back(0);
            }

            for (int k = 0; k < num_dest_operands; ++k) {

                c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];

                for (int i = 0; i < VSZ; ++i) {
                    if (gpuDynInst->exec_mask[i]) {
                        Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);

                        if (isLocalMem()) {
                            // load from shared memory
                            *d = gpuDynInst->wavefront()->ldsChunk->
                                read<c0>(vaddr);
                        } else {
                            Request *req = new Request(0, vaddr, sizeof(c0), 0,
                                          gpuDynInst->computeUnit()->masterId(),
                                          0, gpuDynInst->wfDynId, i);

                            gpuDynInst->setRequestFlags(req);
                            PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
                            pkt->dataStatic(d);

                            if (gpuDynInst->computeUnit()->shader->
                                separate_acquire_release &&
                                gpuDynInst->memoryOrder ==
                                Enums::MEMORY_ORDER_SC_ACQUIRE) {
                                // if this load has acquire semantics,
                                // set the response continuation function
                                // to perform an Acquire request
                                gpuDynInst->execContinuation =
                                    &GPUStaticInst::execLdAcq;

                                gpuDynInst->useContinuation = true;
                            } else {
                                // the request will be finished when
                                // the load completes
                                gpuDynInst->useContinuation = false;
                            }
                            // translation is performed in sendRequest()
                            gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
                                                                   i, pkt);
                        }
                    }
                    ++d;
                }
            }

            gpuDynInst->updateStats();
        }

      private:
        void
        execLdAcq(GPUDynInstPtr gpuDynInst) override
        {
            // after the load has completed, if the load has acquire
            // semantics, issue an acquire request.
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_ACQUIRE) {
                    gpuDynInst->statusBitVector = VectorMask(1);
                    gpuDynInst->useContinuation = false;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                                  gpuDynInst->computeUnit()->masterId(),
                                  0, gpuDynInst->wfDynId, -1);
                    req->setFlags(Request::ACQUIRE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst,
                                                                    false, req);
                }
            }
        }

      public:
        bool
        isLocalMem() const override
        {
            return this->segment == Brig::BRIG_SEGMENT_GROUP;
        }

        bool isVectorRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isVectorRegister());
            if (num_dest_operands > 1) {
                return dest_vect[operandIndex].isVectorRegister();
            } else if (num_dest_operands == 1) {
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isVectorRegister();
            }
            return false;
        }
        bool isCondRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isCondRegister());
            if (num_dest_operands > 1)
                return dest_vect[operandIndex].isCondRegister();
            else if (num_dest_operands == 1)
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isCondRegister();
            return false;
        }
        bool isScalarRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isScalarRegister());
            if (num_dest_operands > 1)
                return dest_vect[operandIndex].isScalarRegister();
            else if (num_dest_operands == 1)
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isScalarRegister();
            return false;
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return false;
            return true;
        }
        int getOperandSize(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.opSize());
            if (num_dest_operands > 1)
                return(dest_vect[operandIndex].opSize());
            else if (num_dest_operands == 1)
                return(LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.opSize());
            return 0;
        }
        int getRegisterIndex(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.regIndex());
            if (num_dest_operands > 1)
                return(dest_vect[operandIndex].regIndex());
            else if (num_dest_operands == 1)
                return(LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.regIndex());
            return -1;
        }
        int getNumOperands()
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return(num_dest_operands+1);
            else
                return(num_dest_operands);
        }
        void execute(GPUDynInstPtr gpuDynInst);
    };

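    // Second-stage ld decode: choose the address-operand flavor once the
    // destination register type (DestDT) is known.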
    template<typename MemDT, typename DestDT>
    GPUStaticInst*
    decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new LdInst<MemDT, DestDT, NoRegAddrOperand>(ib, obj, "ld");
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
                   tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new LdInst<MemDT, DestDT,
                                  SRegAddrOperand>(ib, obj, "ld");
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new LdInst<MemDT, DestDT,
                                  DRegAddrOperand>(ib, obj, "ld");
              default:
                fatal("Bad ld register operand type %d\n", tmp.regKind);
            }
        } else {
            fatal("Bad ld register operand kind %d\n", tmp.kind);
        }
    }

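    // First-stage ld decode: map the destination register kind and BRIG
    // type to a concrete destination data type, then defer to decodeLd2.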
    template<typename MemDT>
    GPUStaticInst*
    decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
        BrigRegOperandInfo dest = findRegDataType(op_offs, obj);

        assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
               dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
        switch(dest.regKind) {
          case Brig::BRIG_REGISTER_KIND_SINGLE:
            switch (ib->type) {
              case Brig::BRIG_TYPE_B8:
              case Brig::BRIG_TYPE_B16:
              case Brig::BRIG_TYPE_B32:
                return decodeLd2<MemDT, B32>(ib, obj);
              case Brig::BRIG_TYPE_U8:
              case Brig::BRIG_TYPE_U16:
              case Brig::BRIG_TYPE_U32:
                return decodeLd2<MemDT, U32>(ib, obj);
              case Brig::BRIG_TYPE_S8:
              case Brig::BRIG_TYPE_S16:
              case Brig::BRIG_TYPE_S32:
                return decodeLd2<MemDT, S32>(ib, obj);
              case Brig::BRIG_TYPE_F16:
              case Brig::BRIG_TYPE_F32:
                return decodeLd2<MemDT, U32>(ib, obj);
              default:
                fatal("Bad ld register operand type %d, %d\n",
                      dest.regKind, ib->type);
            };
          case Brig::BRIG_REGISTER_KIND_DOUBLE:
            switch (ib->type) {
              case Brig::BRIG_TYPE_B64:
                return decodeLd2<MemDT, B64>(ib, obj);
              case Brig::BRIG_TYPE_U64:
                return decodeLd2<MemDT, U64>(ib, obj);
              case Brig::BRIG_TYPE_S64:
                return decodeLd2<MemDT, S64>(ib, obj);
              case Brig::BRIG_TYPE_F64:
                return decodeLd2<MemDT, U64>(ib, obj);
              default:
                fatal("Bad ld register operand type %d, %d\n",
                      dest.regKind, ib->type);
            };
          default:
            fatal("Bad ld register operand type %d, %d\n", dest.regKind,
                  ib->type);
        }
    }

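    // Base class for st instructions (and atomic stores). Mirrors
    // LdInstBase, but decodes a source operand instead of a destination and
    // classifies the operation as a write.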
    template<typename MemDataType, typename SrcOperandType,
             typename AddrOperandType>
    class StInstBase : public HsailGPUStaticInst
    {
      public:
        typename SrcOperandType::SrcOperand src;
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryScope memoryScope;
        Brig::BrigMemoryOrder memoryOrder;
        unsigned int equivClass;

        void
        initSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode)
        {
            using namespace Brig;

            const BrigInstMem *ldst = (const BrigInstMem*)ib;

            segment = (BrigSegment)ldst->segment;
            memoryOrder = BRIG_MEMORY_ORDER_NONE;
            memoryScope = BRIG_MEMORY_SCOPE_NONE;
            equivClass = ldst->equivClass;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_WRITE;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_WRITE;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_WRITE;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_WRITE;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_WRITE;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_WRITE;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("St: segment %d not supported\n", segment);
            }

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const BrigOperand *baseOp = obj->getOperand(op_offs);

            if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
                (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
                src.init(op_offs, obj);
            }

            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        void
        initAtomicSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
                     const char *_opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            equivClass = 0;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_WRITE;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_WRITE;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_WRITE;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_WRITE;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_WRITE;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_WRITE;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("St: segment %d not supported\n", segment);
            }

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            addr.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            src.init(op_offs, obj);
        }

        StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            if (ib->opcode == BRIG_OPCODE_ST) {
                initSt(ib, obj, _opcode);
            } else {
                initAtomicSt(ib, obj, _opcode);
            }
        }

        int numDstRegOperands() { return 0; }
        int numSrcRegOperands()
        {
            return src.isVectorRegister() + this->addr.isVectorRegister();
        }
        int getNumOperands()
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return 2;
            else
                return 1;
        }
        bool isVectorRegister(int operandIndex)
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isVectorRegister() :
                   this->addr.isVectorRegister();
        }
        bool isCondRegister(int operandIndex)
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isCondRegister() :
                   this->addr.isCondRegister();
        }
        bool isScalarRegister(int operandIndex)
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isScalarRegister() :
                   this->addr.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return true;
        }
        bool isDstOperand(int operandIndex) { return false; }
        int getOperandSize(int operandIndex)
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.opSize() : this->addr.opSize();
        }
        int getRegisterIndex(int operandIndex)
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.regIndex() : this->addr.regIndex();
        }
    };


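    // Concrete st instruction. Supports vector sources (st_v2/st_v4),
    // issues a release fence first when required, and then performs the
    // per-lane stores in execSt().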
    template<typename MemDataType, typename SrcDataType,
             typename AddrOperandType>
    class StInst :
        public StInstBase<MemDataType, typename SrcDataType::OperandType,
                          AddrOperandType>,
        public MemInst
    {
      public:
        typename SrcDataType::OperandType::SrcOperand src_vect[4];
        uint16_t num_src_operands;
        void generateDisassembly();

        StInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode, int srcIdx)
            : StInstBase<MemDataType, typename SrcDataType::OperandType,
                         AddrOperandType>(ib, obj, _opcode),
              MemInst(SrcDataType::memType)
        {
            init_addr(&this->addr);

            BrigRegOperandInfo rinfo;
            unsigned op_offs = obj->getOperandPtr(ib->operands, srcIdx);
            const Brig::BrigOperand *baseOp = obj->getOperand(op_offs);

            if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
                const Brig::BrigOperandConstantBytes *op =
                    (Brig::BrigOperandConstantBytes*)baseOp;

                rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind,
                                           Brig::BRIG_TYPE_NONE);
            } else {
                rinfo = findRegDataType(op_offs, obj);
            }

            if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
                const Brig::BrigOperandOperandList *brigRegVecOp =
                    (const Brig::BrigOperandOperandList*)baseOp;

                num_src_operands =
                    *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;

                assert(num_src_operands <= 4);
            } else {
                num_src_operands = 1;
            }

            if (num_src_operands > 1) {
                assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);

                for (int i = 0; i < num_src_operands; ++i) {
                    src_vect[i].init_from_vect(op_offs, obj, i);
                }
            }
        }

        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            // before performing a store, check if this store has
            // release semantics, and if so issue a release first
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_RELEASE) {

                    gpuDynInst->statusBitVector = VectorMask(1);
                    gpuDynInst->execContinuation = &GPUStaticInst::execSt;
                    gpuDynInst->useContinuation = true;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                                  gpuDynInst->computeUnit()->masterId(),
                                  0, gpuDynInst->wfDynId, -1);
                    req->setFlags(Request::RELEASE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst,
                                                                    false, req);

                    return;
                }
            }

            // if there is no release semantic, perform stores immediately
            execSt(gpuDynInst);
        }

        bool
        isLocalMem() const override
        {
            return this->segment == Brig::BRIG_SEGMENT_GROUP;
        }

      private:
        // execSt may be called through a continuation
        // if the store had release semantics. see comment for
        // execSt in gpu_static_inst.hh
        void
        execSt(GPUDynInstPtr gpuDynInst) override
        {
            typedef typename MemDataType::CType c0;

            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            if (num_src_operands > 1) {
                for (int i = 0; i < VSZ; ++i)
                    if (gpuDynInst->exec_mask[i])
                        gpuDynInst->statusVector.push_back(num_src_operands);
                    else
                        gpuDynInst->statusVector.push_back(0);
            }

            for (int k = 0; k < num_src_operands; ++k) {
                c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];

                for (int i = 0; i < VSZ; ++i) {
                    if (gpuDynInst->exec_mask[i]) {
                        Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);

                        if (isLocalMem()) {
                            // store to shared memory
                            gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
                                                                         *d);
                        } else {
                            Request *req =
                                new Request(0, vaddr, sizeof(c0), 0,
                                          gpuDynInst->computeUnit()->masterId(),
                                          0, gpuDynInst->wfDynId, i);

                            gpuDynInst->setRequestFlags(req);
                            PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
                            pkt->dataStatic<c0>(d);

                            // translation is performed in sendRequest()
                            // the request will be finished when the store completes
                            gpuDynInst->useContinuation = false;
                            gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
                                                                   i, pkt);

                        }
                    }
                    ++d;
                }
            }

            gpuDynInst->updateStats();
        }

      public:
        bool isVectorRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isVectorRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isVectorRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isVectorRegister();
            return false;
        }
        bool isCondRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isCondRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isCondRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isCondRegister();
            return false;
        }
        bool isScalarRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isScalarRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isScalarRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isScalarRegister();
            return false;
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return true;
        }
        bool isDstOperand(int operandIndex) { return false; }
        int getOperandSize(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.opSize();
            if (num_src_operands > 1)
                return src_vect[operandIndex].opSize();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.opSize();
            return 0;
        }
        int getRegisterIndex(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.regIndex();
            if (num_src_operands > 1)
                return src_vect[operandIndex].regIndex();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.regIndex();
            return -1;
        }
        int getNumOperands()
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return num_src_operands + 1;
            else
                return num_src_operands;
        }
        void execute(GPUDynInstPtr gpuDynInst);
    };

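    // Decode an st (or atomic store): the source/destination operand
    // positions differ between st and the atomic opcodes, so pick the
    // indices first, then select the address-operand flavor.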
    template<typename DataType, typename SrcDataType>
    GPUStaticInst*
    decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        int srcIdx = 0;
        int destIdx = 1;
        if (ib->opcode == Brig::BRIG_OPCODE_ATOMIC ||
            ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) {
            srcIdx = 1;
            destIdx = 0;
        }
        unsigned op_offs = obj->getOperandPtr(ib->operands, destIdx);

        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new StInst<DataType, SrcDataType,
                              NoRegAddrOperand>(ib, obj, "st", srcIdx);
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new StInst<DataType, SrcDataType,
                                  SRegAddrOperand>(ib, obj, "st", srcIdx);
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new StInst<DataType, SrcDataType,
                                  DRegAddrOperand>(ib, obj, "st", srcIdx);
              default:
                fatal("Bad st register operand type %d\n", tmp.type);
            }
        } else {
            fatal("Bad st register operand kind %d\n", tmp.kind);
        }
    }

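    // Map a BRIG atomic opcode/operation pair onto the simulator's
    // MemOpType enum.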
    Enums::MemOpType brigAtomicToMemOpType(Brig::BrigOpcode brigOpCode,
                                           Brig::BrigAtomicOperation brigOp);

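    // Base class for atomic and atomicNoRet instructions. Decodes segment,
    // ordering, scope, and the atomic operation itself, plus the optional
    // destination, the address, and NumSrcOperands source operands.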
    template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
             bool HasDst>
    class AtomicInstBase : public HsailGPUStaticInst
    {
      public:
        typename OperandType::DestOperand dest;
        typename OperandType::SrcOperand src[NumSrcOperands];
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryOrder memoryOrder;
        Brig::BrigAtomicOperation atomicOperation;
        Brig::BrigMemoryScope memoryScope;
        Brig::BrigOpcode opcode;
        Enums::MemOpType opType;

        AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                       const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            atomicOperation = (BrigAtomicOperation)at->atomicOperation;
            opcode = (BrigOpcode)ib->opcode;
            opType = brigAtomicToMemOpType(opcode, atomicOperation);

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_ATOMIC;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_ATOMIC;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_ATOMIC;
                break;

              default:
                panic("Atomic: segment %d not supported\n", segment);
            }

            if (HasDst) {
                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                dest.init(op_offs, obj);

                op_offs = obj->getOperandPtr(ib->operands, 1);
                addr.init(op_offs, obj);

                for (int i = 0; i < NumSrcOperands; ++i) {
                    op_offs = obj->getOperandPtr(ib->operands, i + 2);
                    src[i].init(op_offs, obj);
                }
            } else {

                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                addr.init(op_offs, obj);

                for (int i = 0; i < NumSrcOperands; ++i) {
                    op_offs = obj->getOperandPtr(ib->operands, i + 1);
                    src[i].init(op_offs, obj);
                }
            }
        }

        int numSrcRegOperands()
        {
            int operands = 0;
            for (int i = 0; i < NumSrcOperands; i++) {
                if (src[i].isVectorRegister() == true) {
                    operands++;
                }
            }
            if (addr.isVectorRegister())
                operands++;
            return operands;
        }
        int numDstRegOperands() { return dest.isVectorRegister(); }
        int getNumOperands()
        {
            if (addr.isVectorRegister())
                return(NumSrcOperands + 2);
            return(NumSrcOperands + 1);
        }
        bool isVectorRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isVectorRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isVectorRegister());
            else
                return dest.isVectorRegister();
        }
        bool isCondRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isCondRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isCondRegister());
            else
                return dest.isCondRegister();
        }
        bool isScalarRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isScalarRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isScalarRegister());
            else
                return dest.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return true;
            else if (operandIndex == NumSrcOperands)
                return(addr.isVectorRegister());
            else
                return false;
        }
        bool isDstOperand(int operandIndex)
        {
            if (operandIndex <= NumSrcOperands)
                return false;
            else
                return true;
        }
        int getOperandSize(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return(src[operandIndex].opSize());
            else if (operandIndex == NumSrcOperands)
                return(addr.opSize());
            else
                return(dest.opSize());
        }
        int getRegisterIndex(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return(src[operandIndex].regIndex());
            else if (operandIndex == NumSrcOperands)
                return(addr.regIndex());
            else
                return(dest.regIndex());
            return -1;
        }
    };

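    // Concrete atomic/atomicNoRet instruction. Issues a release fence first
    // when required, performs the RMW (directly on the LDS for the group
    // segment, via SwapReq packets otherwise), and optionally chains an
    // acquire through a continuation.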
    template<typename MemDataType, typename AddrOperandType, int NumSrcOperands,
             bool HasDst>
    class AtomicInst :
        public AtomicInstBase<typename MemDataType::OperandType,
                              AddrOperandType, NumSrcOperands, HasDst>,
        public MemInst
    {
      public:
        void generateDisassembly();

        AtomicInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
            : AtomicInstBase<typename MemDataType::OperandType, AddrOperandType,
                             NumSrcOperands, HasDst>
                (ib, obj, _opcode),
              MemInst(MemDataType::memType)
        {
            init_addr(&this->addr);
        }

        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            // before doing the RMW, check if this atomic has
            // release semantics, and if so issue a release first
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && (gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_RELEASE || gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE)) {

                    gpuDynInst->statusBitVector = VectorMask(1);

                    gpuDynInst->execContinuation = &GPUStaticInst::execAtomic;
                    gpuDynInst->useContinuation = true;

                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                                  gpuDynInst->computeUnit()->masterId(),
                                  0, gpuDynInst->wfDynId, -1);
                    req->setFlags(Request::RELEASE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst,
                                                                    false, req);

                    return;
                }
            }

            // if there is no release semantic, execute the RMW immediately
            execAtomic(gpuDynInst);
        }

        void execute(GPUDynInstPtr gpuDynInst);

        bool
        isLocalMem() const override
        {
            return this->segment == Brig::BRIG_SEGMENT_GROUP;
        }

      private:
        // execAtomic may be called through a continuation
        // if the RMW had release semantics. see comment for
        // execContinuation in gpu_dyn_inst.hh
        void
        execAtomic(GPUDynInstPtr gpuDynInst) override
        {
            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            typedef typename MemDataType::CType c0;

            c0 *d = &((c0*) gpuDynInst->d_data)[0];
            c0 *e = &((c0*) gpuDynInst->a_data)[0];
            c0 *f = &((c0*) gpuDynInst->x_data)[0];

            for (int i = 0; i < VSZ; ++i) {
                if (gpuDynInst->exec_mask[i]) {
                    Addr vaddr = gpuDynInst->addr[i];

                    if (isLocalMem()) {
                        Wavefront *wavefront = gpuDynInst->wavefront();
                        *d = wavefront->ldsChunk->read<c0>(vaddr);

                        switch (this->opType) {
                          case Enums::MO_AADD:
                          case Enums::MO_ANRADD:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) + (*e));
                            break;
                          case Enums::MO_ASUB:
                          case Enums::MO_ANRSUB:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) - (*e));
                            break;
                          case Enums::MO_AMAX:
                          case Enums::MO_ANRMAX:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                std::max(wavefront->ldsChunk->read<c0>(vaddr),
                                         (*e)));
                            break;
                          case Enums::MO_AMIN:
                          case Enums::MO_ANRMIN:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                std::min(wavefront->ldsChunk->read<c0>(vaddr),
                                         (*e)));
                            break;
                          case Enums::MO_AAND:
                          case Enums::MO_ANRAND:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) & (*e));
                            break;
                          case Enums::MO_AOR:
                          case Enums::MO_ANROR:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) | (*e));
                            break;
                          case Enums::MO_AXOR:
                          case Enums::MO_ANRXOR:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
                            break;
                          case Enums::MO_AINC:
                          case Enums::MO_ANRINC:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) + 1);
                            break;
                          case Enums::MO_ADEC:
                          case Enums::MO_ANRDEC:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) - 1);
                            break;
                          case Enums::MO_AEXCH:
                          case Enums::MO_ANREXCH:
                            wavefront->ldsChunk->write<c0>(vaddr, (*e));
                            break;
                          case Enums::MO_ACAS:
                          case Enums::MO_ANRCAS:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
                                (*f) : wavefront->ldsChunk->read<c0>(vaddr));
                            break;
                          default:
                            fatal("Unrecognized or invalid HSAIL atomic op "
                                  "type.\n");
                            break;
                        }
                    } else {
                        Request *req =
                            new Request(0, vaddr, sizeof(c0), 0,
                                        gpuDynInst->computeUnit()->masterId(),
                                        0, gpuDynInst->wfDynId, i,
                                        gpuDynInst->makeAtomicOpFunctor<c0>(e,
                                            f, this->opType));

                        gpuDynInst->setRequestFlags(req);
                        PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
                        pkt->dataStatic(d);

                        if (gpuDynInst->computeUnit()->shader->
                            separate_acquire_release &&
                            (gpuDynInst->memoryOrder ==
                             Enums::MEMORY_ORDER_SC_ACQUIRE)) {
                            // if this atomic has acquire semantics,
                            // schedule the continuation to perform an
                            // acquire after the RMW completes
                            gpuDynInst->execContinuation =
                                &GPUStaticInst::execAtomicAcq;

                            gpuDynInst->useContinuation = true;
                        } else {
                            // the request will be finished when the RMW completes
                            gpuDynInst->useContinuation = false;
                        }
                        // translation is performed in sendRequest()
                        gpuDynInst->computeUnit()->sendRequest(gpuDynInst, i,
                                                               pkt);
                    }
                }

                ++d;
                ++e;
                ++f;
            }

            gpuDynInst->updateStats();
        }

        // execAtomicAcq will always be called through a continuation.
        // see comment for execContinuation in gpu_dyn_inst.hh
        void
        execAtomicAcq(GPUDynInstPtr gpuDynInst) override
        {
            // after performing the RMW, check to see if this instruction
            // has acquire semantics, and if so, issue an acquire
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_ACQUIRE) {
                    gpuDynInst->statusBitVector = VectorMask(1);

                    // the request will be finished when
                    // the acquire completes
                    gpuDynInst->useContinuation = false;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                                  gpuDynInst->computeUnit()->masterId(),
                                  0, gpuDynInst->wfDynId, -1);
                    req->setFlags(Request::ACQUIRE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst,
                                                                    false, req);
                }
            }
        }
    };

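    // Build the final atomic instruction. Atomic loads and stores are
    // routed to the regular ld/st decoders; everything else becomes an
    // AtomicInst, with or without a destination.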
    template<typename DataType, typename AddrOperandType, int NumSrcOperands>
    GPUStaticInst*
    constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;

        if (at->atomicOperation == Brig::BRIG_ATOMIC_LD) {
            return decodeLd<DataType>(ib, obj);
        } else if (at->atomicOperation == Brig::BRIG_ATOMIC_ST) {
            switch (ib->type) {
              case Brig::BRIG_TYPE_B8:
                return decodeSt<S8,S8>(ib, obj);
              case Brig::BRIG_TYPE_B16:
                return decodeSt<S8,S16>(ib, obj);
              case Brig::BRIG_TYPE_B32:
                return decodeSt<S8,S32>(ib, obj);
              case Brig::BRIG_TYPE_B64:
                return decodeSt<S8,S64>(ib, obj);
              default: fatal("AtomicSt: Operand type mismatch %d\n", ib->type);
            }
        } else {
            if ((Brig::BrigOpcode)ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET)
                return new AtomicInst<DataType, AddrOperandType,
                    NumSrcOperands, false>(ib, obj, "atomicnoret");
            else
                return new AtomicInst<DataType, AddrOperandType,
                    NumSrcOperands, true>(ib, obj, "atomic");
        }
    }

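    // Shared atomic decode step: locate the address operand (its position
    // depends on whether the opcode returns a value) and choose the
    // address-operand flavor.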
    template<typename DataType, int NumSrcOperands>
    GPUStaticInst*
    decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned addrIndex = (Brig::BrigOpcode)ib->opcode ==
            Brig::BRIG_OPCODE_ATOMICNORET ? 0 : 1;

        unsigned op_offs = obj->getOperandPtr(ib->operands, addrIndex);

        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return constructAtomic<DataType, NoRegAddrOperand,
                                   NumSrcOperands>(ib, obj);
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return constructAtomic<DataType, SRegAddrOperand,
                                       NumSrcOperands>(ib, obj);
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return constructAtomic<DataType, DRegAddrOperand,
                                       NumSrcOperands>(ib, obj);
              default:
                fatal("Bad atomic register operand type %d\n", tmp.type);
            }
        } else {
            fatal("Bad atomic register operand kind %d\n", tmp.kind);
        }
    }


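    // Entry point for atomic (value-returning) instructions; CAS carries
    // two source operands, every other atomic operation carries one.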
    template<typename DataType>
    GPUStaticInst*
    decodeAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;

        if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
            return decodeAtomicHelper<DataType, 2>(ib, obj);
        } else {
            return decodeAtomicHelper<DataType, 1>(ib, obj);
        }
    }

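    // Entry point for atomicNoRet instructions; same operand counts as
    // decodeAtomic, but no destination register.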
    template<typename DataType>
    GPUStaticInst*
    decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
        if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
            return decodeAtomicHelper<DataType, 2>(ib, obj);
        } else {
            return decodeAtomicHelper<DataType, 1>(ib, obj);
        }
    }
} // namespace HsailISA

#endif // __ARCH_HSAIL_INSTS_MEM_HH__