2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
5 * For use for simulation and test purposes only
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
33 * Author: Steve Reinhardt
36 #ifndef __ARCH_HSAIL_INSTS_MEM_HH__
37 #define __ARCH_HSAIL_INSTS_MEM_HH__
39 #include "arch/hsail/insts/decl.hh"
40 #include "arch/hsail/insts/gpu_static_inst.hh"
41 #include "arch/hsail/operand.hh"
// MemInst mixin: carries per-instruction memory metadata shared by all
// HSAIL memory instructions -- the access size (bytes) and a pointer to
// the address operand used to form effective addresses.
// NOTE(review): lines appear elided in this chunk (opening braces and the
// per-branch 'size' assignments are not visible); comments describe only
// what is shown -- confirm details against the full file.
48 MemInst() : size(0), addr_operand(nullptr) { }
// Size-classifying constructor: branches group MemTypes by width
// (64-bit, then 32-bit, then 16-bit) -- presumably setting 'size' to
// 8/4/2 in the elided branch bodies; TODO confirm in the full source.
50 MemInst(Enums::MemType m_type)
52 if (m_type == Enums::M_U64 ||
53 m_type == Enums::M_S64 ||
54 m_type == Enums::M_F64) {
56 } else if (m_type == Enums::M_U32 ||
57 m_type == Enums::M_S32 ||
58 m_type == Enums::M_F32) {
60 } else if (m_type == Enums::M_U16 ||
61 m_type == Enums::M_S16 ||
62 m_type == Enums::M_F16) {
// Address operand starts unset; init_addr() wires it up after the
// derived instruction constructs its operands.
68 addr_operand = nullptr;
// Record the address operand (non-owning pointer into the instruction).
72 init_addr(AddrOperandBase *_addr_operand)
74 addr_operand = _addr_operand;
79 AddrOperandBase *addr_operand;
// Accessors for the recorded access size and address operand.
82 int getMemOperandSize() { return size; }
83 AddrOperandBase *getAddressOperand() { return addr_operand; }
// LdaInstBase: common machinery for HSAIL 'lda' (load-address) style
// instructions. Holds the destination operand and (via an elided member,
// apparently 'addr') the address operand, plus the register-introspection
// hooks the GPU pipeline queries. Operand index 0 is the destination;
// any higher index refers to the address operand.
// NOTE(review): lines are elided in this chunk (class body braces,
// the 'addr' member declaration, some returns); code kept verbatim.
86 template<typename DestOperandType, typename AddrOperandType>
87 class LdaInstBase : public HsailGPUStaticInst
90 typename DestOperandType::DestOperand dest;
// Constructor: decode BRIG operand 0 into 'dest' and operand 1 into
// 'addr' from the Brig object file.
93 LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
95 : HsailGPUStaticInst(obj, _opcode)
99 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
100 dest.init(op_offs, obj);
101 op_offs = obj->getOperandPtr(ib->operands, 1);
102 addr.init(op_offs, obj);
// Register-operand bookkeeping: a source register is counted only when
// the address operand lives in a vector register.
105 int numSrcRegOperands() { return(this->addr.isVectorRegister()); }
106 int numDstRegOperands() { return dest.isVectorRegister(); }
// Operand-kind queries: index 0 -> dest, otherwise -> addr.
107 bool isVectorRegister(int operandIndex)
109 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
110 return((operandIndex == 0) ? dest.isVectorRegister() :
111 this->addr.isVectorRegister());
113 bool isCondRegister(int operandIndex)
115 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
116 return((operandIndex == 0) ? dest.isCondRegister() :
117 this->addr.isCondRegister());
119 bool isScalarRegister(int operandIndex)
121 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
122 return((operandIndex == 0) ? dest.isScalarRegister() :
123 this->addr.isScalarRegister());
// Only the address operand (index > 0) can be a source; index 0 is the
// destination. (The index-0 return is elided in this view.)
125 bool isSrcOperand(int operandIndex)
127 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
128 if (operandIndex > 0)
129 return(this->addr.isVectorRegister());
132 bool isDstOperand(int operandIndex) {
133 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
134 return(operandIndex == 0);
136 int getOperandSize(int operandIndex)
138 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
139 return((operandIndex == 0) ? dest.opSize() :
140 this->addr.opSize());
142 int getRegisterIndex(int operandIndex)
144 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
145 return((operandIndex == 0) ? dest.regIndex() :
146 this->addr.regIndex());
// getNumOperands (header elided): presumably returns 2 when the address
// is register-based, else 1 -- TODO confirm in the full file.
150 if (this->addr.isVectorRegister())
// LdaInst: concrete 'lda' instruction. Inherits the decode/introspection
// plumbing from LdaInstBase and (per the elided base list, presumably
// MemInst) registers its address operand via init_addr().
// execute()/generateDisassembly() are defined out of line.
156 template<typename DestDataType, typename AddrOperandType>
158 public LdaInstBase<typename DestDataType::OperandType, AddrOperandType>,
162 void generateDisassembly();
164 LdaInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
166 : LdaInstBase<typename DestDataType::OperandType,
167 AddrOperandType>(ib, obj, _opcode)
// Hook the decoded address operand into the MemInst bookkeeping.
169 init_addr(&this->addr);
172 void execute(GPUDynInstPtr gpuDynInst);
// decodeLda: factory for 'ldas' instructions. Inspects BRIG operand 1
// (the address) and instantiates LdaInst with the matching address
// operand type: no register, a single (32-bit) register, or a double
// (64-bit) register. Unknown kinds/types are fatal decode errors.
175 template<typename DataType>
177 decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj)
179 unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
180 BrigRegOperandInfo regDataType = findRegDataType(op_offs, obj);
182 if (regDataType.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
183 return new LdaInst<DataType, NoRegAddrOperand>(ib, obj, "ldas");
184 } else if (regDataType.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
186 switch (regDataType.regKind) {
187 case Brig::BRIG_REGISTER_KIND_SINGLE:
188 return new LdaInst<DataType, SRegAddrOperand>(ib, obj, "ldas");
189 case Brig::BRIG_REGISTER_KIND_DOUBLE:
190 return new LdaInst<DataType, DRegAddrOperand>(ib, obj, "ldas");
// default arm (label elided): unsupported register kind for the address.
192 fatal("Bad ldas register operand type %d\n", regDataType.type);
195 fatal("Bad ldas register operand kind %d\n", regDataType.kind);
// LdInstBase: shared state and decode logic for HSAIL loads. Decodes
// either a plain BRIG 'ld' (initLd) or an atomic-load encoding
// (initAtomicLd), records segment / memory order / scope / equivalence
// class, and maps the segment to the simulator's operation type
// (o_type, *_READ variants). Operand index 0 is the destination;
// higher indices refer to the address operand.
// NOTE(review): many lines (braces, 'break;'s, switch headers, method
// signatures) are elided in this chunk; code kept verbatim.
199 template<typename MemOperandType, typename DestOperandType,
200 typename AddrOperandType>
201 class LdInstBase : public HsailGPUStaticInst
204 Brig::BrigWidth8_t width;
205 typename DestOperandType::DestOperand dest;
206 AddrOperandType addr;
208 Brig::BrigSegment segment;
209 Brig::BrigMemoryOrder memoryOrder;
210 Brig::BrigMemoryScope memoryScope;
211 unsigned int equivClass;
// Predicate (signature elided): kernarg/arg segment loads are treated
// specially (argument loads rather than ordinary memory reads).
214 return segment == Brig::BRIG_SEGMENT_KERNARG ||
215 segment == Brig::BRIG_SEGMENT_ARG;
// Decode a plain BRIG 'ld': non-atomic, so order/scope default to NONE.
218 initLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
221 using namespace Brig;
223 const BrigInstMem *ldst = (const BrigInstMem*)ib;
225 segment = (BrigSegment)ldst->segment;
226 memoryOrder = BRIG_MEMORY_ORDER_NONE;
227 memoryScope = BRIG_MEMORY_SCOPE_NONE;
228 equivClass = ldst->equivClass;
// Map segment -> read-operation type (switch header / breaks elided).
231 case BRIG_SEGMENT_GLOBAL:
232 o_type = Enums::OT_GLOBAL_READ;
235 case BRIG_SEGMENT_GROUP:
236 o_type = Enums::OT_SHARED_READ;
239 case BRIG_SEGMENT_PRIVATE:
240 o_type = Enums::OT_PRIVATE_READ;
243 case BRIG_SEGMENT_READONLY:
244 o_type = Enums::OT_READONLY_READ;
247 case BRIG_SEGMENT_SPILL:
248 o_type = Enums::OT_SPILL_READ;
251 case BRIG_SEGMENT_FLAT:
252 o_type = Enums::OT_FLAT_READ;
255 case BRIG_SEGMENT_KERNARG:
256 o_type = Enums::OT_KERN_READ;
259 case BRIG_SEGMENT_ARG:
260 o_type = Enums::OT_ARG;
264 panic("Ld: segment %d not supported\n", segment);
// Operand 0 is the destination (only decoded when it is a register);
// operand 1 is the address.
268 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
269 const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
270 if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
271 dest.init(op_offs, obj);
273 op_offs = obj->getOperandPtr(ib->operands, 1);
274 addr.init(op_offs, obj);
// Decode an atomic-load encoding: order/scope come from the BRIG
// atomic record rather than defaulting to NONE.
278 initAtomicLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
281 using namespace Brig;
283 const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
285 segment = (BrigSegment)at->segment;
286 memoryOrder = (BrigMemoryOrder)at->memoryOrder;
287 memoryScope = (BrigMemoryScope)at->memoryScope;
// Same segment -> read-operation-type mapping as initLd above.
291 case BRIG_SEGMENT_GLOBAL:
292 o_type = Enums::OT_GLOBAL_READ;
295 case BRIG_SEGMENT_GROUP:
296 o_type = Enums::OT_SHARED_READ;
299 case BRIG_SEGMENT_PRIVATE:
300 o_type = Enums::OT_PRIVATE_READ;
303 case BRIG_SEGMENT_READONLY:
304 o_type = Enums::OT_READONLY_READ;
307 case BRIG_SEGMENT_SPILL:
308 o_type = Enums::OT_SPILL_READ;
311 case BRIG_SEGMENT_FLAT:
312 o_type = Enums::OT_FLAT_READ;
315 case BRIG_SEGMENT_KERNARG:
316 o_type = Enums::OT_KERN_READ;
319 case BRIG_SEGMENT_ARG:
320 o_type = Enums::OT_ARG;
324 panic("Ld: segment %d not supported\n", segment);
// Atomic loads have no explicit width operand; default to width 1.
327 width = BRIG_WIDTH_1;
328 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
329 const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
331 if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
332 dest.init(op_offs, obj);
334 op_offs = obj->getOperandPtr(ib->operands,1);
335 addr.init(op_offs, obj);
// Constructor: dispatch to the plain-ld or atomic-ld decoder based on
// the BRIG opcode.
338 LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
340 : HsailGPUStaticInst(obj, _opcode)
342 using namespace Brig;
344 if (ib->opcode == BRIG_OPCODE_LD) {
345 initLd(ib, obj, _opcode);
347 initAtomicLd(ib, obj, _opcode);
// Register-operand bookkeeping (same convention as LdaInstBase).
351 int numSrcRegOperands() { return(this->addr.isVectorRegister()); }
352 int numDstRegOperands() { return dest.isVectorRegister(); }
// getNumOperands (header elided): presumably 2 when the address is a
// vector register, else 1 -- TODO confirm in the full file.
355 if (this->addr.isVectorRegister())
// Operand-kind queries: index 0 -> dest, otherwise -> addr.
360 bool isVectorRegister(int operandIndex)
362 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
363 return((operandIndex == 0) ? dest.isVectorRegister() :
364 this->addr.isVectorRegister());
366 bool isCondRegister(int operandIndex)
368 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
369 return((operandIndex == 0) ? dest.isCondRegister() :
370 this->addr.isCondRegister());
372 bool isScalarRegister(int operandIndex)
374 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
375 return((operandIndex == 0) ? dest.isScalarRegister() :
376 this->addr.isScalarRegister());
378 bool isSrcOperand(int operandIndex)
380 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
381 if (operandIndex > 0)
382 return(this->addr.isVectorRegister());
385 bool isDstOperand(int operandIndex)
387 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
388 return(operandIndex == 0);
390 int getOperandSize(int operandIndex)
392 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
393 return((operandIndex == 0) ? dest.opSize() :
394 this->addr.opSize());
396 int getRegisterIndex(int operandIndex)
398 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
399 return((operandIndex == 0) ? dest.regIndex() :
400 this->addr.regIndex());
// LdInst: concrete HSAIL load. Supports vector destinations (up to 4
// registers via dest_vect / num_dest_operands, decoded from a BRIG
// operand list). initiateAcc() issues one memory request per active
// lane per destination element; execLdAcq() is the continuation that
// issues the Acquire fence after an acquire-semantics load completes.
// Operand indexing: [0, num_dest_operands) are destinations; the final
// index (when present) is the address operand.
// NOTE(review): lines are elided in this chunk (braces, else-arms, some
// call arguments); code kept verbatim, comments hedge accordingly.
404 template<typename MemDataType, typename DestDataType,
405 typename AddrOperandType>
407 public LdInstBase<typename MemDataType::CType,
408 typename DestDataType::OperandType, AddrOperandType>,
411 typename DestDataType::OperandType::DestOperand dest_vect[4];
412 uint16_t num_dest_operands;
413 void generateDisassembly();
// Constructor: register the address operand, then determine whether the
// destination is a vector (operand list) and decode each element.
416 LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
418 : LdInstBase<typename MemDataType::CType,
419 typename DestDataType::OperandType,
420 AddrOperandType>(ib, obj, _opcode),
421 MemInst(MemDataType::memType)
423 init_addr(&this->addr);
425 unsigned op_offs = obj->getOperandPtr(ib->operands,0);
426 const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
428 if (brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
429 const Brig::BrigOperandOperandList *brigRegVecOp =
430 (const Brig::BrigOperandOperandList*)brigOp;
// Element count = list byte size / 4 (4-byte entries, presumably).
433 *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;
435 assert(num_dest_operands <= 4);
437 num_dest_operands = 1;
440 if (num_dest_operands > 1) {
441 assert(brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
443 for (int i = 0; i < num_dest_operands; ++i) {
444 dest_vect[i].init_from_vect(op_offs, obj, i);
// Issue the load's memory accesses for every active lane.
450 initiateAcc(GPUDynInstPtr gpuDynInst) override
452 typedef typename MemDataType::CType c0;
// Mark all active lanes as having outstanding accesses.
454 gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
// Vector loads track per-lane completion counts in statusVector.
456 if (num_dest_operands > 1) {
457 for (int i = 0; i < VSZ; ++i)
458 if (gpuDynInst->exec_mask[i])
459 gpuDynInst->statusVector.push_back(num_dest_operands);
461 gpuDynInst->statusVector.push_back(0);
// One pass per destination element (k), one access per lane (i).
464 for (int k = 0; k < num_dest_operands; ++k) {
466 c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];
468 for (int i = 0; i < VSZ; ++i) {
469 if (gpuDynInst->exec_mask[i]) {
470 Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
473 // load from shared memory
474 *d = gpuDynInst->wavefront()->ldsChunk->
// Non-LDS path: build a Request/ReadReq packet per lane.
477 Request *req = new Request(0, vaddr, sizeof(c0), 0,
478 gpuDynInst->computeUnit()->masterId(),
479 0, gpuDynInst->wfDynId, i);
481 gpuDynInst->setRequestFlags(req);
482 PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
485 if (gpuDynInst->computeUnit()->shader->
486 separate_acquire_release &&
487 gpuDynInst->memoryOrder ==
488 Enums::MEMORY_ORDER_SC_ACQUIRE) {
489 // if this load has acquire semantics,
490 // set the response continuation function
491 // to perform an Acquire request
492 gpuDynInst->execContinuation =
493 &GPUStaticInst::execLdAcq;
495 gpuDynInst->useContinuation = true;
497 // the request will be finished when
498 // the load completes
499 gpuDynInst->useContinuation = false;
501 // translation is performed in sendRequest()
502 gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
510 gpuDynInst->updateStats();
// Continuation: runs after the load data returns; issues the Acquire
// fence when acquire semantics are in effect.
515 execLdAcq(GPUDynInstPtr gpuDynInst) override
517 // after the load has complete and if the load has acquire
518 // semantics, issue an acquire request.
520 if (gpuDynInst->computeUnit()->shader->separate_acquire_release
521 && gpuDynInst->memoryOrder ==
522 Enums::MEMORY_ORDER_SC_ACQUIRE) {
523 gpuDynInst->statusBitVector = VectorMask(1);
524 gpuDynInst->useContinuation = false;
// Dummy (zero-size) request carrying only the ACQUIRE flag.
526 Request *req = new Request(0, 0, 0, 0,
527 gpuDynInst->computeUnit()->masterId(),
528 0, gpuDynInst->wfDynId, -1);
529 req->setFlags(Request::ACQUIRE);
530 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
// Group-segment loads target LDS (local/shared memory).
537 isLocalMem() const override
539 return this->segment == Brig::BRIG_SEGMENT_GROUP;
// Operand-kind queries. Last index (when the address is in a register)
// refers to the address; otherwise indices select dest_vect elements or
// the scalar base-class dest.
542 bool isVectorRegister(int operandIndex)
544 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
545 if ((num_dest_operands != getNumOperands()) &&
546 (operandIndex == (getNumOperands()-1)))
547 return(this->addr.isVectorRegister());
548 if (num_dest_operands > 1) {
549 return dest_vect[operandIndex].isVectorRegister();
551 else if (num_dest_operands == 1) {
552 return LdInstBase<typename MemDataType::CType,
553 typename DestDataType::OperandType,
554 AddrOperandType>::dest.isVectorRegister();
558 bool isCondRegister(int operandIndex)
560 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
561 if ((num_dest_operands != getNumOperands()) &&
562 (operandIndex == (getNumOperands()-1)))
563 return(this->addr.isCondRegister());
564 if (num_dest_operands > 1)
565 return dest_vect[operandIndex].isCondRegister();
566 else if (num_dest_operands == 1)
567 return LdInstBase<typename MemDataType::CType,
568 typename DestDataType::OperandType,
569 AddrOperandType>::dest.isCondRegister();
572 bool isScalarRegister(int operandIndex)
574 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
575 if ((num_dest_operands != getNumOperands()) &&
576 (operandIndex == (getNumOperands()-1)))
577 return(this->addr.isScalarRegister());
578 if (num_dest_operands > 1)
579 return dest_vect[operandIndex].isScalarRegister();
580 else if (num_dest_operands == 1)
581 return LdInstBase<typename MemDataType::CType,
582 typename DestDataType::OperandType,
583 AddrOperandType>::dest.isScalarRegister();
// Only the trailing address operand can be a source for a load.
586 bool isSrcOperand(int operandIndex)
588 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
589 if ((num_dest_operands != getNumOperands()) &&
590 (operandIndex == (getNumOperands()-1)))
591 return(this->addr.isVectorRegister());
// All non-address operands are destinations.
594 bool isDstOperand(int operandIndex)
596 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
597 if ((num_dest_operands != getNumOperands()) &&
598 (operandIndex == (getNumOperands()-1)))
602 int getOperandSize(int operandIndex)
604 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
605 if ((num_dest_operands != getNumOperands()) &&
606 (operandIndex == (getNumOperands()-1)))
607 return(this->addr.opSize());
608 if (num_dest_operands > 1)
609 return(dest_vect[operandIndex].opSize());
610 else if (num_dest_operands == 1)
611 return(LdInstBase<typename MemDataType::CType,
612 typename DestDataType::OperandType,
613 AddrOperandType>::dest.opSize());
616 int getRegisterIndex(int operandIndex)
618 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
619 if ((num_dest_operands != getNumOperands()) &&
620 (operandIndex == (getNumOperands()-1)))
621 return(this->addr.regIndex());
622 if (num_dest_operands > 1)
623 return(dest_vect[operandIndex].regIndex());
624 else if (num_dest_operands == 1)
625 return(LdInstBase<typename MemDataType::CType,
626 typename DestDataType::OperandType,
627 AddrOperandType>::dest.regIndex());
// Total operands: destinations plus one for a register-based address.
632 if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
633 return(num_dest_operands+1);
635 return(num_dest_operands);
637 void execute(GPUDynInstPtr gpuDynInst);
640 template<typename MemDT, typename DestDT>
642 decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj)
644 unsigned op_offs = obj->getOperandPtr(ib->operands,1);
645 BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
647 if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
648 return new LdInst<MemDT, DestDT, NoRegAddrOperand>(ib, obj, "ld");
649 } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
650 tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
651 switch (tmp.regKind) {
652 case Brig::BRIG_REGISTER_KIND_SINGLE:
653 return new LdInst<MemDT, DestDT,
654 SRegAddrOperand>(ib, obj, "ld");
655 case Brig::BRIG_REGISTER_KIND_DOUBLE:
656 return new LdInst<MemDT, DestDT,
657 DRegAddrOperand>(ib, obj, "ld");
659 fatal("Bad ld register operand type %d\n", tmp.regKind);
662 fatal("Bad ld register operand kind %d\n", tmp.kind);
// decodeLd: first-stage 'ld' factory. Inspects the destination register
// (BRIG operand 0) and maps the destination BRIG type to the simulator
// destination data type, then forwards to decodeLd2. Signed/unsigned/
// float subtypes collapse onto a same-width B/U/S type (e.g. f32 -> U32,
// f64 -> U64). Unknown combinations are fatal decode errors.
// NOTE(review): inner 'switch (ib->type)' headers and 'default:' labels
// appear elided in this chunk.
666 template<typename MemDT>
668 decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj)
670 unsigned op_offs = obj->getOperandPtr(ib->operands,0);
671 BrigRegOperandInfo dest = findRegDataType(op_offs, obj);
673 assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
674 dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
675 switch(dest.regKind) {
// 32-bit destination registers.
676 case Brig::BRIG_REGISTER_KIND_SINGLE:
678 case Brig::BRIG_TYPE_B8:
679 case Brig::BRIG_TYPE_B16:
680 case Brig::BRIG_TYPE_B32:
681 return decodeLd2<MemDT, B32>(ib, obj);
682 case Brig::BRIG_TYPE_U8:
683 case Brig::BRIG_TYPE_U16:
684 case Brig::BRIG_TYPE_U32:
685 return decodeLd2<MemDT, U32>(ib, obj);
686 case Brig::BRIG_TYPE_S8:
687 case Brig::BRIG_TYPE_S16:
688 case Brig::BRIG_TYPE_S32:
689 return decodeLd2<MemDT, S32>(ib, obj);
// Floats are carried in same-width unsigned destination registers.
690 case Brig::BRIG_TYPE_F16:
691 case Brig::BRIG_TYPE_F32:
692 return decodeLd2<MemDT, U32>(ib, obj);
694 fatal("Bad ld register operand type %d, %d\n",
695 dest.regKind, ib->type);
// 64-bit destination registers.
697 case Brig::BRIG_REGISTER_KIND_DOUBLE:
699 case Brig::BRIG_TYPE_B64:
700 return decodeLd2<MemDT, B64>(ib, obj);
701 case Brig::BRIG_TYPE_U64:
702 return decodeLd2<MemDT, U64>(ib, obj);
703 case Brig::BRIG_TYPE_S64:
704 return decodeLd2<MemDT, S64>(ib, obj);
705 case Brig::BRIG_TYPE_F64:
706 return decodeLd2<MemDT, U64>(ib, obj);
708 fatal("Bad ld register operand type %d, %d\n",
709 dest.regKind, ib->type);
712 fatal("Bad ld register operand type %d, %d\n", dest.regKind,
// StInstBase: shared state and decode logic for HSAIL stores. Mirrors
// LdInstBase but with a source operand instead of a destination, and
// *_WRITE operation types. Decodes either a plain BRIG 'st' (initSt:
// operand 0 = source, operand 1 = address) or an atomic-store encoding
// (initAtomicSt: operand 0 = address, operand 1 = source -- note the
// swapped operand order). Operand index 0 -> src, otherwise -> addr.
// NOTE(review): braces / 'break;'s / switch headers are elided in this
// chunk; code kept verbatim.
717 template<typename MemDataType, typename SrcOperandType,
718 typename AddrOperandType>
719 class StInstBase : public HsailGPUStaticInst
722 typename SrcOperandType::SrcOperand src;
723 AddrOperandType addr;
725 Brig::BrigSegment segment;
726 Brig::BrigMemoryScope memoryScope;
727 Brig::BrigMemoryOrder memoryOrder;
728 unsigned int equivClass;
// Decode a plain BRIG 'st': non-atomic, order/scope default to NONE.
731 initSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
734 using namespace Brig;
736 const BrigInstMem *ldst = (const BrigInstMem*)ib;
738 segment = (BrigSegment)ldst->segment;
739 memoryOrder = BRIG_MEMORY_ORDER_NONE;
740 memoryScope = BRIG_MEMORY_SCOPE_NONE;
741 equivClass = ldst->equivClass;
// Map segment -> write-operation type (switch header / breaks elided).
744 case BRIG_SEGMENT_GLOBAL:
745 o_type = Enums::OT_GLOBAL_WRITE;
748 case BRIG_SEGMENT_GROUP:
749 o_type = Enums::OT_SHARED_WRITE;
752 case BRIG_SEGMENT_PRIVATE:
753 o_type = Enums::OT_PRIVATE_WRITE;
756 case BRIG_SEGMENT_READONLY:
757 o_type = Enums::OT_READONLY_WRITE;
760 case BRIG_SEGMENT_SPILL:
761 o_type = Enums::OT_SPILL_WRITE;
764 case BRIG_SEGMENT_FLAT:
765 o_type = Enums::OT_FLAT_WRITE;
768 case BRIG_SEGMENT_ARG:
769 o_type = Enums::OT_ARG;
773 panic("St: segment %d not supported\n", segment);
// Operand 0 is the stored value (immediate bytes or a register);
// operand 1 is the address.
776 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
777 const BrigOperand *baseOp = obj->getOperand(op_offs);
779 if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
780 (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
781 src.init(op_offs, obj);
784 op_offs = obj->getOperandPtr(ib->operands, 1);
785 addr.init(op_offs, obj);
// Decode an atomic-store encoding: order/scope come from the BRIG
// atomic record; operand order is address first, then source.
789 initAtomicSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
792 using namespace Brig;
794 const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
796 segment = (BrigSegment)at->segment;
797 memoryScope = (BrigMemoryScope)at->memoryScope;
798 memoryOrder = (BrigMemoryOrder)at->memoryOrder;
// Same segment -> write-operation-type mapping as initSt above.
802 case BRIG_SEGMENT_GLOBAL:
803 o_type = Enums::OT_GLOBAL_WRITE;
806 case BRIG_SEGMENT_GROUP:
807 o_type = Enums::OT_SHARED_WRITE;
810 case BRIG_SEGMENT_PRIVATE:
811 o_type = Enums::OT_PRIVATE_WRITE;
814 case BRIG_SEGMENT_READONLY:
815 o_type = Enums::OT_READONLY_WRITE;
818 case BRIG_SEGMENT_SPILL:
819 o_type = Enums::OT_SPILL_WRITE;
822 case BRIG_SEGMENT_FLAT:
823 o_type = Enums::OT_FLAT_WRITE;
826 case BRIG_SEGMENT_ARG:
827 o_type = Enums::OT_ARG;
831 panic("St: segment %d not supported\n", segment);
834 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
835 addr.init(op_offs, obj);
837 op_offs = obj->getOperandPtr(ib->operands, 1);
838 src.init(op_offs, obj);
// Constructor: dispatch to the plain-st or atomic-st decoder based on
// the BRIG opcode.
841 StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
843 : HsailGPUStaticInst(obj, _opcode)
845 using namespace Brig;
847 if (ib->opcode == BRIG_OPCODE_ST) {
848 initSt(ib, obj, _opcode);
850 initAtomicSt(ib, obj, _opcode);
// Stores write no registers; sources are the value and (possibly) the
// address register.
854 int numDstRegOperands() { return 0; }
855 int numSrcRegOperands()
857 return src.isVectorRegister() + this->addr.isVectorRegister();
// getNumOperands (header elided): presumably 2 when the address is
// register-based, else 1 -- TODO confirm in the full file.
861 if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
// Operand-kind queries: index 0 -> src, otherwise -> addr.
866 bool isVectorRegister(int operandIndex)
868 assert(operandIndex >= 0 && operandIndex < getNumOperands());
869 return !operandIndex ? src.isVectorRegister() :
870 this->addr.isVectorRegister();
872 bool isCondRegister(int operandIndex)
874 assert(operandIndex >= 0 && operandIndex < getNumOperands());
875 return !operandIndex ? src.isCondRegister() :
876 this->addr.isCondRegister();
878 bool isScalarRegister(int operandIndex)
880 assert(operandIndex >= 0 && operandIndex < getNumOperands());
881 return !operandIndex ? src.isScalarRegister() :
882 this->addr.isScalarRegister();
// Every operand of a store is a source; none is a destination.
884 bool isSrcOperand(int operandIndex)
886 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
889 bool isDstOperand(int operandIndex) { return false; }
890 int getOperandSize(int operandIndex)
892 assert(operandIndex >= 0 && operandIndex < getNumOperands());
893 return !operandIndex ? src.opSize() : this->addr.opSize();
895 int getRegisterIndex(int operandIndex)
897 assert(operandIndex >= 0 && operandIndex < getNumOperands());
898 return !operandIndex ? src.regIndex() : this->addr.regIndex();
// StInst: concrete HSAIL store. Supports vector sources (up to 4
// registers via src_vect / num_src_operands, decoded from a BRIG
// operand list; srcIdx selects which BRIG operand holds the source).
// initiateAcc() issues a Release fence first when release semantics
// apply, with execSt() as the continuation that performs the actual
// stores; otherwise stores are issued immediately.
// Operand indexing: [0, num_src_operands) are sources; the final index
// (when present) is the address operand.
// NOTE(review): lines are elided in this chunk (braces, else-arms, some
// call arguments); code kept verbatim, comments hedge accordingly.
903 template<typename MemDataType, typename SrcDataType,
904 typename AddrOperandType>
906 public StInstBase<MemDataType, typename SrcDataType::OperandType,
911 typename SrcDataType::OperandType::SrcOperand src_vect[4];
912 uint16_t num_src_operands;
913 void generateDisassembly();
// Constructor: register the address operand, classify the source
// operand (constant bytes vs register info), and decode vector sources.
915 StInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
916 const char *_opcode, int srcIdx)
917 : StInstBase<MemDataType, typename SrcDataType::OperandType,
918 AddrOperandType>(ib, obj, _opcode),
919 MemInst(SrcDataType::memType)
921 init_addr(&this->addr);
923 BrigRegOperandInfo rinfo;
924 unsigned op_offs = obj->getOperandPtr(ib->operands,srcIdx);
925 const Brig::BrigOperand *baseOp = obj->getOperand(op_offs);
927 if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
928 const Brig::BrigOperandConstantBytes *op =
929 (Brig::BrigOperandConstantBytes*)baseOp;
931 rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind,
932 Brig::BRIG_TYPE_NONE);
934 rinfo = findRegDataType(op_offs, obj);
937 if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
938 const Brig::BrigOperandOperandList *brigRegVecOp =
939 (const Brig::BrigOperandOperandList*)baseOp;
// Element count = list byte size / 4 (4-byte entries, presumably).
942 *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;
944 assert(num_src_operands <= 4);
946 num_src_operands = 1;
949 if (num_src_operands > 1) {
950 assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
952 for (int i = 0; i < num_src_operands; ++i) {
953 src_vect[i].init_from_vect(op_offs, obj, i);
// Entry point for the store's memory phase: issue a Release fence
// first when release semantics apply, otherwise store immediately.
959 initiateAcc(GPUDynInstPtr gpuDynInst) override
961 // before performing a store, check if this store has
962 // release semantics, and if so issue a release first
964 if (gpuDynInst->computeUnit()->shader->separate_acquire_release
965 && gpuDynInst->memoryOrder ==
966 Enums::MEMORY_ORDER_SC_RELEASE) {
968 gpuDynInst->statusBitVector = VectorMask(1);
// execSt() runs as the continuation once the fence completes.
969 gpuDynInst->execContinuation = &GPUStaticInst::execSt;
970 gpuDynInst->useContinuation = true;
// Dummy (zero-size) request carrying only the RELEASE flag.
972 Request *req = new Request(0, 0, 0, 0,
973 gpuDynInst->computeUnit()->masterId(),
974 0, gpuDynInst->wfDynId, -1);
975 req->setFlags(Request::RELEASE);
976 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
982 // if there is no release semantic, perform stores immediately
// Group-segment stores target LDS (local/shared memory).
987 isLocalMem() const override
989 return this->segment == Brig::BRIG_SEGMENT_GROUP;
993 // execSt may be called through a continuation
994 // if the store had release semantics. see comment for
995 // execSt in gpu_static_inst.hh
997 execSt(GPUDynInstPtr gpuDynInst) override
999 typedef typename MemDataType::CType c0;
// Mark all active lanes as having outstanding accesses.
1001 gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
// Vector stores track per-lane completion counts in statusVector.
1003 if (num_src_operands > 1) {
1004 for (int i = 0; i < VSZ; ++i)
1005 if (gpuDynInst->exec_mask[i])
1006 gpuDynInst->statusVector.push_back(num_src_operands);
1008 gpuDynInst->statusVector.push_back(0);
// One pass per source element (k), one access per lane (i).
1011 for (int k = 0; k < num_src_operands; ++k) {
1012 c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];
1014 for (int i = 0; i < VSZ; ++i) {
1015 if (gpuDynInst->exec_mask[i]) {
1016 Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
1019 //store to shared memory
1020 gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
// Non-LDS path: build a Request/WriteReq packet per lane.
1024 new Request(0, vaddr, sizeof(c0), 0,
1025 gpuDynInst->computeUnit()->masterId(),
1026 0, gpuDynInst->wfDynId, i);
1028 gpuDynInst->setRequestFlags(req);
1029 PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
1030 pkt->dataStatic<c0>(d);
1032 // translation is performed in sendRequest()
1033 // the request will be finished when the store completes
1034 gpuDynInst->useContinuation = false;
1035 gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
1044 gpuDynInst->updateStats();
// Operand-kind queries. Index == num_src_operands refers to the
// address; otherwise indices select src_vect elements or the scalar
// base-class src.
1048 bool isVectorRegister(int operandIndex)
1050 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1051 if (operandIndex == num_src_operands)
1052 return this->addr.isVectorRegister();
1053 if (num_src_operands > 1)
1054 return src_vect[operandIndex].isVectorRegister();
1055 else if (num_src_operands == 1)
1056 return StInstBase<MemDataType,
1057 typename SrcDataType::OperandType,
1058 AddrOperandType>::src.isVectorRegister();
1061 bool isCondRegister(int operandIndex)
1063 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1064 if (operandIndex == num_src_operands)
1065 return this->addr.isCondRegister();
1066 if (num_src_operands > 1)
1067 return src_vect[operandIndex].isCondRegister();
1068 else if (num_src_operands == 1)
1069 return StInstBase<MemDataType,
1070 typename SrcDataType::OperandType,
1071 AddrOperandType>::src.isCondRegister();
1074 bool isScalarRegister(int operandIndex)
1076 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1077 if (operandIndex == num_src_operands)
1078 return this->addr.isScalarRegister();
1079 if (num_src_operands > 1)
1080 return src_vect[operandIndex].isScalarRegister();
1081 else if (num_src_operands == 1)
1082 return StInstBase<MemDataType,
1083 typename SrcDataType::OperandType,
1084 AddrOperandType>::src.isScalarRegister();
// Every operand of a store is a source; none is a destination.
1087 bool isSrcOperand(int operandIndex)
1089 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1092 bool isDstOperand(int operandIndex) { return false; }
1093 int getOperandSize(int operandIndex)
1095 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1096 if (operandIndex == num_src_operands)
1097 return this->addr.opSize();
1098 if (num_src_operands > 1)
1099 return src_vect[operandIndex].opSize();
1100 else if (num_src_operands == 1)
1101 return StInstBase<MemDataType,
1102 typename SrcDataType::OperandType,
1103 AddrOperandType>::src.opSize();
1106 int getRegisterIndex(int operandIndex)
1108 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1109 if (operandIndex == num_src_operands)
1110 return this->addr.regIndex();
1111 if (num_src_operands > 1)
1112 return src_vect[operandIndex].regIndex();
1113 else if (num_src_operands == 1)
1114 return StInstBase<MemDataType,
1115 typename SrcDataType::OperandType,
1116 AddrOperandType>::src.regIndex();
// Total operands: sources plus one for a register-based address.
1119 int getNumOperands()
1121 if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
1122 return num_src_operands + 1;
1124 return num_src_operands;
1126 void execute(GPUDynInstPtr gpuDynInst);
// decodeSt: factory for 'st' instructions (also reached for atomic
// opcodes, which shift the source/dest operand indices -- the index
// assignments themselves are elided in this chunk). Inspects the
// operand at destIdx and picks the address-operand flavor: none,
// single (32-bit) register, or double (64-bit) register.
// NOTE(review): the default arm prints tmp.type here while the ld
// decoder prints tmp.regKind -- possibly inconsistent; confirm intent.
1129 template<typename DataType, typename SrcDataType>
1131 decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj)
1135 if (ib->opcode == Brig::BRIG_OPCODE_ATOMIC ||
1136 ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) {
1140 unsigned op_offs = obj->getOperandPtr(ib->operands,destIdx);
1142 BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
1144 if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
1145 return new StInst<DataType, SrcDataType,
1146 NoRegAddrOperand>(ib, obj, "st", srcIdx);
1147 } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
1148 // V2/V4 not allowed
1149 switch (tmp.regKind) {
1150 case Brig::BRIG_REGISTER_KIND_SINGLE:
1151 return new StInst<DataType, SrcDataType,
1152 SRegAddrOperand>(ib, obj, "st", srcIdx);
1153 case Brig::BRIG_REGISTER_KIND_DOUBLE:
1154 return new StInst<DataType, SrcDataType,
1155 DRegAddrOperand>(ib, obj, "st", srcIdx);
1157 fatal("Bad st register operand type %d\n", tmp.type);
1160 fatal("Bad st register operand kind %d\n", tmp.kind);
1164 Enums::MemOpType brigAtomicToMemOpType(Brig::BrigOpcode brigOpCode,
1165 Brig::BrigAtomicOperation brigOp);
// Common base for HSAIL atomic instructions (both the value-returning
// "atomic" and the no-return "atomicnoret" forms). It decodes the
// BrigInstAtomic record (segment / memory order / scope / operation)
// and initializes the dest, address and source operands.
1167 template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
1169 class AtomicInstBase : public HsailGPUStaticInst
// Destination operand (old value returned by the RMW) plus the source
// operands. NumSrcOperands is 2 for CAS (compare + swap values) and 1
// for all other atomic ops — see decodeAtomic()/decodeAtomicNoRet().
1172 typename OperandType::DestOperand dest;
1173 typename OperandType::SrcOperand src[NumSrcOperands];
1174 AddrOperandType addr;
// Raw BRIG attributes, copied verbatim from the BrigInstAtomic record
// in the constructor.
1176 Brig::BrigSegment segment;
1177 Brig::BrigMemoryOrder memoryOrder;
1178 Brig::BrigAtomicOperation atomicOperation;
1179 Brig::BrigMemoryScope memoryScope;
1180 Brig::BrigOpcode opcode;
// Simulator-internal op type derived from (opcode, atomicOperation)
// via brigAtomicToMemOpType(); drives the big switch in execAtomic().
1181 Enums::MemOpType opType;
// Decode the instruction: cache the BRIG attributes, classify o_type
// by segment, then initialize the operands. Operand layout differs by
// opcode: the returning form is [dest, addr, src...]; the no-return
// form (ATOMICNORET) is [addr, src...] — presumably selected by the
// elided branch around line 1214 (TODO confirm against full file).
1183 AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
1184 const char *_opcode)
1185 : HsailGPUStaticInst(obj, _opcode)
1187 using namespace Brig;
1189 const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
1191 segment = (BrigSegment)at->segment;
1192 memoryScope = (BrigMemoryScope)at->memoryScope;
1193 memoryOrder = (BrigMemoryOrder)at->memoryOrder;
1194 atomicOperation = (BrigAtomicOperation)at->atomicOperation;
1195 opcode = (BrigOpcode)ib->opcode;
1196 opType = brigAtomicToMemOpType(opcode, atomicOperation);
// Classify the operation by address segment: global memory, group
// (LDS / shared) or flat. Any other segment is a fatal decode error.
1199 case BRIG_SEGMENT_GLOBAL:
1200 o_type = Enums::OT_GLOBAL_ATOMIC;
1203 case BRIG_SEGMENT_GROUP:
1204 o_type = Enums::OT_SHARED_ATOMIC;
1207 case BRIG_SEGMENT_FLAT:
1208 o_type = Enums::OT_FLAT_ATOMIC;
1212 panic("Atomic: segment %d not supported\n", segment);
// Returning form: operand 0 is the destination register, operand 1 the
// address, operands 2.. the sources.
1216 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
1217 dest.init(op_offs, obj);
1219 op_offs = obj->getOperandPtr(ib->operands, 1);
1220 addr.init(op_offs, obj);
1222 for (int i = 0; i < NumSrcOperands; ++i) {
1223 op_offs = obj->getOperandPtr(ib->operands, i + 2);
1224 src[i].init(op_offs, obj);
// No-return form: operand 0 is the address, operands 1.. the sources;
// there is no destination register.
1228 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
1229 addr.init(op_offs, obj);
1231 for (int i = 0; i < NumSrcOperands; ++i) {
1232 op_offs = obj->getOperandPtr(ib->operands, i + 1);
1233 src[i].init(op_offs, obj);
// Count of source operands held in vector registers; the address
// operand also counts as a source when it is register-based.
1238 int numSrcRegOperands()
1241 for (int i = 0; i < NumSrcOperands; i++) {
1242 if (src[i].isVectorRegister()) {
1246 if (addr.isVectorRegister())
// dest contributes exactly one register operand when it is a vector
// register (bool implicitly converts to 0/1).
1250 int numDstRegOperands() { return dest.isVectorRegister(); }
// Total operand count: sources plus dest, plus one extra slot for the
// address when the address is register-based.
1251 int getNumOperands()
1253 if (addr.isVectorRegister())
1254 return(NumSrcOperands + 2);
1255 return(NumSrcOperands + 1);
// Operand-index convention for all the per-operand queries below:
//   [0, NumSrcOperands)  -> src[i]
//   NumSrcOperands       -> addr (when register-based)
//   last                 -> dest
1257 bool isVectorRegister(int operandIndex)
1259 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1260 if (operandIndex < NumSrcOperands)
1261 return src[operandIndex].isVectorRegister();
1262 else if (operandIndex == NumSrcOperands)
1263 return(addr.isVectorRegister());
1265 return dest.isVectorRegister();
1267 bool isCondRegister(int operandIndex)
1269 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1270 if (operandIndex < NumSrcOperands)
1271 return src[operandIndex].isCondRegister();
1272 else if (operandIndex == NumSrcOperands)
1273 return(addr.isCondRegister());
1275 return dest.isCondRegister();
1277 bool isScalarRegister(int operandIndex)
1279 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1280 if (operandIndex < NumSrcOperands)
1281 return src[operandIndex].isScalarRegister();
1282 else if (operandIndex == NumSrcOperands)
1283 return(addr.isScalarRegister());
1285 return dest.isScalarRegister();
// Source/dest classification: all src slots are sources; the addr slot
// is a source only when it is register-based (elided lines presumably
// return true/false accordingly — TODO confirm against full file).
1287 bool isSrcOperand(int operandIndex)
1289 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1290 if (operandIndex < NumSrcOperands)
1292 else if (operandIndex == NumSrcOperands)
1293 return(addr.isVectorRegister());
// NOTE(review): `<=` treats src and addr slots uniformly here; only the
// trailing dest slot appears to be classified as a destination — verify
// the elided return statements before relying on this.
1297 bool isDstOperand(int operandIndex)
1299 if (operandIndex <= NumSrcOperands)
1304 int getOperandSize(int operandIndex)
1306 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1307 if (operandIndex < NumSrcOperands)
1308 return(src[operandIndex].opSize());
1309 else if (operandIndex == NumSrcOperands)
1310 return(addr.opSize());
1312 return(dest.opSize());
1314 int getRegisterIndex(int operandIndex)
1316 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1317 if (operandIndex < NumSrcOperands)
1318 return(src[operandIndex].regIndex());
1319 else if (operandIndex == NumSrcOperands)
1320 return(addr.regIndex());
1322 return(dest.regIndex());
// Concrete HSAIL atomic instruction. HasDst (presumably the elided
// fourth template parameter — TODO confirm) distinguishes "atomic"
// (returns the old value) from "atomicnoret". Inherits operand decode
// from AtomicInstBase and address handling from MemInst.
1327 template<typename MemDataType, typename AddrOperandType, int NumSrcOperands,
1330 public AtomicInstBase<typename MemDataType::OperandType,
1331 AddrOperandType, NumSrcOperands, HasDst>,
1335 void generateDisassembly();
1337 AtomicInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
1338 const char *_opcode)
1339 : AtomicInstBase<typename MemDataType::OperandType, AddrOperandType,
1340 NumSrcOperands, HasDst>
1342 MemInst(MemDataType::memType)
1344 init_addr(&this->addr);
// Entry point for timing-mode execution. For global-memory atomics
// with release (or acq_rel) semantics under separate_acquire_release,
// a release fence is injected first and the RMW itself is deferred to
// a continuation; otherwise the RMW starts immediately.
1348 initiateAcc(GPUDynInstPtr gpuDynInst) override
1350 // before doing the RMW, check if this atomic has
1351 // release semantics, and if so issue a release first
1352 if (!isLocalMem()) {
1353 if (gpuDynInst->computeUnit()->shader->separate_acquire_release
1354 && (gpuDynInst->memoryOrder ==
1355 Enums::MEMORY_ORDER_SC_RELEASE || gpuDynInst->memoryOrder ==
1356 Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE)) {
// A single status bit tracks the fence; the RMW runs as the
// continuation once the release completes.
1358 gpuDynInst->statusBitVector = VectorMask(1);
1360 gpuDynInst->execContinuation = &GPUStaticInst::execAtomic;
1361 gpuDynInst->useContinuation = true;
// Zero-sized request used purely as a release memory fence.
1364 Request *req = new Request(0, 0, 0, 0,
1365 gpuDynInst->computeUnit()->masterId(),
1366 0, gpuDynInst->wfDynId, -1);
1367 req->setFlags(Request::RELEASE);
1368 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
1374 // if there is no release semantic, execute the RMW immediately
1375 execAtomic(gpuDynInst);
1379 void execute(GPUDynInstPtr gpuDynInst);
// Group-segment atomics operate on the wavefront's LDS chunk rather
// than the global memory system.
1382 isLocalMem() const override
1384 return this->segment == Brig::BRIG_SEGMENT_GROUP;
1388 // execAtomic may be called through a continuation
1389 // if the RMW had release semantics. see comment for
1390 // execContinuation in gpu_dyn_inst.hh
1392 execAtomic(GPUDynInstPtr gpuDynInst) override
1394 gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
// c0 is the element type; d = destination (old value), e = first
// source (operand), f = second source (CAS swap value).
1396 typedef typename MemDataType::CType c0;
1398 c0 *d = &((c0*) gpuDynInst->d_data)[0];
1399 c0 *e = &((c0*) gpuDynInst->a_data)[0];
1400 c0 *f = &((c0*) gpuDynInst->x_data)[0];
// Per-lane loop over the wavefront (VSZ lanes), honoring exec_mask.
1402 for (int i = 0; i < VSZ; ++i) {
1403 if (gpuDynInst->exec_mask[i]) {
1404 Addr vaddr = gpuDynInst->addr[i];
// Local (LDS) path: read the old value into d, then perform the
// read-modify-write functionally on the LDS chunk. MO_ANR* are the
// no-return variants of the same operations.
1407 Wavefront *wavefront = gpuDynInst->wavefront();
1408 *d = wavefront->ldsChunk->read<c0>(vaddr);
1410 switch (this->opType) {
1411 case Enums::MO_AADD:
1412 case Enums::MO_ANRADD:
1413 wavefront->ldsChunk->write<c0>(vaddr,
1414 wavefront->ldsChunk->read<c0>(vaddr) + (*e));
1416 case Enums::MO_ASUB:
1417 case Enums::MO_ANRSUB:
1418 wavefront->ldsChunk->write<c0>(vaddr,
1419 wavefront->ldsChunk->read<c0>(vaddr) - (*e));
1421 case Enums::MO_AMAX:
1422 case Enums::MO_ANRMAX:
1423 wavefront->ldsChunk->write<c0>(vaddr,
1424 std::max(wavefront->ldsChunk->read<c0>(vaddr),
1427 case Enums::MO_AMIN:
1428 case Enums::MO_ANRMIN:
1429 wavefront->ldsChunk->write<c0>(vaddr,
1430 std::min(wavefront->ldsChunk->read<c0>(vaddr),
1433 case Enums::MO_AAND:
1434 case Enums::MO_ANRAND:
1435 wavefront->ldsChunk->write<c0>(vaddr,
1436 wavefront->ldsChunk->read<c0>(vaddr) & (*e));
1439 case Enums::MO_ANROR:
1440 wavefront->ldsChunk->write<c0>(vaddr,
1441 wavefront->ldsChunk->read<c0>(vaddr) | (*e));
1443 case Enums::MO_AXOR:
1444 case Enums::MO_ANRXOR:
1445 wavefront->ldsChunk->write<c0>(vaddr,
1446 wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
1448 case Enums::MO_AINC:
1449 case Enums::MO_ANRINC:
1450 wavefront->ldsChunk->write<c0>(vaddr,
1451 wavefront->ldsChunk->read<c0>(vaddr) + 1);
1453 case Enums::MO_ADEC:
1454 case Enums::MO_ANRDEC:
1455 wavefront->ldsChunk->write<c0>(vaddr,
1456 wavefront->ldsChunk->read<c0>(vaddr) - 1);
1458 case Enums::MO_AEXCH:
1459 case Enums::MO_ANREXCH:
1460 wavefront->ldsChunk->write<c0>(vaddr, (*e));
// CAS: store *f if the current value equals *e, else store the
// current value back unchanged.
1462 case Enums::MO_ACAS:
1463 case Enums::MO_ANRCAS:
1464 wavefront->ldsChunk->write<c0>(vaddr,
1465 (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
1466 (*f) : wavefront->ldsChunk->read<c0>(vaddr));
1469 fatal("Unrecognized or invalid HSAIL atomic op "
// Global path: build a per-lane Request carrying an atomic-op functor;
// the memory system performs the RMW.
1475 new Request(0, vaddr, sizeof(c0), 0,
1476 gpuDynInst->computeUnit()->masterId(),
1477 0, gpuDynInst->wfDynId, i,
1478 gpuDynInst->makeAtomicOpFunctor<c0>(e,
1481 gpuDynInst->setRequestFlags(req);
1482 PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
1485 if (gpuDynInst->computeUnit()->shader->
1486 separate_acquire_release &&
1487 (gpuDynInst->memoryOrder ==
1488 Enums::MEMORY_ORDER_SC_ACQUIRE)) {
1489 // if this atomic has acquire semantics,
1490 // schedule the continuation to perform an
1491 // acquire after the RMW completes
1492 gpuDynInst->execContinuation =
1493 &GPUStaticInst::execAtomicAcq;
1495 gpuDynInst->useContinuation = true;
1497 // the request will be finished when the RMW completes
1498 gpuDynInst->useContinuation = false;
1500 // translation is performed in sendRequest()
1501 gpuDynInst->computeUnit()->sendRequest(gpuDynInst, i,
1511 gpuDynInst->updateStats();
1514 // execAtomicAcq will always be called through a continuation.
1515 // see comment for execContinuation in gpu_dyn_inst.hh
1517 execAtomicAcq(GPUDynInstPtr gpuDynInst) override
1519 // after performing the RMW, check to see if this instruction
1520 // has acquire semantics, and if so, issue an acquire
1521 if (!isLocalMem()) {
1522 if (gpuDynInst->computeUnit()->shader->separate_acquire_release
1523 && gpuDynInst->memoryOrder ==
1524 Enums::MEMORY_ORDER_SC_ACQUIRE) {
1525 gpuDynInst->statusBitVector = VectorMask(1);
1527 // the request will be finished when
1528 // the acquire completes
1529 gpuDynInst->useContinuation = false;
// Zero-sized request used purely as an acquire memory fence.
1531 Request *req = new Request(0, 0, 0, 0,
1532 gpuDynInst->computeUnit()->masterId(),
1533 0, gpuDynInst->wfDynId, -1);
1534 req->setFlags(Request::ACQUIRE);
1535 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
// Build the GPUStaticInst for one decoded atomic instruction.
// BRIG_ATOMIC_LD / BRIG_ATOMIC_ST degenerate into ordinary ld/st
// decodes; every other operation becomes an AtomicInst, with the
// HasDst template flag cleared for the no-return opcode.
1541 template<typename DataType, typename AddrOperandType, int NumSrcOperands>
1543 constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
1545 const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
1547 if (at->atomicOperation == Brig::BRIG_ATOMIC_LD) {
1548 return decodeLd<DataType>(ib, obj);
1549 } else if (at->atomicOperation == Brig::BRIG_ATOMIC_ST) {
// atomic_st dispatches on the BRIG bit-type of the store; the first
// decodeSt parameter is fixed at S8 for every width — presumably a
// memory-type tag rather than the element type (TODO confirm against
// decodeSt's definition earlier in this file).
1551 case Brig::BRIG_TYPE_B8:
1552 return decodeSt<S8,S8>(ib, obj);
1553 case Brig::BRIG_TYPE_B16:
1554 return decodeSt<S8,S16>(ib, obj);
1555 case Brig::BRIG_TYPE_B32:
1556 return decodeSt<S8,S32>(ib, obj);
1557 case Brig::BRIG_TYPE_B64:
1558 return decodeSt<S8,S64>(ib, obj);
1559 default: fatal("AtomicSt: Operand type mismatch %d\n", ib->type);
// True RMW atomics: "atomicnoret" discards the old value (HasDst =
// false); plain "atomic" returns it (HasDst = true).
1562 if ((Brig::BrigOpcode)ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET)
1563 return new AtomicInst<DataType, AddrOperandType,
1564 NumSrcOperands, false>(ib, obj, "atomicnoret");
1566 return new AtomicInst<DataType, AddrOperandType,
1567 NumSrcOperands, true>(ib, obj, "atomic");
// Select the AddrOperandType for an atomic by inspecting the address
// operand's register kind, then delegate to constructAtomic. The
// address operand's index depends on the opcode: the no-return form
// has no destination, so its address is operand 0 instead of 1.
1571 template<typename DataType, int NumSrcOperands>
1573 decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj)
1575 unsigned addrIndex = (Brig::BrigOpcode)ib->opcode ==
1576 Brig::BRIG_OPCODE_ATOMICNORET ? 0 : 1;
1578 unsigned op_offs = obj->getOperandPtr(ib->operands,addrIndex);
1580 BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
// Plain address expression (no base register) vs. register-based
// address; V2/V4 vector registers are not legal address operands.
1582 if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
1583 return constructAtomic<DataType, NoRegAddrOperand,
1584 NumSrcOperands>(ib, obj);
1585 } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
1586 // V2/V4 not allowed
1587 switch (tmp.regKind) {
1588 case Brig::BRIG_REGISTER_KIND_SINGLE:
1589 return constructAtomic<DataType, SRegAddrOperand,
1590 NumSrcOperands>(ib, obj);
1591 case Brig::BRIG_REGISTER_KIND_DOUBLE:
1592 return constructAtomic<DataType, DRegAddrOperand,
1593 NumSrcOperands>(ib, obj);
1595 fatal("Bad atomic register operand type %d\n", tmp.type);
1598 fatal("Bad atomic register operand kind %d\n", tmp.kind);
// Decode entry point for the returning "atomic" opcode. CAS carries
// two source operands (compare value + swap value); every other
// atomic operation carries one.
1603 template<typename DataType>
1605 decodeAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
1607 const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
1609 if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
1610 return decodeAtomicHelper<DataType, 2>(ib, obj);
1612 return decodeAtomicHelper<DataType, 1>(ib, obj);
// Decode entry point for the "atomicnoret" opcode. Mirrors
// decodeAtomic(): two source operands for CAS, one otherwise; the
// helper distinguishes the two opcodes itself via ib->opcode.
1616 template<typename DataType>
1618 decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj)
1620 const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
1621 if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
1622 return decodeAtomicHelper<DataType, 2>(ib, obj);
1624 return decodeAtomicHelper<DataType, 1>(ib, obj);
1627 } // namespace HsailISA
1629 #endif // __ARCH_HSAIL_INSTS_MEM_HH__