/*
 * Copyright (c) 2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __GPU_STATIC_INST_HH__
#define __GPU_STATIC_INST_HH__

/*
 * @file gpu_static_inst.hh
 *
 * Defines the base class representing static instructions for the GPU. The
 * instructions are "static" because they contain no dynamic instruction
 * information. GPUStaticInst corresponds to the StaticInst class for the CPU
 * models.
 */

#include <bitset>
#include <cstdint>
#include <string>

#include "enums/GPUStaticInstFlags.hh"
#include "enums/StorageClassType.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/misc.hh"

class BaseOperand;
class BaseRegOperand;
class Wavefront;

class GPUStaticInst : public GPUStaticInstFlags
{
  public:
    GPUStaticInst(const std::string &opcode);
    virtual ~GPUStaticInst() { }
    void instAddr(int inst_addr) { _instAddr = inst_addr; }
    int instAddr() const { return _instAddr; }
    int nextInstAddr() const { return _instAddr + instSize(); }

    void instNum(int num) { _instNum = num; }

    int instNum() { return _instNum; }

    void ipdInstNum(int num) { _ipdInstNum = num; }

    int ipdInstNum() const { return _ipdInstNum; }

    virtual TheGpuISA::ScalarRegU32 srcLiteral() const { return 0; }

    virtual void execute(GPUDynInstPtr gpuDynInst) = 0;
    virtual void generateDisassembly() = 0;
    const std::string& disassemble();
    virtual int getNumOperands() = 0;
    virtual bool isScalarRegister(int operandIndex) = 0;
    virtual bool isVectorRegister(int operandIndex) = 0;
    virtual bool isSrcOperand(int operandIndex) = 0;
    virtual bool isDstOperand(int operandIndex) = 0;
    virtual bool isFlatScratchRegister(int opIdx) = 0;
    virtual bool isExecMaskRegister(int opIdx) = 0;
    virtual int getOperandSize(int operandIndex) = 0;

    virtual int getRegisterIndex(int operandIndex,
                                 GPUDynInstPtr gpuDynInst) = 0;

    virtual int numDstRegOperands() = 0;
    virtual int numSrcRegOperands() = 0;

    virtual int coalescerTokenCount() const { return 0; }

    int numDstVecOperands();
    int numSrcVecOperands();
    int numDstVecDWORDs();
    int numSrcVecDWORDs();

    int numOpdDWORDs(int operandIdx);

    bool isALU() const { return _flags[ALU]; }
    bool isBranch() const { return _flags[Branch]; }
    bool isCondBranch() const { return _flags[CondBranch]; }
    bool isNop() const { return _flags[Nop]; }
    bool isReturn() const { return _flags[Return]; }
    bool isEndOfKernel() const { return _flags[EndOfKernel]; }
    bool isKernelLaunch() const { return _flags[KernelLaunch]; }
    bool isSDWAInst() const { return _flags[IsSDWA]; }
    bool isDPPInst() const { return _flags[IsDPP]; }

    bool
    isUnconditionalJump() const
    {
        return _flags[UnconditionalJump];
    }

    bool isSpecialOp() const { return _flags[SpecialOp]; }
    bool isWaitcnt() const { return _flags[Waitcnt]; }

    bool isBarrier() const { return _flags[MemBarrier]; }
    bool isMemSync() const { return _flags[MemSync]; }
    bool isMemRef() const { return _flags[MemoryRef]; }
    bool isFlat() const { return _flags[Flat]; }
    bool isLoad() const { return _flags[Load]; }
    bool isStore() const { return _flags[Store]; }

    bool
    isAtomic() const
    {
        return _flags[AtomicReturn] || _flags[AtomicNoReturn];
    }

    bool isAtomicNoRet() const { return _flags[AtomicNoReturn]; }
    bool isAtomicRet() const { return _flags[AtomicReturn]; }

    bool isScalar() const { return _flags[Scalar]; }
    bool readsSCC() const { return _flags[ReadsSCC]; }
    bool writesSCC() const { return _flags[WritesSCC]; }
    bool readsVCC() const { return _flags[ReadsVCC]; }
    bool writesVCC() const { return _flags[WritesVCC]; }
    // Identify instructions that implicitly read the Execute (EXEC) mask
    // as a source operand, rather than using it to dictate which threads
    // execute.
    bool readsEXEC() const { return _flags[ReadsEXEC]; }
    bool writesEXEC() const { return _flags[WritesEXEC]; }
    bool readsMode() const { return _flags[ReadsMode]; }
    bool writesMode() const { return _flags[WritesMode]; }
    bool ignoreExec() const { return _flags[IgnoreExec]; }

    bool isAtomicAnd() const { return _flags[AtomicAnd]; }
    bool isAtomicOr() const { return _flags[AtomicOr]; }
    bool isAtomicXor() const { return _flags[AtomicXor]; }
    bool isAtomicCAS() const { return _flags[AtomicCAS]; }
    bool isAtomicExch() const { return _flags[AtomicExch]; }
    bool isAtomicAdd() const { return _flags[AtomicAdd]; }
    bool isAtomicSub() const { return _flags[AtomicSub]; }
    bool isAtomicInc() const { return _flags[AtomicInc]; }
    bool isAtomicDec() const { return _flags[AtomicDec]; }
    bool isAtomicMax() const { return _flags[AtomicMax]; }
    bool isAtomicMin() const { return _flags[AtomicMin]; }

    bool
    isArgLoad() const
    {
        return (_flags[KernArgSegment] || _flags[ArgSegment]) && _flags[Load];
    }

    bool
    isGlobalMem() const
    {
        return _flags[MemoryRef] && (_flags[GlobalSegment] ||
               _flags[PrivateSegment] || _flags[ReadOnlySegment] ||
               _flags[SpillSegment]);
    }

    bool
    isLocalMem() const
    {
        return _flags[MemoryRef] && _flags[GroupSegment];
    }

    bool isArgSeg() const { return _flags[ArgSegment]; }
    bool isGlobalSeg() const { return _flags[GlobalSegment]; }
    bool isGroupSeg() const { return _flags[GroupSegment]; }
    bool isKernArgSeg() const { return _flags[KernArgSegment]; }
    bool isPrivateSeg() const { return _flags[PrivateSegment]; }
    bool isReadOnlySeg() const { return _flags[ReadOnlySegment]; }
    bool isSpillSeg() const { return _flags[SpillSegment]; }

    /**
     * Coherence domain of a memory instruction. The coherence domain
     * specifies where it is possible to perform memory synchronization
     * (e.g., acquire or release) from the shader kernel.
     *
     * isGloballyCoherent(): returns true if work-items (WIs) on the same
     * device share memory
     * isSystemCoherent(): returns true if WIs or threads on different
     * devices share memory
     */
    bool isGloballyCoherent() const { return _flags[GloballyCoherent]; }
    bool isSystemCoherent() const { return _flags[SystemCoherent]; }
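    // For example, a memory operation whose result must be visible to the
    // host CPU (a different device) would be marked system coherent, while
    // one shared only among work-items on the same GPU would be marked
    // globally coherent.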

    // Floating-point instructions
    bool isF16() const { return _flags[F16]; }
    bool isF32() const { return _flags[F32]; }
    bool isF64() const { return _flags[F64]; }

    // FMA, MAC, MAD instructions
    bool isFMA() const { return _flags[FMA]; }
    bool isMAC() const { return _flags[MAC]; }
    bool isMAD() const { return _flags[MAD]; }

    virtual int instSize() const = 0;

    // only used for memory instructions
    virtual void
    initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        fatal("calling initiateAcc() on a non-memory instruction.\n");
    }

    // only used for memory instructions
    virtual void
    completeAcc(GPUDynInstPtr gpuDynInst)
    {
        fatal("calling completeAcc() on a non-memory instruction.\n");
    }

    virtual uint32_t getTargetPc() { return 0; }

    static uint64_t dynamic_id_count;

    // For flat memory accesses
    Enums::StorageClassType executed_as;

    void setFlag(Flags flag) {
        _flags[flag] = true;

        if (isGroupSeg()) {
            executed_as = Enums::SC_GROUP;
        } else if (isGlobalSeg()) {
            executed_as = Enums::SC_GLOBAL;
        } else if (isPrivateSeg()) {
            executed_as = Enums::SC_PRIVATE;
        } else if (isSpillSeg()) {
            executed_as = Enums::SC_SPILL;
        } else if (isReadOnlySeg()) {
            executed_as = Enums::SC_READONLY;
        } else if (isKernArgSeg()) {
            executed_as = Enums::SC_KERNARG;
        } else if (isArgSeg()) {
            executed_as = Enums::SC_ARG;
        }
    }
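
    // Illustrative note, not from the gem5 sources: derived instructions
    // normally call setFlag() from their constructors, and setting a segment
    // flag resolves executed_as as a side effect of the chain above. A
    // hypothetical example (class and opcode names are made up):
    //
    //     MyGlobalLoad::MyGlobalLoad() : GPUStaticInst("my_global_load")
    //     {
    //         setFlag(MemoryRef);
    //         setFlag(Load);
    //         setFlag(GlobalSegment); // executed_as becomes Enums::SC_GLOBAL
    //     }
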
    const std::string& opcode() const { return _opcode; }

  protected:
    const std::string _opcode;
    std::string disassembly;
    int _instNum;
    int _instAddr;
    int srcVecOperands;
    int dstVecOperands;
    int srcVecDWORDs;
    int dstVecDWORDs;
    /**
     * Identifier of the immediate post-dominator instruction.
     */
    int _ipdInstNum;

    std::bitset<Num_Flags> _flags;
};

class KernelLaunchStaticInst : public GPUStaticInst
{
  public:
    KernelLaunchStaticInst() : GPUStaticInst("kernel_launch")
    {
        setFlag(Nop);
        setFlag(KernelLaunch);
        setFlag(MemSync);
        setFlag(Scalar);
        setFlag(GlobalSegment);
    }

    void
    execute(GPUDynInstPtr gpuDynInst) override
    {
        fatal("kernel launch instruction should not be executed\n");
    }

    void
    generateDisassembly() override
    {
        disassembly = _opcode;
    }

    int getNumOperands() override { return 0; }
    bool isFlatScratchRegister(int opIdx) override { return false; }
    // return true if the Execute mask is explicitly used as a source
    // register operand
    bool isExecMaskRegister(int opIdx) override { return false; }
    bool isScalarRegister(int operandIndex) override { return false; }
    bool isVectorRegister(int operandIndex) override { return false; }
    bool isSrcOperand(int operandIndex) override { return false; }
    bool isDstOperand(int operandIndex) override { return false; }
    int getOperandSize(int operandIndex) override { return 0; }

    int
    getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
    {
        return 0;
    }

    int numDstRegOperands() override { return 0; }
    int numSrcRegOperands() override { return 0; }
    int instSize() const override { return 0; }
};
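
// Illustrative sketch, not part of the gem5 sources: in contrast to the
// non-memory KernelLaunchStaticInst above, a memory instruction derived from
// GPUStaticInst would set MemoryRef plus a segment flag and override
// initiateAcc()/completeAcc(), whose base-class versions simply call fatal().
// The class and opcode names below are made up for illustration, and the
// remaining pure-virtual operand queries are omitted.
//
//     class HypotheticalGlobalLoad : public GPUStaticInst
//     {
//       public:
//         HypotheticalGlobalLoad() : GPUStaticInst("hypothetical_load")
//         {
//             setFlag(MemoryRef);
//             setFlag(Load);
//             setFlag(GlobalSegment);
//         }
//
//         void
//         execute(GPUDynInstPtr gpuDynInst) override
//         {
//             // compute per-lane addresses and hand the access to the
//             // memory pipeline
//         }
//
//         void
//         initiateAcc(GPUDynInstPtr gpuDynInst) override
//         {
//             // issue the memory request(s) for this dynamic instruction
//         }
//
//         void
//         completeAcc(GPUDynInstPtr gpuDynInst) override
//         {
//             // write returned data back to the destination registers
//         }
//
//         int instSize() const override { return 4; } // assumed encoding size
//
//         // ...operand-query overrides (getNumOperands(), etc.) omitted...
//     };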

#endif // __GPU_STATIC_INST_HH__