arch: Make and use endian specific versions of the mem helpers.

[gem5.git] / src / gpu-compute / gpu_static_inst.hh
diff --git a/src/gpu-compute/gpu_static_inst.hh b/src/gpu-compute/gpu_static_inst.hh

index c1de28427e19ee52aa8679fd979ddd08d8de93e9..7b3dc7015ce6fe112d907e3f09c32dab8adf9742 100644 (file)
--- a/src/gpu-compute/gpu_static_inst.hh
+++ b/src/gpu-compute/gpu_static_inst.hh
@@ -14,9 +14,9 @@
   * this list of conditions and the following disclaimer in the documentation
   * and/or other materials provided with the distribution.
   *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
   *
   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@@ -30,7 +30,7 @@
   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   * POSSIBILITY OF SUCH DAMAGE.
   *
- * Author: Anthony Gutierrez
+ * Authors: Anthony Gutierrez
   */
  
  #ifndef __GPU_STATIC_INST_HH__
@@ -48,7 +48,7 @@
  #include <cstdint>
  #include <string>
  
-#include "enums/OpType.hh"
+#include "enums/GPUStaticInstFlags.hh"
  #include "enums/StorageClassType.hh"
  #include "gpu-compute/gpu_dyn_inst.hh"
  #include "gpu-compute/misc.hh"
@@ -57,10 +57,13 @@ class BaseOperand;
  class BaseRegOperand;
  class Wavefront;
  
-class GPUStaticInst
+class GPUStaticInst : public GPUStaticInstFlags
  {
    public:
      GPUStaticInst(const std::string &opcode);
+    void instAddr(int inst_addr) { _instAddr = inst_addr; }
+    int instAddr() const { return _instAddr; }
+    int nextInstAddr() const { return _instAddr + instSize(); }
  
      void instNum(int num) { _instNum = num; }
  
@@ -72,7 +75,7 @@ class GPUStaticInst
  
      virtual void execute(GPUDynInstPtr gpuDynInst) = 0;
      virtual void generateDisassembly() = 0;
-    virtual const std::string &disassemble() = 0;
+    const std::string& disassemble();
      virtual int getNumOperands() = 0;
      virtual bool isCondRegister(int operandIndex) = 0;
      virtual bool isScalarRegister(int operandIndex) = 0;
@@ -80,54 +83,144 @@ class GPUStaticInst
      virtual bool isSrcOperand(int operandIndex) = 0;
      virtual bool isDstOperand(int operandIndex) = 0;
      virtual int getOperandSize(int operandIndex) = 0;
-    virtual int getRegisterIndex(int operandIndex) = 0;
+
+    virtual int getRegisterIndex(int operandIndex,
+                                 GPUDynInstPtr gpuDynInst) = 0;
+
      virtual int numDstRegOperands() = 0;
      virtual int numSrcRegOperands() = 0;
  
-    /*
-     * Most instructions (including all HSAIL instructions)
-     * are vector ops, so _scalarOp will be false by default.
-     * Derived instruction objects that are scalar ops must
-     * set _scalarOp to true in their constructors.
-     */
-    bool scalarOp() const { return _scalarOp; }
+    virtual bool isValid() const = 0;
+
+    bool isALU() const { return _flags[ALU]; }
+    bool isBranch() const { return _flags[Branch]; }
+    bool isNop() const { return _flags[Nop]; }
+    bool isReturn() const { return _flags[Return]; }
  
-    virtual bool isLocalMem() const
+    bool
+    isUnconditionalJump() const
      {
-        fatal("calling isLocalMem() on non-memory instruction.\n");
+        return _flags[UnconditionalJump];
+    }
+
+    bool isSpecialOp() const { return _flags[SpecialOp]; }
+    bool isWaitcnt() const { return _flags[Waitcnt]; }
  
-        return false;
+    bool isBarrier() const { return _flags[MemBarrier]; }
+    bool isMemFence() const { return _flags[MemFence]; }
+    bool isMemRef() const { return _flags[MemoryRef]; }
+    bool isFlat() const { return _flags[Flat]; }
+    bool isLoad() const { return _flags[Load]; }
+    bool isStore() const { return _flags[Store]; }
+
+    bool
+    isAtomic() const
+    {
+        return _flags[AtomicReturn] || _flags[AtomicNoReturn];
      }
  
-    bool isArgLoad() { return false; }
-    virtual uint32_t instSize() = 0;
+    bool isAtomicNoRet() const { return _flags[AtomicNoReturn]; }
+    bool isAtomicRet() const { return _flags[AtomicReturn]; }
  
-    // only used for memory instructions
-    virtual void
-    initiateAcc(GPUDynInstPtr gpuDynInst)
+    bool isScalar() const { return _flags[Scalar]; }
+    bool readsSCC() const { return _flags[ReadsSCC]; }
+    bool writesSCC() const { return _flags[WritesSCC]; }
+    bool readsVCC() const { return _flags[ReadsVCC]; }
+    bool writesVCC() const { return _flags[WritesVCC]; }
+
+    bool isAtomicAnd() const { return _flags[AtomicAnd]; }
+    bool isAtomicOr() const { return _flags[AtomicOr]; }
+    bool isAtomicXor() const { return _flags[AtomicXor]; }
+    bool isAtomicCAS() const { return _flags[AtomicCAS]; }
+    bool isAtomicExch() const { return _flags[AtomicExch]; }
+    bool isAtomicAdd() const { return _flags[AtomicAdd]; }
+    bool isAtomicSub() const { return _flags[AtomicSub]; }
+    bool isAtomicInc() const { return _flags[AtomicInc]; }
+    bool isAtomicDec() const { return _flags[AtomicDec]; }
+    bool isAtomicMax() const { return _flags[AtomicMax]; }
+    bool isAtomicMin() const { return _flags[AtomicMin]; }
+
+    bool
+    isArgLoad() const
      {
-        fatal("calling initiateAcc() on a non-memory instruction.\n");
+        return (_flags[KernArgSegment] || _flags[ArgSegment]) && _flags[Load];
      }
  
-    virtual uint32_t getTargetPc() { return 0; }
+    bool
+    isGlobalMem() const
+    {
+        return _flags[MemoryRef] && (_flags[GlobalSegment] ||
+               _flags[PrivateSegment] || _flags[ReadOnlySegment] ||
+               _flags[SpillSegment]);
+    }
+
+    bool
+    isLocalMem() const
+    {
+        return _flags[MemoryRef] && _flags[GroupSegment];
+    }
+
+    bool isArgSeg() const { return _flags[ArgSegment]; }
+    bool isGlobalSeg() const { return _flags[GlobalSegment]; }
+    bool isGroupSeg() const { return _flags[GroupSegment]; }
+    bool isKernArgSeg() const { return _flags[KernArgSegment]; }
+    bool isPrivateSeg() const { return _flags[PrivateSegment]; }
+    bool isReadOnlySeg() const { return _flags[ReadOnlySegment]; }
+    bool isSpillSeg() const { return _flags[SpillSegment]; }
+
+    bool isWorkitemScope() const { return _flags[WorkitemScope]; }
+    bool isWavefrontScope() const { return _flags[WavefrontScope]; }
+    bool isWorkgroupScope() const { return _flags[WorkgroupScope]; }
+    bool isDeviceScope() const { return _flags[DeviceScope]; }
+    bool isSystemScope() const { return _flags[SystemScope]; }
+    bool isNoScope() const { return _flags[NoScope]; }
+
+    bool isRelaxedOrder() const { return _flags[RelaxedOrder]; }
+    bool isAcquire() const { return _flags[Acquire]; }
+    bool isRelease() const { return _flags[Release]; }
+    bool isAcquireRelease() const { return _flags[AcquireRelease]; }
+    bool isNoOrder() const { return _flags[NoOrder]; }
  
      /**
-     * Query whether the instruction is an unconditional jump i.e., the jump
-     * is always executed because there is no condition to be evaluated.
+     * Coherence domain of a memory instruction. Only valid for
+     * machine ISA. The coherence domain specifies where it is
+     * possible to perform memory synchronization, e.g., acquire
+     * or release, from the shader kernel.
       *
-     * If the instruction is not of branch type, the result is always false.
+     * isGloballyCoherent(): returns true if kernel is sharing memory
+     * with other work-items on the same device (GPU)
       *
-     * @return True if the instruction is an unconditional jump.
+     * isSystemCoherent(): returns true if kernel is sharing memory
+     * with other work-items on a different device (GPU) or the host (CPU)
       */
-    virtual bool unconditionalJumpInstruction() { return false; }
+    bool isGloballyCoherent() const { return _flags[GloballyCoherent]; }
+    bool isSystemCoherent() const { return _flags[SystemCoherent]; }
+
+    virtual int instSize() const = 0;
+
+    // only used for memory instructions
+    virtual void
+    initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        fatal("calling initiateAcc() on a non-memory instruction.\n");
+    }
+
+    // only used for memory instructions
+    virtual void
+    completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+        fatal("calling completeAcc() on a non-memory instruction.\n");
+    }
+
+    virtual uint32_t getTargetPc() { return 0; }
  
      static uint64_t dynamic_id_count;
  
-    Enums::OpType o_type;
      // For flat memory accesses
      Enums::StorageClassType executed_as;
  
-  protected:
+    void setFlag(Flags flag) { _flags[flag] = true; }
+
      virtual void
      execLdAcq(GPUDynInstPtr gpuDynInst)
      {
@@ -152,15 +245,61 @@ class GPUStaticInst
          fatal("calling execAtomicAcq() on a non-atomic instruction.\n");
      }
  
+  protected:
      const std::string opcode;
      std::string disassembly;
      int _instNum;
+    int _instAddr;
      /**
       * Identifier of the immediate post-dominator instruction.
       */
      int _ipdInstNum;
  
-    bool _scalarOp;
+    std::bitset<Num_Flags> _flags;
+};
+
+class KernelLaunchStaticInst : public GPUStaticInst  // no operands, size 0
+{
+  public:
+    KernelLaunchStaticInst() : GPUStaticInst("kernel_launch")
+    {
+        setFlag(Nop);            // isNop()
+        setFlag(Scalar);         // isScalar()
+        setFlag(Acquire);        // isAcquire()
+        setFlag(SystemScope);    // isSystemScope()
+        setFlag(GlobalSegment);  // isGlobalSeg(); MemoryRef is not set here
+    }
+    // Not meant to be executed: execute() only calls fatal().
+    void
+    execute(GPUDynInstPtr gpuDynInst) override
+    {
+        fatal("kernel launch instruction should not be executed\n");
+    }
+    // The disassembly text is simply the opcode string.
+    void
+    generateDisassembly() override
+    {
+        disassembly = opcode;
+    }
+    // Operand queries: the count is 0 and every per-operand test is false/0.
+    int getNumOperands() override { return 0; }
+    bool isCondRegister(int operandIndex) override { return false; }
+    bool isScalarRegister(int operandIndex) override { return false; }
+    bool isVectorRegister(int operandIndex) override { return false; }
+    bool isSrcOperand(int operandIndex) override { return false; }
+    bool isDstOperand(int operandIndex) override { return false; }
+    int getOperandSize(int operandIndex) override { return 0; }
+    // Always returns 0; both arguments are ignored.
+    int
+    getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
+    {
+        return 0;
+    }
+    // No dst/src register operands; always valid; instSize() is 0.
+    int numDstRegOperands() override { return 0; }
+    int numSrcRegOperands() override { return 0; }
+    bool isValid() const override { return true; }
+    int instSize() const override { return 0; }
  };
  
  #endif // __GPU_STATIC_INST_HH__