mem: misc flags for AMD gpu model
authorBlake Hechtman <blake.hechtman@amd.com>
Mon, 20 Jul 2015 14:15:18 +0000 (09:15 -0500)
committerBlake Hechtman <blake.hechtman@amd.com>
Mon, 20 Jul 2015 14:15:18 +0000 (09:15 -0500)
This patch adds support to mark memory requests/packets with attributes defined
in HSA, such as memory order and scope.

src/mem/protocol/RubySlicc_Exports.sm
src/mem/protocol/RubySlicc_Types.sm
src/mem/request.hh
src/mem/ruby/common/DataBlock.hh
src/mem/ruby/slicc_interface/RubyRequest.hh
src/mem/ruby/system/RubyPort.cc

index 617989d15f6440109286ce3cf19fda79195e44f9..6fedfeb2d15a0599211ba3c63554b4048b07b8f1 100644 (file)
@@ -41,7 +41,7 @@ external_type(Tick, primitive="yes", default="0");
 
 structure(DataBlock, external = "yes", desc="..."){
   void clear();
-  void copyPartial(DataBlock, int, int);
+  void atomicPartial(DataBlock, WriteMask);
 }
 
 bool testAndRead(Addr addr, DataBlock datablk, Packet *pkt);
@@ -78,6 +78,26 @@ enumeration(AccessPermission, desc="...", default="AccessPermission_NotPresent")
   NotPresent, desc="block is NotPresent";
   Busy,       desc="block is in a transient state, currently invalid";
 }
+//HSA scopes
+enumeration(HSAScope, desc="...", default="HSAScope_UNSPECIFIED") {
+  UNSPECIFIED, desc="Unspecified scope";
+  NOSCOPE,     desc="Explicitly unscoped";
+  WAVEFRONT,   desc="Wavefront scope";
+  WORKGROUP,   desc="Workgroup scope";
+  DEVICE,      desc="Device scope";
+  SYSTEM,      desc="System scope";
+}
+
+// HSA segment types
+enumeration(HSASegment, desc="...", default="HSASegment_GLOBAL") {
+  GLOBAL,   desc="Global segment";
+  GROUP,    desc="Group segment";
+  PRIVATE,  desc="Private segment";
+  KERNARG,  desc="Kernarg segment";
+  READONLY, desc="Readonly segment";
+  SPILL,    desc="Spill segment";
+  ARG,      desc="Arg segment";
+}
 
 // TesterStatus
 enumeration(TesterStatus, desc="...") {
@@ -143,9 +163,10 @@ enumeration(RubyRequestType, desc="...", default="RubyRequestType_NULL") {
 }
 
 enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") {
-  Default,    desc="Replace this with access_types passed to the DMA Ruby object";
+  Default,     desc="Replace this with access_types passed to the DMA Ruby object";
   LD,          desc="Load";
   ST,          desc="Store";
+  FLUSH,       desc="Flush request type";
   NULL,        desc="Invalid request type";
 }
 
index 8e846098c433018c6698c316e835c0b876f9d4f6..c7479089bae55a74005f3115e2ebdf724002f20d 100644 (file)
@@ -126,6 +126,8 @@ structure(RubyRequest, desc="...", interface="Message", external="yes") {
   int Size,                  desc="size in bytes of access";
   PrefetchBit Prefetch,      desc="Is this a prefetch request";
   int contextId,             desc="this goes away but must be replace with Nilay";
+  HSAScope scope,            desc="HSA scope";
+  HSASegment segment,        desc="HSA segment";
 }
 
 structure(AbstractEntry, primitive="yes", external = "yes") {
index de781f5d6f0dc8533ebca2432bfa327871434f15..bb5e5d59c313da1e2ede09929b64cfc56a25bf1e 100644 (file)
@@ -160,6 +160,12 @@ class Request
         /** The request should be marked with RELEASE. */
         RELEASE                     = 0x00040000,
 
+        /** The request should be marked with KERNEL.
+         * Used to indicate the synchronization associated with a GPU kernel
+         * launch or completion.
+         */
+        KERNEL                      = 0x00001000,
+
         /**
          * The request should be handled by the generic IPR code (only
          * valid together with MMAPPED_IPR)
@@ -198,6 +204,37 @@ class Request
     };
     /** @} */
 
+    typedef uint32_t MemSpaceConfigFlagsType;
+    typedef ::Flags<MemSpaceConfigFlagsType> MemSpaceConfigFlags;
+
+    enum : MemSpaceConfigFlagsType {
+        /** Has a synchronization scope been set? */
+        SCOPE_VALID            = 0x00000001,
+        /** Access has Wavefront scope visibility */
+        WAVEFRONT_SCOPE        = 0x00000002,
+        /** Access has Workgroup scope visibility */
+        WORKGROUP_SCOPE        = 0x00000004,
+        /** Access has Device (e.g., GPU) scope visibility */
+        DEVICE_SCOPE           = 0x00000008,
+        /** Access has System (e.g., CPU + GPU) scope visibility */
+        SYSTEM_SCOPE           = 0x00000010,
+
+        /** Global Segment */
+        GLOBAL_SEGMENT         = 0x00000020,
+        /** Group Segment */
+        GROUP_SEGMENT          = 0x00000040,
+        /** Private Segment */
+        PRIVATE_SEGMENT        = 0x00000080,
+        /** Kernarg Segment */
+        KERNARG_SEGMENT        = 0x00000100,
+        /** Readonly Segment */
+        READONLY_SEGMENT       = 0x00000200,
+        /** Spill Segment */
+        SPILL_SEGMENT          = 0x00000400,
+        /** Arg Segment */
+        ARG_SEGMENT            = 0x00000800,
+    };
+
   private:
     typedef uint8_t PrivateFlagsType;
     typedef ::Flags<PrivateFlagsType> PrivateFlags;
@@ -268,6 +305,9 @@ class Request
     /** Flag structure for the request. */
     Flags _flags;
 
+    /** Memory space configuration flag structure for the request. */
+    MemSpaceConfigFlags _memSpaceConfigFlags;
+
     /** Private flags for field validity checking. */
     PrivateFlags privateFlags;
 
@@ -520,6 +560,13 @@ class Request
         _flags.set(flags);
     }
 
+    void
+    setMemSpaceConfigFlags(MemSpaceConfigFlags extraFlags)
+    {
+        assert(privateFlags.isSet(VALID_PADDR | VALID_VADDR));
+        _memSpaceConfigFlags.set(extraFlags);
+    }
+
     /** Accessor function for vaddr.*/
     bool
     hasVaddr() const
@@ -685,7 +732,7 @@ class Request
         _reqInstSeqNum = seq_num;
     }
 
-    /** Accessor functions for flags.  Note that these are for testing
+    /** Accessor functions for flags. Note that these are for testing
         only; setting flags should be done via setFlags(). */
     bool isUncacheable() const { return _flags.isSet(UNCACHEABLE); }
     bool isStrictlyOrdered() const { return _flags.isSet(STRICT_ORDER); }
@@ -701,6 +748,88 @@ class Request
     bool isPTWalk() const { return _flags.isSet(PT_WALK); }
     bool isAcquire() const { return _flags.isSet(ACQUIRE); }
     bool isRelease() const { return _flags.isSet(RELEASE); }
+    bool isKernel() const { return _flags.isSet(KERNEL); }
+
+    /**
+     * Accessor functions for the memory space configuration flags, used by
+     * GPU ISAs such as the Heterogeneous System Architecture (HSA). Note that
+     * these are for testing only; setting extraFlags should be done via
+     * setMemSpaceConfigFlags().
+     */
+    bool isScoped() const { return _memSpaceConfigFlags.isSet(SCOPE_VALID); }
+
+    bool
+    isWavefrontScope() const
+    {
+        assert(isScoped());
+        return _memSpaceConfigFlags.isSet(WAVEFRONT_SCOPE);
+    }
+
+    bool
+    isWorkgroupScope() const
+    {
+        assert(isScoped());
+        return _memSpaceConfigFlags.isSet(WORKGROUP_SCOPE);
+    }
+
+    bool
+    isDeviceScope() const
+    {
+        assert(isScoped());
+        return _memSpaceConfigFlags.isSet(DEVICE_SCOPE);
+    }
+
+    bool
+    isSystemScope() const
+    {
+        assert(isScoped());
+        return _memSpaceConfigFlags.isSet(SYSTEM_SCOPE);
+    }
+
+    bool
+    isGlobalSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(GLOBAL_SEGMENT) ||
+               (!isGroupSegment() && !isPrivateSegment() &&
+                !isKernargSegment() && !isReadonlySegment() &&
+                !isSpillSegment() && !isArgSegment());
+    }
+
+    bool
+    isGroupSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(GROUP_SEGMENT);
+    }
+
+    bool
+    isPrivateSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(PRIVATE_SEGMENT);
+    }
+
+    bool
+    isKernargSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(KERNARG_SEGMENT);
+    }
+
+    bool
+    isReadonlySegment() const
+    {
+        return _memSpaceConfigFlags.isSet(READONLY_SEGMENT);
+    }
+
+    bool
+    isSpillSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(SPILL_SEGMENT);
+    }
+
+    bool
+    isArgSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(ARG_SEGMENT);
+    }
 };
 
 #endif // __MEM_REQUEST_HH__
index ac08fac82517408cbc8119b6e860215492e6ba27..49ce3624adfd858f2c3afc458914c7fd569a43bf 100644 (file)
@@ -60,7 +60,6 @@ class DataBlock
     const uint8_t *getData(int offset, int len) const;
     void setByte(int whichByte, uint8_t data);
     void setData(const uint8_t *data, int offset, int len);
-    void copyPartial(const DataBlock & dblk, int offset, int len);
     bool equal(const DataBlock& obj) const;
     void print(std::ostream& out) const;
 
index b17269a78452b72ad3b51984470fecd820a7718e..73f214a205d490d87e378aa8c0ce9fc3ee3308bd 100644 (file)
 #define __MEM_RUBY_SLICC_INTERFACE_RUBY_REQUEST_HH__
 
 #include <ostream>
+#include <vector>
 
+#include "mem/protocol/HSAScope.hh"
+#include "mem/protocol/HSASegment.hh"
 #include "mem/protocol/Message.hh"
 #include "mem/protocol/PrefetchBit.hh"
 #include "mem/protocol/RubyAccessMode.hh"
 #include "mem/protocol/RubyRequestType.hh"
 #include "mem/ruby/common/Address.hh"
+#include "mem/ruby/common/DataBlock.hh"
 
 class RubyRequest : public Message
 {
@@ -50,11 +54,41 @@ class RubyRequest : public Message
     uint8_t* data;
     PacketPtr pkt;
     ContextID m_contextId;
+    int m_wfid;
+    HSAScope m_scope;
+    HSASegment m_segment;
+
 
     RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
         uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode,
         PacketPtr _pkt, PrefetchBit _pb = PrefetchBit_No,
-        ContextID _proc_id = 100)
+        ContextID _proc_id = 100, ContextID _core_id = 99,
+        HSAScope _scope = HSAScope_UNSPECIFIED,
+        HSASegment _segment = HSASegment_GLOBAL)
+        : Message(curTime),
+          m_PhysicalAddress(_paddr),
+          m_Type(_type),
+          m_ProgramCounter(_pc),
+          m_AccessMode(_access_mode),
+          m_Size(_len),
+          m_Prefetch(_pb),
+          data(_data),
+          pkt(_pkt),
+          m_contextId(_core_id),
+          m_scope(_scope),
+          m_segment(_segment)
+    {
+        m_LineAddress = makeLineAddress(m_PhysicalAddress);
+    }
+
+    RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
+        uint64_t _pc, RubyRequestType _type,
+        RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb,
+        unsigned _proc_id, unsigned _core_id,
+        int _wm_size, std::vector<bool> & _wm_mask,
+        DataBlock & _Data,
+        HSAScope _scope = HSAScope_UNSPECIFIED,
+        HSASegment _segment = HSASegment_GLOBAL)
         : Message(curTime),
           m_PhysicalAddress(_paddr),
           m_Type(_type),
@@ -64,11 +98,41 @@ class RubyRequest : public Message
           m_Prefetch(_pb),
           data(_data),
           pkt(_pkt),
-          m_contextId(_proc_id)
+          m_contextId(_core_id),
+          m_wfid(_proc_id),
+          m_scope(_scope),
+          m_segment(_segment)
     {
-      m_LineAddress = makeLineAddress(m_PhysicalAddress);
+        m_LineAddress = makeLineAddress(m_PhysicalAddress);
     }
 
+    RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
+        uint64_t _pc, RubyRequestType _type,
+        RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb,
+        unsigned _proc_id, unsigned _core_id,
+        int _wm_size, std::vector<bool> & _wm_mask,
+        DataBlock & _Data,
+        std::vector< std::pair<int,AtomicOpFunctor*> > _atomicOps,
+        HSAScope _scope = HSAScope_UNSPECIFIED,
+        HSASegment _segment = HSASegment_GLOBAL)
+        : Message(curTime),
+          m_PhysicalAddress(_paddr),
+          m_Type(_type),
+          m_ProgramCounter(_pc),
+          m_AccessMode(_access_mode),
+          m_Size(_len),
+          m_Prefetch(_pb),
+          data(_data),
+          pkt(_pkt),
+          m_contextId(_core_id),
+          m_wfid(_proc_id),
+          m_scope(_scope),
+          m_segment(_segment)
+    {
+        m_LineAddress = makeLineAddress(m_PhysicalAddress);
+    }
+
+
     RubyRequest(Tick curTime) : Message(curTime) {}
     MsgPtr clone() const
     { return std::shared_ptr<Message>(new RubyRequest(*this)); }
index 52acaf8c354b25e1be463e202e48a5cfe2051cd3..5a5f528bbaff07b75d07999c225c65f2b10bfd72 100644 (file)
@@ -237,25 +237,27 @@ RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt)
 
     // Check for pio requests and directly send them to the dedicated
     // pio port.
-    if (!isPhysMemAddress(pkt->getAddr())) {
-        assert(ruby_port->memMasterPort.isConnected());
-        DPRINTF(RubyPort, "Request address %#x assumed to be a pio address\n",
-                pkt->getAddr());
-
-        // Save the port in the sender state object to be used later to
-        // route the response
-        pkt->pushSenderState(new SenderState(this));
+    if (pkt->cmd != MemCmd::MemFenceReq) {
+        if (!isPhysMemAddress(pkt->getAddr())) {
+            assert(ruby_port->memMasterPort.isConnected());
+            DPRINTF(RubyPort, "Request address %#x assumed to be a "
+                    "pio address\n", pkt->getAddr());
+
+            // Save the port in the sender state object to be used later to
+            // route the response
+            pkt->pushSenderState(new SenderState(this));
+
+            // send next cycle
+            RubySystem *rs = ruby_port->m_ruby_system;
+            ruby_port->memMasterPort.schedTimingReq(pkt,
+                curTick() + rs->clockPeriod());
+            return true;
+        }
 
-        // send next cycle
-        RubySystem *rs = ruby_port->m_ruby_system;
-        ruby_port->memMasterPort.schedTimingReq(pkt,
-            curTick() + rs->clockPeriod());
-        return true;
+        assert(getOffset(pkt->getAddr()) + pkt->getSize() <=
+               RubySystem::getBlockSizeBytes());
     }
 
-    assert(getOffset(pkt->getAddr()) + pkt->getSize() <=
-           RubySystem::getBlockSizeBytes());
-
     // Submit the ruby request
     RequestStatus requestStatus = ruby_port->makeRequest(pkt);
 
@@ -272,9 +274,11 @@ RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt)
         return true;
     }
 
-
-    DPRINTF(RubyPort, "Request for address %#x did not issued because %s\n",
-            pkt->getAddr(), RequestStatus_to_string(requestStatus));
+    if (pkt->cmd != MemCmd::MemFenceReq) {
+        DPRINTF(RubyPort,
+                "Request for address %#x was not issued because %s\n",
+                pkt->getAddr(), RequestStatus_to_string(requestStatus));
+    }
 
     addToRetryList();
 
@@ -466,9 +470,14 @@ RubyPort::MemSlavePort::hitCallback(PacketPtr pkt)
         }
     }
 
-    // Flush requests don't access physical memory
-    if (pkt->isFlush()) {
+    // Flush, acquire, release requests don't access physical memory
+    if (pkt->isFlush() || pkt->cmd == MemCmd::MemFenceReq) {
+        accessPhysMem = false;
+    }
+
+    if (pkt->req->isKernel()) {
         accessPhysMem = false;
+        needsResponse = true;
     }
 
     DPRINTF(RubyPort, "Hit callback needs response %d\n", needsResponse);