mem-ruby: update memory interfaces to support GPU ISA
author Tuan Ta <tuan.ta@amd.com>
Tue, 1 May 2018 15:43:16 +0000 (11:43 -0400)
committer Anthony Gutierrez <anthony.gutierrez@amd.com>
Tue, 9 Jun 2020 20:00:13 +0000 (20:00 +0000)
This patch deprecates HSA-based memory request types and adds new
types that can be used by real ISA instructions.

Change-Id: Ie107a69d8a35e9de0853f1407392ad01a8b3e930
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/28408
Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
src/mem/packet.cc
src/mem/packet.hh
src/mem/request.hh
src/mem/ruby/slicc_interface/RubyRequest.hh

index 2d69ba297873f7107ebb0ecf6fec1b74c6c14031..1c1da212d5bbced31f97365490d9f9993c4ecb4e 100644 (file)
@@ -181,6 +181,10 @@ MemCmd::commandInfo[] =
     { 0, InvalidCmd, "Deprecated_MessageResp" },
     /* MemFenceReq -- for synchronization requests */
     {SET2(IsRequest, NeedsResponse), MemFenceResp, "MemFenceReq"},
+    /* MemSyncReq */
+    {SET2(IsRequest, NeedsResponse), MemSyncResp, "MemSyncReq"},
+    /* MemSyncResp */
+    {SET1(IsResponse), InvalidCmd, "MemSyncResp"},
     /* MemFenceResp -- for synchronization responses */
     {SET1(IsResponse), InvalidCmd, "MemFenceResp"},
     /* Cache Clean Request -- Update with the latest data all existing
index d390c0092c867a8c677b9bc4850c33a15c532250..42d286a5e550b0ea780fbb87d6a4443609a5bb13 100644 (file)
@@ -110,6 +110,8 @@ class MemCmd
         SwapResp,
         // MessageReq and MessageResp are deprecated.
         MemFenceReq = SwapResp + 3,
+        MemSyncReq,  // memory synchronization request (e.g., cache invalidate)
+        MemSyncResp, // memory synchronization response
         MemFenceResp,
         CleanSharedReq,
         CleanSharedResp,
index 01252bf66850a78ee3fbf3b53ddd0be62f925bec..4e0ba974cb7d9aba9387157966c8282e063b8e61 100644 (file)
@@ -110,7 +110,7 @@ class Request
          * STRICT_ORDER flag should be set if such reordering is
          * undesirable.
          */
-        UNCACHEABLE                = 0x00000400,
+        UNCACHEABLE                 = 0x00000400,
         /**
          * The request is required to be strictly ordered by <i>CPU
          * models</i> and is non-speculative.
@@ -216,35 +216,30 @@ class Request
     };
     /** @} */
 
-    typedef uint32_t MemSpaceConfigFlagsType;
-    typedef ::Flags<MemSpaceConfigFlagsType> MemSpaceConfigFlags;
-
-    enum : MemSpaceConfigFlagsType {
-        /** Has a synchronization scope been set? */
-        SCOPE_VALID            = 0x00000001,
-        /** Access has Wavefront scope visibility */
-        WAVEFRONT_SCOPE        = 0x00000002,
-        /** Access has Workgroup scope visibility */
-        WORKGROUP_SCOPE        = 0x00000004,
-        /** Access has Device (e.g., GPU) scope visibility */
-        DEVICE_SCOPE           = 0x00000008,
-        /** Access has System (e.g., CPU + GPU) scope visibility */
-        SYSTEM_SCOPE           = 0x00000010,
-
-        /** Global Segment */
-        GLOBAL_SEGMENT         = 0x00000020,
-        /** Group Segment */
-        GROUP_SEGMENT          = 0x00000040,
-        /** Private Segment */
-        PRIVATE_SEGMENT        = 0x00000080,
-        /** Kergarg Segment */
-        KERNARG_SEGMENT        = 0x00000100,
-        /** Readonly Segment */
-        READONLY_SEGMENT       = 0x00000200,
-        /** Spill Segment */
-        SPILL_SEGMENT          = 0x00000400,
-        /** Arg Segment */
-        ARG_SEGMENT            = 0x00000800,
+    typedef uint64_t CacheCoherenceFlagsType;
+    typedef ::Flags<CacheCoherenceFlagsType> CacheCoherenceFlags;
+
+    /**
+     * These bits are used to set the coherence policy
+     * for the GPU and are encoded in the GCN3 instructions.
+     * See the AMD GCN3 ISA Architecture Manual for more
+     * details.
+     *
+     * SLC: System Level Coherent. Accesses are forced to miss in
+     *      the L2 cache and are coherent with system memory.
+     *
+     * GLC: Globally Coherent. Controls how reads and writes are
+     *      handled by the L1 cache. Global here refers to the
+     *      data being visible globally on the GPU (i.e., visible
+     *      to all WGs).
+     *
+     * For atomics, the GLC bit is used to distinguish
+     * between atomic return/no-return operations.
+     */
+    enum : CacheCoherenceFlagsType {
+        /** user-policy flags */
+        SLC_BIT                 = 0x00000080,
+        GLC_BIT                 = 0x00000100,
     };
 
     using LocalAccessor =
@@ -305,8 +300,8 @@ class Request
     /** Flag structure for the request. */
     Flags _flags;
 
-    /** Memory space configuraiton flag structure for the request. */
-    MemSpaceConfigFlags _memSpaceConfigFlags;
+    /** Flags that control how the downstream cache system maintains coherence */
+    CacheCoherenceFlags _cacheCoherenceFlags;
 
     /** Private flags for field validity checking. */
     PrivateFlags privateFlags;
@@ -394,7 +389,7 @@ class Request
           _byteEnable(other._byteEnable),
           _masterId(other._masterId),
           _flags(other._flags),
-          _memSpaceConfigFlags(other._memSpaceConfigFlags),
+          _cacheCoherenceFlags(other._cacheCoherenceFlags),
           privateFlags(other.privateFlags),
           _time(other._time),
           _taskId(other._taskId), _vaddr(other._vaddr),
@@ -629,10 +624,11 @@ class Request
     }
 
     void
-    setMemSpaceConfigFlags(MemSpaceConfigFlags extraFlags)
+    setCacheCoherenceFlags(CacheCoherenceFlags extraFlags)
     {
+        // TODO: do mem_sync_op requests have valid paddr/vaddr?
         assert(privateFlags.isSet(VALID_PADDR | VALID_VADDR));
-        _memSpaceConfigFlags.set(extraFlags);
+        _cacheCoherenceFlags.set(extraFlags);
     }
 
     /** Accessor function for vaddr.*/
@@ -840,82 +836,10 @@ class Request
      * Accessor functions for the memory space configuration flags and used by
      * GPU ISAs such as the Heterogeneous System Architecture (HSA). Note that
      * these are for testing only; setting extraFlags should be done via
-     * setMemSpaceConfigFlags().
+     * setCacheCoherenceFlags().
      */
-    bool isScoped() const { return _memSpaceConfigFlags.isSet(SCOPE_VALID); }
-
-    bool
-    isWavefrontScope() const
-    {
-        assert(isScoped());
-        return _memSpaceConfigFlags.isSet(WAVEFRONT_SCOPE);
-    }
-
-    bool
-    isWorkgroupScope() const
-    {
-        assert(isScoped());
-        return _memSpaceConfigFlags.isSet(WORKGROUP_SCOPE);
-    }
-
-    bool
-    isDeviceScope() const
-    {
-        assert(isScoped());
-        return _memSpaceConfigFlags.isSet(DEVICE_SCOPE);
-    }
-
-    bool
-    isSystemScope() const
-    {
-        assert(isScoped());
-        return _memSpaceConfigFlags.isSet(SYSTEM_SCOPE);
-    }
-
-    bool
-    isGlobalSegment() const
-    {
-        return _memSpaceConfigFlags.isSet(GLOBAL_SEGMENT) ||
-               (!isGroupSegment() && !isPrivateSegment() &&
-                !isKernargSegment() && !isReadonlySegment() &&
-                !isSpillSegment() && !isArgSegment());
-    }
-
-    bool
-    isGroupSegment() const
-    {
-        return _memSpaceConfigFlags.isSet(GROUP_SEGMENT);
-    }
-
-    bool
-    isPrivateSegment() const
-    {
-        return _memSpaceConfigFlags.isSet(PRIVATE_SEGMENT);
-    }
-
-    bool
-    isKernargSegment() const
-    {
-        return _memSpaceConfigFlags.isSet(KERNARG_SEGMENT);
-    }
-
-    bool
-    isReadonlySegment() const
-    {
-        return _memSpaceConfigFlags.isSet(READONLY_SEGMENT);
-    }
-
-    bool
-    isSpillSegment() const
-    {
-        return _memSpaceConfigFlags.isSet(SPILL_SEGMENT);
-    }
-
-    bool
-    isArgSegment() const
-    {
-        return _memSpaceConfigFlags.isSet(ARG_SEGMENT);
-    }
+    bool isSLC() const { return _cacheCoherenceFlags.isSet(SLC_BIT); }
+    bool isGLC() const { return _cacheCoherenceFlags.isSet(GLC_BIT); }
 
     /**
      * Accessor functions to determine whether this request is part of
index f6b25bf9a0689f7150dbee63f249862a887dcbe2..68b11f55dce3bd4b1c406c0a6fd28d3a78c0dbe2 100644 (file)
@@ -58,16 +58,11 @@ class RubyRequest : public Message
     WriteMask m_writeMask;
     DataBlock m_WTData;
     int m_wfid;
-    HSAScope m_scope;
-    HSASegment m_segment;
-
 
     RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
         uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode,
         PacketPtr _pkt, PrefetchBit _pb = PrefetchBit_No,
-        ContextID _proc_id = 100, ContextID _core_id = 99,
-        HSAScope _scope = HSAScope_UNSPECIFIED,
-        HSASegment _segment = HSASegment_GLOBAL)
+        ContextID _proc_id = 100, ContextID _core_id = 99)
         : Message(curTime),
           m_PhysicalAddress(_paddr),
           m_Type(_type),
@@ -77,9 +72,7 @@ class RubyRequest : public Message
           m_Prefetch(_pb),
           data(_data),
           m_pkt(_pkt),
-          m_contextId(_core_id),
-          m_scope(_scope),
-          m_segment(_segment)
+          m_contextId(_core_id)
     {
         m_LineAddress = makeLineAddress(m_PhysicalAddress);
     }
@@ -89,9 +82,7 @@ class RubyRequest : public Message
         RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb,
         unsigned _proc_id, unsigned _core_id,
         int _wm_size, std::vector<bool> & _wm_mask,
-        DataBlock & _Data,
-        HSAScope _scope = HSAScope_UNSPECIFIED,
-        HSASegment _segment = HSASegment_GLOBAL)
+        DataBlock & _Data)
         : Message(curTime),
           m_PhysicalAddress(_paddr),
           m_Type(_type),
@@ -104,9 +95,7 @@ class RubyRequest : public Message
           m_contextId(_core_id),
           m_writeMask(_wm_size,_wm_mask),
           m_WTData(_Data),
-          m_wfid(_proc_id),
-          m_scope(_scope),
-          m_segment(_segment)
+          m_wfid(_proc_id)
     {
         m_LineAddress = makeLineAddress(m_PhysicalAddress);
     }
@@ -117,9 +106,7 @@ class RubyRequest : public Message
         unsigned _proc_id, unsigned _core_id,
         int _wm_size, std::vector<bool> & _wm_mask,
         DataBlock & _Data,
-        std::vector< std::pair<int,AtomicOpFunctor*> > _atomicOps,
-        HSAScope _scope = HSAScope_UNSPECIFIED,
-        HSASegment _segment = HSASegment_GLOBAL)
+        std::vector< std::pair<int,AtomicOpFunctor*> > _atomicOps)
         : Message(curTime),
           m_PhysicalAddress(_paddr),
           m_Type(_type),
@@ -132,14 +119,11 @@ class RubyRequest : public Message
           m_contextId(_core_id),
           m_writeMask(_wm_size,_wm_mask,_atomicOps),
           m_WTData(_Data),
-          m_wfid(_proc_id),
-          m_scope(_scope),
-          m_segment(_segment)
+          m_wfid(_proc_id)
     {
         m_LineAddress = makeLineAddress(m_PhysicalAddress);
     }
 
-
     RubyRequest(Tick curTime) : Message(curTime) {}
     MsgPtr clone() const
     { return std::shared_ptr<Message>(new RubyRequest(*this)); }