From: Tuan Ta
Date: Tue, 1 May 2018 15:43:16 +0000 (-0400)
Subject: mem-ruby: update memory interfaces to support GPU ISA
X-Git-Tag: v20.1.0.0~603
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=adc9de4d612ee4aad983a1845b4ae020d0cf8359;p=gem5.git

mem-ruby: update memory interfaces to support GPU ISA

This patch deprecates HSA-based memory request types and adds new
types that can be used by real ISA instructions.

Change-Id: Ie107a69d8a35e9de0853f1407392ad01a8b3e930
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/28408
Reviewed-by: Anthony Gutierrez
Maintainer: Anthony Gutierrez
Tested-by: kokoro
---

diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 2d69ba297..1c1da212d 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -181,6 +181,10 @@ MemCmd::commandInfo[] =
     { 0, InvalidCmd, "Deprecated_MessageResp" },
     /* MemFenceReq -- for synchronization requests */
     {SET2(IsRequest, NeedsResponse), MemFenceResp, "MemFenceReq"},
+    /* MemSyncReq */
+    {SET2(IsRequest, NeedsResponse), MemSyncResp, "MemSyncReq"},
+    /* MemSyncResp */
+    {SET1(IsResponse), InvalidCmd, "MemSyncResp"},
     /* MemFenceResp -- for synchronization responses */
     {SET1(IsResponse), InvalidCmd, "MemFenceResp"},
     /* Cache Clean Request -- Update with the latest data all existing
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index d390c0092..42d286a5e 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -110,6 +110,8 @@ class MemCmd
         SwapResp,
         // MessageReq and MessageResp are deprecated.
         MemFenceReq = SwapResp + 3,
+        MemSyncReq,  // memory synchronization request (e.g., cache invalidate)
+        MemSyncResp, // memory synchronization response
         MemFenceResp,
         CleanSharedReq,
         CleanSharedResp,
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 01252bf66..4e0ba974c 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -110,7 +110,7 @@ class Request
          * STRICT_ORDER flag should be set if such reordering is
          * undesirable.
          */
-        UNCACHEABLE = 0x00000400,
+        UNCACHEABLE = 0x00000400,
         /**
          * The request is required to be strictly ordered by CPU
          * models and is non-speculative.
@@ -216,35 +216,30 @@ class Request
     };
     /** @} */

-    typedef uint32_t MemSpaceConfigFlagsType;
-    typedef ::Flags<MemSpaceConfigFlagsType> MemSpaceConfigFlags;
-
-    enum : MemSpaceConfigFlagsType {
-        /** Has a synchronization scope been set? */
-        SCOPE_VALID = 0x00000001,
-        /** Access has Wavefront scope visibility */
-        WAVEFRONT_SCOPE = 0x00000002,
-        /** Access has Workgroup scope visibility */
-        WORKGROUP_SCOPE = 0x00000004,
-        /** Access has Device (e.g., GPU) scope visibility */
-        DEVICE_SCOPE = 0x00000008,
-        /** Access has System (e.g., CPU + GPU) scope visibility */
-        SYSTEM_SCOPE = 0x00000010,
-
-        /** Global Segment */
-        GLOBAL_SEGMENT = 0x00000020,
-        /** Group Segment */
-        GROUP_SEGMENT = 0x00000040,
-        /** Private Segment */
-        PRIVATE_SEGMENT = 0x00000080,
-        /** Kergarg Segment */
-        KERNARG_SEGMENT = 0x00000100,
-        /** Readonly Segment */
-        READONLY_SEGMENT = 0x00000200,
-        /** Spill Segment */
-        SPILL_SEGMENT = 0x00000400,
-        /** Arg Segment */
-        ARG_SEGMENT = 0x00000800,
+    typedef uint64_t CacheCoherenceFlagsType;
+    typedef ::Flags<CacheCoherenceFlagsType> CacheCoherenceFlags;
+
+    /**
+     * These bits are used to set the coherence policy
+     * for the GPU and are encoded in the GCN3 instructions.
+     * See the AMD GCN3 ISA Architecture Manual for more
+     * details.
+     *
+     * SLC: System Level Coherent. Accesses are forced to miss in
+     *      the L2 cache and are coherent with system memory.
+     *
+     * GLC: Globally Coherent. Controls how reads and writes are
+     *      handled by the L1 cache.
+     *      Global here refers to the data being visible
+     *      globally on the GPU (i.e., visible to all WGs).
+     *
+     * For atomics, the GLC bit is used to distinguish between
+     * atomic return/no-return operations.
+     */
+    enum : CacheCoherenceFlagsType {
+        /** user-policy flags */
+        SLC_BIT = 0x00000080,
+        GLC_BIT = 0x00000100,
     };

     using LocalAccessor =
@@ -305,8 +300,8 @@ class Request
     /** Flag structure for the request. */
     Flags _flags;

-    /** Memory space configuraiton flag structure for the request. */
-    MemSpaceConfigFlags _memSpaceConfigFlags;
+    /** Flags that control how the downstream cache system maintains coherence. */
+    CacheCoherenceFlags _cacheCoherenceFlags;

     /** Private flags for field validity checking. */
     PrivateFlags privateFlags;
@@ -394,7 +389,7 @@ class Request
           _byteEnable(other._byteEnable),
           _masterId(other._masterId),
           _flags(other._flags),
-          _memSpaceConfigFlags(other._memSpaceConfigFlags),
+          _cacheCoherenceFlags(other._cacheCoherenceFlags),
           privateFlags(other.privateFlags),
           _time(other._time),
           _taskId(other._taskId), _vaddr(other._vaddr),
@@ -629,10 +624,11 @@ class Request
     }

     void
-    setMemSpaceConfigFlags(MemSpaceConfigFlags extraFlags)
+    setCacheCoherenceFlags(CacheCoherenceFlags extraFlags)
     {
+        // TODO: do mem_sync_op requests have valid paddr/vaddr?
         assert(privateFlags.isSet(VALID_PADDR | VALID_VADDR));
-        _memSpaceConfigFlags.set(extraFlags);
+        _cacheCoherenceFlags.set(extraFlags);
     }

     /** Accessor function for vaddr.*/
@@ -840,82 +836,10 @@ class Request
      * Accessor functions for the memory space configuration flags and used by
      * GPU ISAs such as the Heterogeneous System Architecture (HSA). Note that
      * these are for testing only; setting extraFlags should be done via
-     * setMemSpaceConfigFlags().
+     * setCacheCoherenceFlags().
      */
-    bool isScoped() const { return _memSpaceConfigFlags.isSet(SCOPE_VALID); }
-
-    bool
-    isWavefrontScope() const
-    {
-        assert(isScoped());
-        return _memSpaceConfigFlags.isSet(WAVEFRONT_SCOPE);
-    }
-
-    bool
-    isWorkgroupScope() const
-    {
-        assert(isScoped());
-        return _memSpaceConfigFlags.isSet(WORKGROUP_SCOPE);
-    }
-
-    bool
-    isDeviceScope() const
-    {
-        assert(isScoped());
-        return _memSpaceConfigFlags.isSet(DEVICE_SCOPE);
-    }
-
-    bool
-    isSystemScope() const
-    {
-        assert(isScoped());
-        return _memSpaceConfigFlags.isSet(SYSTEM_SCOPE);
-    }
-
-    bool
-    isGlobalSegment() const
-    {
-        return _memSpaceConfigFlags.isSet(GLOBAL_SEGMENT) ||
-               (!isGroupSegment() && !isPrivateSegment() &&
-                !isKernargSegment() && !isReadonlySegment() &&
-                !isSpillSegment() && !isArgSegment());
-    }
-
-    bool
-    isGroupSegment() const
-    {
-        return _memSpaceConfigFlags.isSet(GROUP_SEGMENT);
-    }
-
-    bool
-    isPrivateSegment() const
-    {
-        return _memSpaceConfigFlags.isSet(PRIVATE_SEGMENT);
-    }
-
-    bool
-    isKernargSegment() const
-    {
-        return _memSpaceConfigFlags.isSet(KERNARG_SEGMENT);
-    }
-
-    bool
-    isReadonlySegment() const
-    {
-        return _memSpaceConfigFlags.isSet(READONLY_SEGMENT);
-    }
-
-    bool
-    isSpillSegment() const
-    {
-        return _memSpaceConfigFlags.isSet(SPILL_SEGMENT);
-    }
-
-    bool
-    isArgSegment() const
-    {
-        return _memSpaceConfigFlags.isSet(ARG_SEGMENT);
-    }
+    bool isSLC() const { return _cacheCoherenceFlags.isSet(SLC_BIT); }
+    bool isGLC() const { return _cacheCoherenceFlags.isSet(GLC_BIT); }

     /**
      * Accessor functions to determine whether this request is part of
diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh
index f6b25bf9a..68b11f55d 100644
--- a/src/mem/ruby/slicc_interface/RubyRequest.hh
+++ b/src/mem/ruby/slicc_interface/RubyRequest.hh
@@ -58,16 +58,11 @@ class RubyRequest : public Message
     WriteMask m_writeMask;
     DataBlock m_WTData;
     int m_wfid;
-    HSAScope m_scope;
-    HSASegment m_segment;
-
     RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
         uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode,
         PacketPtr _pkt, PrefetchBit _pb = PrefetchBit_No,
-        ContextID _proc_id = 100, ContextID _core_id = 99,
-        HSAScope _scope = HSAScope_UNSPECIFIED,
-        HSASegment _segment = HSASegment_GLOBAL)
+        ContextID _proc_id = 100, ContextID _core_id = 99)
         : Message(curTime),
           m_PhysicalAddress(_paddr),
           m_Type(_type),
@@ -77,9 +72,7 @@ class RubyRequest : public Message
           m_Prefetch(_pb),
           data(_data),
           m_pkt(_pkt),
-          m_contextId(_core_id),
-          m_scope(_scope),
-          m_segment(_segment)
+          m_contextId(_core_id)
     {
         m_LineAddress = makeLineAddress(m_PhysicalAddress);
     }
@@ -89,9 +82,7 @@ class RubyRequest : public Message
         RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb,
         unsigned _proc_id, unsigned _core_id, int _wm_size,
         std::vector<bool> & _wm_mask,
-        DataBlock & _Data,
-        HSAScope _scope = HSAScope_UNSPECIFIED,
-        HSASegment _segment = HSASegment_GLOBAL)
+        DataBlock & _Data)
         : Message(curTime),
           m_PhysicalAddress(_paddr),
           m_Type(_type),
@@ -104,9 +95,7 @@ class RubyRequest : public Message
           m_contextId(_core_id),
           m_writeMask(_wm_size,_wm_mask),
           m_WTData(_Data),
-          m_wfid(_proc_id),
-          m_scope(_scope),
-          m_segment(_segment)
+          m_wfid(_proc_id)
     {
         m_LineAddress = makeLineAddress(m_PhysicalAddress);
     }
@@ -117,9 +106,7 @@ class RubyRequest : public Message
         unsigned _proc_id, unsigned _core_id, int _wm_size,
         std::vector<bool> & _wm_mask, DataBlock & _Data,
-        std::vector< std::pair<int, AtomicOpFunctor*> > _atomicOps,
-        HSAScope _scope = HSAScope_UNSPECIFIED,
-        HSASegment _segment = HSASegment_GLOBAL)
+        std::vector< std::pair<int, AtomicOpFunctor*> > _atomicOps)
         : Message(curTime),
           m_PhysicalAddress(_paddr),
           m_Type(_type),
@@ -132,14 +119,11 @@ class RubyRequest : public Message
          m_contextId(_core_id),
           m_writeMask(_wm_size,_wm_mask,_atomicOps),
           m_WTData(_Data),
-          m_wfid(_proc_id),
-          m_scope(_scope),
-          m_segment(_segment)
+          m_wfid(_proc_id)
     {
         m_LineAddress = makeLineAddress(m_PhysicalAddress);
     }

-    RubyRequest(Tick curTime) : Message(curTime) {}
     MsgPtr clone() const
     { return std::shared_ptr<Message>(new RubyRequest(*this)); }
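
For reference, a minimal sketch of how an issuing model might build one of
the new synchronization packets. This is not part of the patch: the helper
name, the zero placeholder address/size, and the masterId parameter are
assumptions for illustration only; the Request and Packet constructors are
the standard gem5 ones of this era.

    #include "mem/packet.hh"
    #include "mem/request.hh"

    // Hypothetical helper: a MemSyncReq (e.g., a cache invalidate)
    // carries no data, so the address and size here are nominal.
    PacketPtr
    makeMemSyncPacket(MasterID masterId)
    {
        RequestPtr req = std::make_shared<Request>(0, 0, 0, masterId);
        // MemSyncReq is marked NeedsResponse in commandInfo[], so the
        // issuer should wait for the matching MemSyncResp.
        return new Packet(req, MemCmd::MemSyncReq);
    }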
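Similarly, a hypothetical GCN3-style memory instruction could translate its
GLC/SLC encoding into the new coherence flags. The function and the
insnGlc/insnSlc parameters below stand in for fields of a decoded
instruction and are not defined by this patch; only the flag constants,
setter, and accessors come from the diff above.

    #include "mem/request.hh"

    // Sketch: map the instruction's coherence bits onto the request.
    void
    applyGcn3CoherenceBits(const RequestPtr &req, bool insnGlc, bool insnSlc)
    {
        Request::CacheCoherenceFlagsType flags = 0;
        if (insnGlc)
            flags |= Request::GLC_BIT; // L1 policy; return vs. no-return atomics
        if (insnSlc)
            flags |= Request::SLC_BIT; // force L2 miss, system-level coherent
        // setCacheCoherenceFlags() asserts that paddr/vaddr are valid, so
        // call it only after the request's addresses have been set.
        req->setCacheCoherenceFlags(flags);
    }

Downstream, a cache model can then query req->isGLC() / req->isSLC() to
choose its coherence policy.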