From adc9de4d612ee4aad983a1845b4ae020d0cf8359 Mon Sep 17 00:00:00 2001
From: Tuan Ta
Date: Tue, 1 May 2018 11:43:16 -0400
Subject: [PATCH] mem-ruby: update memory interfaces to support GPU ISA

This patch deprecates HSA-based memory request types and adds new
types that can be used by real ISA instructions.

Change-Id: Ie107a69d8a35e9de0853f1407392ad01a8b3e930
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/28408
Reviewed-by: Anthony Gutierrez
Maintainer: Anthony Gutierrez
Tested-by: kokoro
---
 src/mem/packet.cc                           |   4 +
 src/mem/packet.hh                           |   2 +
 src/mem/request.hh                          | 144 +++++---------------
 src/mem/ruby/slicc_interface/RubyRequest.hh |  28 +---
 4 files changed, 46 insertions(+), 132 deletions(-)

diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 2d69ba297..1c1da212d 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -181,6 +181,10 @@ MemCmd::commandInfo[] =
     { 0, InvalidCmd, "Deprecated_MessageResp" },
     /* MemFenceReq -- for synchronization requests */
     {SET2(IsRequest, NeedsResponse), MemFenceResp, "MemFenceReq"},
+    /* MemSyncReq */
+    {SET2(IsRequest, NeedsResponse), MemSyncResp, "MemSyncReq"},
+    /* MemSyncResp */
+    {SET1(IsResponse), InvalidCmd, "MemSyncResp"},
     /* MemFenceResp -- for synchronization responses */
     {SET1(IsResponse), InvalidCmd, "MemFenceResp"},
     /* Cache Clean Request -- Update with the latest data all existing
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index d390c0092..42d286a5e 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -110,6 +110,8 @@ class MemCmd
         SwapResp,
         // MessageReq and MessageResp are deprecated.
         MemFenceReq = SwapResp + 3,
+        MemSyncReq,  // memory synchronization request (e.g., cache invalidate)
+        MemSyncResp, // memory synchronization response
        MemFenceResp,
        CleanSharedReq,
        CleanSharedResp,
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 01252bf66..4e0ba974c 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -110,7 +110,7 @@ class Request
      * STRICT_ORDER flag should be set if such reordering is
      * undesirable.
      */
-    UNCACHEABLE                = 0x00000400,
+    UNCACHEABLE                 = 0x00000400,
     /**
      * The request is required to be strictly ordered by CPU
      * models and is non-speculative.
@@ -216,35 +216,30 @@ class Request
    };
    /** @} */

-    typedef uint32_t MemSpaceConfigFlagsType;
-    typedef ::Flags<MemSpaceConfigFlagsType> MemSpaceConfigFlags;
-
-    enum : MemSpaceConfigFlagsType {
-        /** Has a synchronization scope been set? */
-        SCOPE_VALID            = 0x00000001,
-        /** Access has Wavefront scope visibility */
-        WAVEFRONT_SCOPE        = 0x00000002,
-        /** Access has Workgroup scope visibility */
-        WORKGROUP_SCOPE        = 0x00000004,
-        /** Access has Device (e.g., GPU) scope visibility */
-        DEVICE_SCOPE           = 0x00000008,
-        /** Access has System (e.g., CPU + GPU) scope visibility */
-        SYSTEM_SCOPE           = 0x00000010,
-
-        /** Global Segment */
-        GLOBAL_SEGMENT         = 0x00000020,
-        /** Group Segment */
-        GROUP_SEGMENT          = 0x00000040,
-        /** Private Segment */
-        PRIVATE_SEGMENT        = 0x00000080,
-        /** Kergarg Segment */
-        KERNARG_SEGMENT        = 0x00000100,
-        /** Readonly Segment */
-        READONLY_SEGMENT       = 0x00000200,
-        /** Spill Segment */
-        SPILL_SEGMENT          = 0x00000400,
-        /** Arg Segment */
-        ARG_SEGMENT            = 0x00000800,
+    typedef uint64_t CacheCoherenceFlagsType;
+    typedef ::Flags<CacheCoherenceFlagsType> CacheCoherenceFlags;
+
+    /**
+     * These bits are used to set the coherence policy
+     * for the GPU and are encoded in the GCN3 instructions.
+     * See the AMD GCN3 ISA Architecture Manual for more
+     * details.
+     *
+     * SLC: System Level Coherent. Accesses are forced to miss in
+     *      the L2 cache and are coherent with system memory.
+     *
+     * GLC: Globally Coherent. Controls how reads and writes are
+     *      handled by the L1 cache. Global here refers to the
+     *      data being visible globally on the GPU (i.e., visible
+     *      to all WGs).
+     *
+     * For atomics, the GLC bit is used to distinguish between
+     * atomic return/no-return operations.
+     */
+    enum : CacheCoherenceFlagsType {
+        /** user-policy flags */
+        SLC_BIT = 0x00000080,
+        GLC_BIT = 0x00000100,
    };

    using LocalAccessor =
@@ -305,8 +300,8 @@ class Request
    /** Flag structure for the request. */
    Flags _flags;

-    /** Memory space configuraiton flag structure for the request. */
-    MemSpaceConfigFlags _memSpaceConfigFlags;
+    /** Flags that control how downstream cache system maintains coherence*/
+    CacheCoherenceFlags _cacheCoherenceFlags;

    /** Private flags for field validity checking. */
    PrivateFlags privateFlags;
@@ -394,7 +389,7 @@ class Request
          _byteEnable(other._byteEnable),
          _masterId(other._masterId),
          _flags(other._flags),
-          _memSpaceConfigFlags(other._memSpaceConfigFlags),
+          _cacheCoherenceFlags(other._cacheCoherenceFlags),
          privateFlags(other.privateFlags),
          _time(other._time),
          _taskId(other._taskId), _vaddr(other._vaddr),
@@ -629,10 +624,11 @@ class Request
    }

    void
-    setMemSpaceConfigFlags(MemSpaceConfigFlags extraFlags)
+    setCacheCoherenceFlags(CacheCoherenceFlags extraFlags)
    {
+        // TODO: do mem_sync_op requests have valid paddr/vaddr?
        assert(privateFlags.isSet(VALID_PADDR | VALID_VADDR));
-        _memSpaceConfigFlags.set(extraFlags);
+        _cacheCoherenceFlags.set(extraFlags);
    }

    /** Accessor function for vaddr.*/
@@ -840,82 +836,10 @@ class Request
     * Accessor functions for the memory space configuration flags and used by
     * GPU ISAs such as the Heterogeneous System Architecture (HSA). Note that
     * these are for testing only; setting extraFlags should be done via
-     * setMemSpaceConfigFlags().
+     * setCacheCoherenceFlags().
     */
-    bool isScoped() const { return _memSpaceConfigFlags.isSet(SCOPE_VALID); }
-
-    bool
-    isWavefrontScope() const
-    {
-        assert(isScoped());
-        return _memSpaceConfigFlags.isSet(WAVEFRONT_SCOPE);
-    }
-
-    bool
-    isWorkgroupScope() const
-    {
-        assert(isScoped());
-        return _memSpaceConfigFlags.isSet(WORKGROUP_SCOPE);
-    }
-
-    bool
-    isDeviceScope() const
-    {
-        assert(isScoped());
-        return _memSpaceConfigFlags.isSet(DEVICE_SCOPE);
-    }
-
-    bool
-    isSystemScope() const
-    {
-        assert(isScoped());
-        return _memSpaceConfigFlags.isSet(SYSTEM_SCOPE);
-    }
-
-    bool
-    isGlobalSegment() const
-    {
-        return _memSpaceConfigFlags.isSet(GLOBAL_SEGMENT) ||
-               (!isGroupSegment() && !isPrivateSegment() &&
-                !isKernargSegment() && !isReadonlySegment() &&
-                !isSpillSegment() && !isArgSegment());
-    }
-
-    bool
-    isGroupSegment() const
-    {
-        return _memSpaceConfigFlags.isSet(GROUP_SEGMENT);
-    }
-
-    bool
-    isPrivateSegment() const
-    {
-        return _memSpaceConfigFlags.isSet(PRIVATE_SEGMENT);
-    }
-
-    bool
-    isKernargSegment() const
-    {
-        return _memSpaceConfigFlags.isSet(KERNARG_SEGMENT);
-    }
-
-    bool
-    isReadonlySegment() const
-    {
-        return _memSpaceConfigFlags.isSet(READONLY_SEGMENT);
-    }
-
-    bool
-    isSpillSegment() const
-    {
-        return _memSpaceConfigFlags.isSet(SPILL_SEGMENT);
-    }
-
-    bool
-    isArgSegment() const
-    {
-        return _memSpaceConfigFlags.isSet(ARG_SEGMENT);
-    }
+    bool isSLC() const { return _cacheCoherenceFlags.isSet(SLC_BIT); }
+    bool isGLC() const { return _cacheCoherenceFlags.isSet(GLC_BIT); }

    /**
     * Accessor functions to determine whether this request is part of
diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh
index f6b25bf9a..68b11f55d 100644
--- a/src/mem/ruby/slicc_interface/RubyRequest.hh
+++ b/src/mem/ruby/slicc_interface/RubyRequest.hh
@@ -58,16 +58,11 @@ class RubyRequest : public Message
    WriteMask m_writeMask;
    DataBlock m_WTData;
    int m_wfid;
-    HSAScope m_scope;
-    HSASegment m_segment;
-

    RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
        uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode,
        PacketPtr _pkt, PrefetchBit _pb = PrefetchBit_No,
-        ContextID _proc_id = 100, ContextID _core_id = 99,
-        HSAScope _scope = HSAScope_UNSPECIFIED,
-        HSASegment _segment = HSASegment_GLOBAL)
+        ContextID _proc_id = 100, ContextID _core_id = 99)
        : Message(curTime),
          m_PhysicalAddress(_paddr),
          m_Type(_type),
@@ -77,9 +72,7 @@ class RubyRequest : public Message
          m_Prefetch(_pb),
          data(_data),
          m_pkt(_pkt),
-          m_contextId(_core_id),
-          m_scope(_scope),
-          m_segment(_segment)
+          m_contextId(_core_id)
    {
        m_LineAddress = makeLineAddress(m_PhysicalAddress);
    }
@@ -89,9 +82,7 @@ class RubyRequest : public Message
        RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb,
        unsigned _proc_id, unsigned _core_id, int _wm_size,
        std::vector<bool> & _wm_mask,
-        DataBlock & _Data,
-        HSAScope _scope = HSAScope_UNSPECIFIED,
-        HSASegment _segment = HSASegment_GLOBAL)
+        DataBlock & _Data)
        : Message(curTime),
          m_PhysicalAddress(_paddr),
          m_Type(_type),
@@ -104,9 +95,7 @@ class RubyRequest : public Message
          m_contextId(_core_id),
          m_writeMask(_wm_size,_wm_mask),
          m_WTData(_Data),
-          m_wfid(_proc_id),
-          m_scope(_scope),
-          m_segment(_segment)
+          m_wfid(_proc_id)
    {
        m_LineAddress = makeLineAddress(m_PhysicalAddress);
    }
@@ -117,9 +106,7 @@ class RubyRequest : public Message
        unsigned _proc_id, unsigned _core_id, int _wm_size,
        std::vector<bool> & _wm_mask,
        DataBlock & _Data,
-        std::vector< std::pair<int,AtomicOpFunctor*> > _atomicOps,
-        HSAScope _scope = HSAScope_UNSPECIFIED,
-        HSASegment _segment = HSASegment_GLOBAL)
+        std::vector< std::pair<int,AtomicOpFunctor*> > _atomicOps)
        : Message(curTime),
          m_PhysicalAddress(_paddr),
          m_Type(_type),
@@ -132,14 +119,11 @@ class RubyRequest : public Message
          m_contextId(_core_id),
          m_writeMask(_wm_size,_wm_mask,_atomicOps),
          m_WTData(_Data),
-          m_wfid(_proc_id),
-          m_scope(_scope),
-          m_segment(_segment)
+          m_wfid(_proc_id)
    {
        m_LineAddress = makeLineAddress(m_PhysicalAddress);
    }

-    RubyRequest(Tick curTime) : Message(curTime) {}

    MsgPtr clone() const
    { return std::shared_ptr<Message>(new RubyRequest(*this)); }
-- 
2.30.2
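
Usage sketch (editor's illustration, not part of the commit): with this
change, a GPU memory pipeline tags a Request with the GCN3 cache-policy
bits via setCacheCoherenceFlags(), and cache models query them with
isSLC()/isGLC(). The decoded-instruction struct and helper below are
hypothetical; only SLC_BIT, GLC_BIT, setCacheCoherenceFlags() and the
isSLC()/isGLC() accessors come from this patch. Note that
setCacheCoherenceFlags() asserts the request already has valid virtual
and physical addresses, so it must run after address translation.

#include "mem/request.hh"

// Hypothetical decoded GCN3 memory instruction; glc/slc mirror the
// instruction modifiers documented in request.hh above.
struct DecodedGpuMemInsn
{
    bool glc;
    bool slc;
};

// Tag a translated request with the coherence policy encoded in the
// instruction. Runs after translation because setCacheCoherenceFlags()
// asserts VALID_PADDR | VALID_VADDR.
void
applyCoherencePolicy(const DecodedGpuMemInsn &insn, const RequestPtr &req)
{
    if (insn.glc)
        req->setCacheCoherenceFlags(Request::GLC_BIT);
    if (insn.slc)
        req->setCacheCoherenceFlags(Request::SLC_BIT);
}

// A downstream cache model can then steer the access, e.g.:
//     if (pkt->req->isSLC()) { /* force a miss in the GPU L2 */ }
//     if (pkt->req->isGLC()) { /* atomic that returns a value */ }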
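
Similarly, the MemSyncReq/MemSyncResp pair added to packet.cc/packet.hh
gives memory-synchronization operations (e.g., GPU cache invalidates) a
first-class command. A minimal sketch of the round trip, assuming a gem5
source tree; the helper name is illustrative:

#include <cassert>

#include "mem/packet.hh"
#include "mem/request.hh"

// Wrap a request in the new synchronization command. Per the
// commandInfo[] entry added above, MemSyncReq needs a response and
// pairs with MemSyncResp.
PacketPtr
buildMemSyncPacket(const RequestPtr &req)
{
    PacketPtr pkt = new Packet(req, MemCmd::MemSyncReq);
    assert(pkt->needsResponse());
    return pkt;
}

// On the responder side, Packet::makeResponse() consults the same
// command table and flips MemSyncReq into MemSyncResp:
//
//     pkt->makeResponse();
//     assert(pkt->cmd == MemCmd::MemSyncResp);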