From: Timothy Hayes Date: Mon, 27 Apr 2020 14:03:26 +0000 (+0100) Subject: mem-ruby: LL/SC fixes X-Git-Tag: v20.0.0.0~61 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=dd6cd33344044bee2f6a6341b233b0bdb619af94;p=gem5.git mem-ruby: LL/SC fixes The implementation for load-linked/store-conditional did not work correctly for multi-core simulations. Since load-links were treated as stores, it was not possible for a line to have multiple readers which often resulted in livelock when using these instructions to implement mutexes. This improved implementation treats load-linked instructions similarly to loads but locks the line after a copy has been fetched locally. Writes to a monitored address ensure the 'linked' property is blown away and any subsequent store-conditional will fail. Change-Id: I19bd74459e26732c92c8b594901936e6439fb073 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/27103 Reviewed-by: Daniel Carvalho Maintainer: Bobby R. Bruce Tested-by: kokoro --- diff --git a/src/mem/ruby/protocol/RubySlicc_Types.sm b/src/mem/ruby/protocol/RubySlicc_Types.sm index fd7628965..f8de9ed4c 100644 --- a/src/mem/ruby/protocol/RubySlicc_Types.sm +++ b/src/mem/ruby/protocol/RubySlicc_Types.sm @@ -1,4 +1,16 @@ /* + * Copyright (c) 2020 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 1999-2005 Mark D. Hill and David A. Wood * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved. @@ -112,6 +124,10 @@ structure (Sequencer, external = "yes") { void writeCallback(Addr, DataBlock, bool, MachineType, Cycles, Cycles, Cycles); + // ll/sc support + void writeCallbackScFail(Addr, DataBlock); + bool llscCheckMonitor(Addr); + void checkCoherence(Addr); void evictionCallback(Addr); void recordRequestType(SequencerRequestType); diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 1f538c375..0287e1301 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited + * Copyright (c) 2019-2020 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -45,6 +45,7 @@ #include "base/logging.hh" #include "base/str.hh" #include "cpu/testers/rubytest/RubyTester.hh" +#include "debug/LLSC.hh" #include "debug/MemoryAccess.hh" #include "debug/ProtocolTrace.hh" #include "debug/RubySequencer.hh" @@ -89,6 +90,64 @@ Sequencer::~Sequencer() { } +void +Sequencer::llscLoadLinked(const Addr claddr) +{ + AbstractCacheEntry *line = m_dataCache_ptr->lookup(claddr); + if (line) { + line->setLocked(m_version); + DPRINTF(LLSC, "LLSC Monitor - inserting load linked - " + "addr=0x%lx - cpu=%u\n", claddr, m_version); + } +} + +void +Sequencer::llscClearMonitor(const Addr claddr) +{ + AbstractCacheEntry *line = m_dataCache_ptr->lookup(claddr); + if (line && line->isLocked(m_version)) { + line->clearLocked(); + DPRINTF(LLSC, "LLSC Monitor - clearing due to store - " + "addr=0x%lx - cpu=%u\n", claddr, m_version); + } +} + +bool +Sequencer::llscStoreConditional(const Addr claddr) +{ + AbstractCacheEntry *line = m_dataCache_ptr->lookup(claddr); + if (!line) + return false; + + DPRINTF(LLSC, "LLSC Monitor - clearing due to " + "store conditional - " + "addr=0x%lx - cpu=%u\n", + claddr, m_version); + + if (line->isLocked(m_version)) { + line->clearLocked(); + return true; + } else { + line->clearLocked(); + 
return false; + } +} + +bool +Sequencer::llscCheckMonitor(const Addr address) +{ + const Addr claddr = makeLineAddress(address); + AbstractCacheEntry *line = m_dataCache_ptr->lookup(claddr); + if (!line) + return false; + + if (line->isLocked(m_version)) { + return true; + } else { + return false; + } +} + void Sequencer::wakeup() { @@ -202,62 +261,6 @@ Sequencer::markRemoved() m_outstanding_count--; } -void -Sequencer::invalidateSC(Addr address) -{ - AbstractCacheEntry *e = m_dataCache_ptr->lookup(address); - // The controller has lost the coherence permissions, hence the lock - // on the cache line maintained by the cache should be cleared. - if (e && e->isLocked(m_version)) { - e->clearLocked(); - } -} - -bool -Sequencer::handleLlsc(Addr address, SequencerRequest* request) -{ - AbstractCacheEntry *e = m_dataCache_ptr->lookup(address); - if (!e) - return true; - - // The success flag indicates whether the LLSC operation was successful. - // LL ops will always succeed, but SC may fail if the cache line is no - // longer locked. - bool success = true; - if (request->m_type == RubyRequestType_Store_Conditional) { - if (!e->isLocked(m_version)) { - // - // For failed SC requests, indicate the failure to the cpu by - // setting the extra data to zero. - // - request->pkt->req->setExtraData(0); - success = false; - } else { - // - // For successful SC requests, indicate the success to the cpu by - // setting the extra data to one. - // - request->pkt->req->setExtraData(1); - } - // - // Independent of success, all SC operations must clear the lock - // - e->clearLocked(); - } else if (request->m_type == RubyRequestType_Load_Linked) { - // - // Note: To fully follow Alpha LLSC semantics, should the LL clear any - // previously locked cache lines? 
- // - e->setLocked(m_version); - } else if (e->isLocked(m_version)) { - // - // Normal writes should clear the locked address - // - e->clearLocked(); - } - return success; -} - void Sequencer::recordMissLatency(SequencerRequest* srequest, bool llscSuccess, const MachineType respondingMach, @@ -324,6 +327,13 @@ Sequencer::recordMissLatency(SequencerRequest* srequest, bool llscSuccess, } } +void +Sequencer::writeCallbackScFail(Addr address, DataBlock& data) +{ + llscClearMonitor(address); + writeCallback(address, data); +} + void Sequencer::writeCallback(Addr address, DataBlock& data, const bool externalHit, const MachineType mach, @@ -349,21 +359,27 @@ Sequencer::writeCallback(Addr address, DataBlock& data, SequencerRequest &seq_req = seq_req_list.front(); if (ruby_request) { assert(seq_req.m_type != RubyRequestType_LD); + assert(seq_req.m_type != RubyRequestType_Load_Linked); assert(seq_req.m_type != RubyRequestType_IFETCH); } // handle write request if ((seq_req.m_type != RubyRequestType_LD) && + (seq_req.m_type != RubyRequestType_Load_Linked) && (seq_req.m_type != RubyRequestType_IFETCH)) { - // - // For Alpha, properly handle LL, SC, and write requests with - // respect to locked cache blocks. - // - // Not valid for Garnet_standalone protocl - // - bool success = true; - if (!m_runningGarnetStandalone) - success = handleLlsc(address, &seq_req); + // LL/SC support (tested with ARMv8) + bool success = false; + + if (seq_req.m_type != RubyRequestType_Store_Conditional) { + // Regular stores to addresses being monitored + // will fail (remove) the monitor entry. + llscClearMonitor(address); + } else { + // Store conditionals must first check the monitor + // if they will succeed or not + success = llscStoreConditional(address); + seq_req.pkt->req->setExtraData(success ? 1 : 0); + } // Handle SLICC block_on behavior for Locked_RMW accesses. 
NOTE: the // address variable here is assumed to be a line address, so when @@ -435,11 +451,13 @@ Sequencer::readCallback(Addr address, DataBlock& data, SequencerRequest &seq_req = seq_req_list.front(); if (ruby_request) { assert((seq_req.m_type == RubyRequestType_LD) || + (seq_req.m_type == RubyRequestType_Load_Linked) || (seq_req.m_type == RubyRequestType_IFETCH)); } else { aliased_loads++; } if ((seq_req.m_type != RubyRequestType_LD) && + (seq_req.m_type != RubyRequestType_Load_Linked) && (seq_req.m_type != RubyRequestType_IFETCH)) { // Write request: reissue request to the cache hierarchy issueRequest(seq_req.pkt, seq_req.m_second_type); @@ -480,6 +498,12 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, Addr request_address(pkt->getAddr()); RubyRequestType type = srequest->m_type; + // Load-linked handling + if (type == RubyRequestType_Load_Linked) { + Addr line_addr = makeLineAddress(request_address); + llscLoadLinked(line_addr); + } + // update the data unless it is a non-data-carrying flush if (RubySystem::getWarmupEnabled()) { data.setData(pkt->getConstPtr(), @@ -553,23 +577,26 @@ Sequencer::makeRequest(PacketPtr pkt) RubyRequestType secondary_type = RubyRequestType_NULL; if (pkt->isLLSC()) { - // - // Alpha LL/SC instructions need to be handled carefully by the cache + // LL/SC instructions need to be handled carefully by the cache // coherence protocol to ensure they follow the proper semantics. In // particular, by identifying the operations as atomic, the protocol // should understand that migratory sharing optimizations should not // be performed (i.e. a load between the LL and SC should not steal // away exclusive permission). // + // The following logic works correctly with the semantics + // of armV8 LDEX/STEX instructions. 
+ if (pkt->isWrite()) { DPRINTF(RubySequencer, "Issuing SC\n"); primary_type = RubyRequestType_Store_Conditional; + secondary_type = RubyRequestType_ST; } else { DPRINTF(RubySequencer, "Issuing LL\n"); assert(pkt->isRead()); primary_type = RubyRequestType_Load_Linked; + secondary_type = RubyRequestType_LD; } - secondary_type = RubyRequestType_ATOMIC; } else if (pkt->req->isLockedRMW()) { // // x86 locked instructions are translated to store cache coherence @@ -724,6 +751,7 @@ Sequencer::recordRequestType(SequencerRequestType requestType) { void Sequencer::evictionCallback(Addr address) { + llscClearMonitor(address); ruby_eviction_callback(address); } diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index bb2819b15..bb9360719 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -1,6 +1,6 @@ /* - * Copyright (c) 2019 ARM Limited - * All rights reserved. + * Copyright (c) 2019-2020 ARM Limited + * All rights reserved * * The license below extends only to copyright in the software and shall * not be construed as granting a license to any other intellectual @@ -84,6 +84,13 @@ class Sequencer : public RubyPort Sequencer(const Params *); ~Sequencer(); + /** + * Proxy function to writeCallback that first + * invalidates the line address in the local monitor. 
+ */ + void writeCallbackScFail(Addr address, + DataBlock& data); + // Public Methods void wakeup(); // Used only for deadlock detection void resetStats() override; @@ -121,7 +128,6 @@ class Sequencer : public RubyPort void markRemoved(); void evictionCallback(Addr address); - void invalidateSC(Addr address); int coreId() const { return m_coreId; } virtual int functionalWrite(Packet *func_pkt) override; @@ -191,7 +197,6 @@ class Sequencer : public RubyPort RequestStatus insertRequest(PacketPtr pkt, RubyRequestType primary_type, RubyRequestType secondary_type); - bool handleLlsc(Addr address, SequencerRequest* request); // Private copy constructor and assignment operator Sequencer(const Sequencer& obj); @@ -257,6 +262,39 @@ class Sequencer : public RubyPort std::vector m_IncompleteTimes; EventFunctionWrapper deadlockCheckEvent; + + // support for LL/SC + + /** + * Places the cache line address into the global monitor + * tagged with this Sequencer object's version id. + */ + void llscLoadLinked(const Addr); + + /** + * Removes the cache line address from the global monitor. + * This is independent of this Sequencer object's version id. + */ + void llscClearMonitor(const Addr); + + /** + * Searches for cache line address in the global monitor + * tagged with this Sequencer object's version id. + * If a match is found, the entry is erased from + * the global monitor. + * + * @return a boolean indicating if the line address was found. + */ + bool llscStoreConditional(const Addr); + + public: + /** + * Searches for cache line address in the global monitor + * tagged with this Sequencer object's version id. + * + * @return a boolean indicating if the line address was found. + */ + bool llscCheckMonitor(const Addr); }; inline std::ostream&