#include "cpu/op_class.hh"
#include "cpu/static_inst.hh"
#include "cpu/translation.hh"
+#include "debug/HtmCpu.hh"
#include "mem/packet.hh"
#include "mem/request.hh"
#include "sim/byteswap.hh"
IsStrictlyOrdered,
ReqMade,
MemOpDone,
+ HtmFromTransaction,
MaxFlags
};
// Need a copy of main request pointer to verify on writes.
RequestPtr reqToVerify;
+ private:
+ // hardware transactional memory
+ uint64_t htmUid;
+ uint64_t htmDepth;
+
protected:
/** Flattened register index of the destination registers of this
* instruction.
+ /** Returns the HTM uid stored on this instruction. Asserts that the
+  * HtmFromTransaction flag is set, so it must only be called on an
+  * instruction that has been marked as transactional. */
uint64_t getHtmTransactionUid() const override
{
- panic("Not yet implemented\n");
- return 0;
+ assert(instFlags[HtmFromTransaction]);
+ return this->htmUid;
}
uint64_t newHtmTransactionUid() const override
+ /** True when this instruction carries the HtmFromTransaction flag. */
bool inHtmTransactionalState() const override
{
- panic("Not yet implemented\n");
- return false;
+ return instFlags[HtmFromTransaction];
}
uint64_t getHtmTransactionalDepth() const override
{
- panic("Not yet implemented\n");
- return 0;
+        // The stored depth only counts while the instruction is flagged as
+        // transactional; a non-transactional instruction reports depth 0.
+        return inHtmTransactionalState() ? this->htmDepth : 0;
+    }
+
+    /** Marks this instruction as belonging to a transaction: records the
+     *  given uid and nesting depth and raises the HtmFromTransaction flag. */
+    void
+    setHtmTransactionalState(uint64_t htm_uid, uint64_t htm_depth)
+    {
+        this->htmUid = htm_uid;
+        this->htmDepth = htm_depth;
+        instFlags.set(HtmFromTransaction);
+    }
+
+    /** Drops the transactional marking from this instruction, restoring the
+     *  uid/depth fields to their reset values. No-op when the instruction
+     *  is not currently flagged as transactional. */
+    void
+    clearHtmTransactionalState()
+    {
+        if (!inHtmTransactionalState())
+            return;
+
+        // Trace before clearing the flag: getHtmTransactionUid() asserts
+        // that the flag is still set.
+        DPRINTF(HtmCpu,
+            "clearing instuction's transactional state htmUid=%u\n",
+            getHtmTransactionUid());
+
+        instFlags.reset(HtmFromTransaction);
+        htmUid = -1;
+        htmDepth = 0;
}
/** Temporarily sets this instruction as a serialize before instruction. */
Fault
BaseDynInst<Impl>::initiateHtmCmd(Request::Flags flags)
{
- panic("Not yet implemented\n");
- return NoFault;
+ // Hands the HTM command to the CPU's pushRequest() path and returns the
+ // Fault it reports. The command is issued as a load (the "ld" argument is
+ // true) and carries only the caller's request flags.
+ // NOTE(review): the literals 8 and 0x0ul presumably are the size and the
+ // dummy address; they match the size == 8 / addr == 0 asserts on the LSQ
+ // HTM path - confirm against the pushRequest() signature.
+ return cpu->pushRequest(
+ dynamic_cast<typename DynInstPtr::PtrType>(this),
+ /* ld */ true, nullptr, 8, 0x0ul, flags, nullptr, nullptr);
}
template<class Impl>
physEffAddr = 0;
readyRegs = 0;
memReqFlags = 0;
+ // hardware transactional memory
+ htmUid = -1;
+ htmDepth = 0;
status.reset();
/*
- * Copyright (c) 2010-2012, 2014 ARM Limited
+ * Copyright (c) 2010-2012, 2014, 2019 ARM Limited
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
/** Deschedules a thread from scheduling */
void deactivateThread(ThreadID tid);
+ /** Is the CPU currently processing a HTM transaction? */
+ bool executingHtmTransaction(ThreadID) const;
+
+ /* Reset HTM tracking, e.g. after an abort */
+ void resetHtmStartsStops(ThreadID);
+
/** Ticks the commit stage, which tries to commit instructions. */
void tick();
/** Updates commit stats based on this instruction. */
void updateComInstStats(const DynInstPtr &inst);
+
+ // HTM
+ int htmStarts[Impl::MaxThreads];
+ int htmStops[Impl::MaxThreads];
+
/** Stat for the total number of squashed instructions discarded by commit.
*/
Stats::Scalar commitSquashedInsts;
#include "debug/CommitRate.hh"
#include "debug/Drain.hh"
#include "debug/ExecFaulting.hh"
+#include "debug/HtmCpu.hh"
#include "debug/O3PipeView.hh"
#include "params/DerivO3CPU.hh"
#include "sim/faults.hh"
committedStores[tid] = false;
checkEmptyROB[tid] = false;
renameMap[tid] = nullptr;
+ htmStarts[tid] = 0;
+ htmStops[tid] = 0;
}
interrupt = NoFault;
}
{
assert(isDrained());
rob->drainSanityCheck();
+
+ // hardware transactional memory
+ // cannot drain partially through a transaction
+ for (ThreadID tid = 0; tid < numThreads; tid++) {
+ if (executingHtmTransaction(tid)) {
+ panic("cannot drain partially through a HTM transaction");
+ }
+ }
}
template <class Impl>
}
}
+template <class Impl>
+bool
+DefaultCommit<Impl>::executingHtmTransaction(ThreadID tid) const
+{
+    // An invalid thread id is never inside a transaction. A valid thread is
+    // inside one while it has committed more HTM starts than HTM stops.
+    return tid != InvalidThreadID && htmStarts[tid] > htmStops[tid];
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::resetHtmStartsStops(ThreadID tid)
+{
+    // Nothing to reset for an invalid thread id.
+    if (tid == InvalidThreadID)
+        return;
+
+    // Zero both per-thread commit-side counters.
+    htmStops[tid] = 0;
+    htmStarts[tid] = 0;
+}
+
template <class Impl>
void
Cycles latency = dynamic_pointer_cast<SyscallRetryFault>(inst_fault) ?
cpu->syscallRetryLatency : trapLatency;
+ // hardware transactional memory
+ if (inst_fault != nullptr &&
+ std::dynamic_pointer_cast<GenericHtmFailureFault>(inst_fault)) {
+ // TODO
+ // latency = default abort/restore latency
+ // could also do some kind of exponential back off if desired
+ }
+
cpu->schedule(trap, cpu->clockEdge(latency));
trapInFlight[tid] = true;
thread[tid]->trapPending = true;
// Commit as many instructions as possible until the commit bandwidth
// limit is reached, or it becomes impossible to commit any more.
while (num_committed < commitWidth) {
+ // hardware transactional memory
+ // If executing within a transaction,
+ // need to handle interrupts specially
+
+ ThreadID commit_thread = getCommittingThread();
+
// Check for any interrupt that we've already squashed for
// and start processing it.
- if (interrupt != NoFault)
- handleInterrupt();
+ if (interrupt != NoFault) {
+ // If inside a transaction, postpone interrupts
+ if (executingHtmTransaction(commit_thread)) {
+ cpu->clearInterrupts(0);
+ toIEW->commitInfo[0].clearInterrupt = true;
+ interrupt = NoFault;
+ avoidQuiesceLiveLock = true;
+ } else {
+ handleInterrupt();
+ }
+ }
- ThreadID commit_thread = getCommittingThread();
+ // ThreadID commit_thread = getCommittingThread();
if (commit_thread == -1 || !rob->isHeadReady(commit_thread))
break;
statCommittedInstType[tid][head_inst->opClass()]++;
ppCommit->notify(head_inst);
+ // hardware transactional memory
+
+ // update nesting depth
+ if (head_inst->isHtmStart())
+ htmStarts[tid]++;
+
+ // sanity check
+ if (head_inst->inHtmTransactionalState()) {
+ assert(executingHtmTransaction(tid));
+ } else {
+ assert(!executingHtmTransaction(tid));
+ }
+
+ // update nesting depth
+ if (head_inst->isHtmStop())
+ htmStops[tid]++;
+
changedROBNumEntries[tid] = true;
// Set the doneSeqNum to the youngest committed instruction.
// Check if the instruction caused a fault. If so, trap.
Fault inst_fault = head_inst->getFault();
+ // hardware transactional memory
+ // if a fault occurred within a HTM transaction
+ // ensure that the transaction aborts
+ if (inst_fault != NoFault && head_inst->inHtmTransactionalState()) {
+ // There exists a generic HTM fault common to all ISAs
+ if (!std::dynamic_pointer_cast<GenericHtmFailureFault>(inst_fault)) {
+ DPRINTF(HtmCpu, "%s - fault (%s) encountered within transaction"
+ " - converting to GenericHtmFailureFault\n",
+ head_inst->staticInst->getName(), inst_fault->name());
+ inst_fault = std::make_shared<GenericHtmFailureFault>(
+ head_inst->getHtmTransactionUid(),
+ HtmFailureFaultCause::EXCEPTION);
+ }
+ // If this point is reached and the fault inherits from the HTM fault,
+ // then there is no need to raise a new fault
+ }
+
// Stores mark themselves as completed.
if (!head_inst->isStore() && inst_fault == NoFault) {
head_inst->setCompleted();
head_inst->renamedDestRegIdx(i));
}
+ // hardware transactional memory
+ // the HTM UID is purely for correctness and debugging purposes
+ if (head_inst->isHtmStart())
+ iewStage->setLastRetiredHtmUid(tid, head_inst->getHtmTransactionUid());
+
// Finally clear the head ROB entry.
rob->retireHead(tid);
void
FullO3CPU<Impl>::deactivateThread(ThreadID tid)
{
+ // hardware transactional memory
+ // shouldn't deactivate thread in the middle of a transaction
+ assert(!commit.executingHtmTransaction(tid));
+
//Remove From Active List, if Active
list<ThreadID>::iterator thread_it =
std::find(activeThreads.begin(), activeThreads.end(), tid);
template <class Impl>
void
-FullO3CPU<Impl>::htmSendAbortSignal(ThreadID tid, uint64_t htmUid,
+FullO3CPU<Impl>::htmSendAbortSignal(ThreadID tid, uint64_t htm_uid,
HtmFailureFaultCause cause)
{
- panic("not yet supported!");
+    // Tells the memory system that thread `tid` aborted transaction
+    // `htm_uid` for reason `cause`, after clearing the core-side HTM
+    // start/stop counters. Panics if the data port refuses the packet.
+    const Addr addr = 0x0ul;
+    const int size = 8;
+    const Request::Flags flags =
+        Request::PHYSICAL|Request::STRICT_ORDER|Request::HTM_ABORT;
+
+    // O3-specific actions
+    this->iew.ldstQueue.resetHtmStartsStops(tid);
+    this->commit.resetHtmStartsStops(tid);
+
+    // notify l1 d-cache (ruby) that core has aborted transaction
+    RequestPtr req =
+        std::make_shared<Request>(addr, size, flags, _dataMasterId);
+
+    req->taskId(taskId());
+    req->setContext(this->thread[tid]->contextId());
+    req->setHtmAbortCause(cause);
+
+    assert(req->isHTMAbort());
+
+    PacketPtr abort_pkt = Packet::createRead(req);
+    // Give the payload buffer to the packet (dataDynamic) so it is released
+    // together with the packet. Attaching it with dataStatic left the
+    // allocation without an owner, leaking it on every abort.
+    abort_pkt->dataDynamic(new uint8_t[size]);
+    abort_pkt->setHtmTransactional(htm_uid);
+
+    // TODO include correct error handling here
+    if (!this->iew.ldstQueue.getDataPort().sendTimingReq(abort_pkt)) {
+        panic("HTM abort signal was not sent to the memory subsystem.");
+    }
}
// Forward declaration of FullO3CPU.
#include "cpu/base.hh"
#include "cpu/simple_thread.hh"
#include "cpu/timebuf.hh"
-//#include "cpu/o3/thread_context.hh"
#include "params/DerivO3CPU.hh"
#include "sim/process.hh"
/*
- * Copyright (c) 2010-2012, 2014 ARM Limited
+ * Copyright (c) 2010-2012, 2014, 2019 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
/** Check misprediction */
void checkMisprediction(const DynInstPtr &inst);
+ // hardware transactional memory
+ // For debugging purposes, it is useful to keep track of the most recent
+ // htmUid that has been committed (architecturally, not transactionally)
+ // to ensure that the core and the memory subsystem are observing
+ // correct ordering constraints.
+ // This stage keeps no copy of its own: the (tid, htmUid) pair is passed
+ // straight through to the load/store queue.
+ void setLastRetiredHtmUid(ThreadID tid, uint64_t htmUid)
+ {
+ ldstQueue.setLastRetiredHtmUid(tid, htmUid);
+ }
+
private:
/** Sends commit proper information for a squash due to a branch
* mispredict.
/*
- * Copyright (c) 2010-2013, 2018 ARM Limited
+ * Copyright (c) 2010-2013, 2018-2019 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved.
*
break;
}
+ // hardware transactional memory
+ // CPU needs to track transactional state in program order.
+ const int numHtmStarts = ldstQueue.numHtmStarts(tid);
+ const int numHtmStops = ldstQueue.numHtmStops(tid);
+ const int htmDepth = numHtmStarts - numHtmStops;
+
+ if (htmDepth > 0) {
+ inst->setHtmTransactionalState(ldstQueue.getLatestHtmUid(tid),
+ htmDepth);
+ } else {
+ inst->clearHtmTransactionalState();
+ }
+
+
// Otherwise issue the instruction just fine.
if (inst->isAtomic()) {
DPRINTF(IEW, "[tid:%i] Issue: Memory instruction "
{
flags.set(Flag::Complete);
}
+
+ /** Returns the fixed label "LSQRequest" identifying this request class. */
+ virtual std::string name() const { return "LSQRequest"; }
};
class SingleDataRequest : public LSQRequest
virtual void buildPackets();
virtual Cycles handleLocalAccess(ThreadContext *thread, PacketPtr pkt);
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask);
+ /** Returns the fixed label "SingleDataRequest" for this request class. */
+ virtual std::string name() const { return "SingleDataRequest"; }
+ };
+
+ // hardware transactional memory
+ // This class extends SingleDataRequest for the sole purpose
+ // of encapsulating hardware transactional memory command requests.
+ // Its out-of-line constructor builds the request as a load of size 8 at
+ // the dummy address 0x0.
+ class HtmCmdRequest : public SingleDataRequest
+ {
+ protected:
+ /* Given that we are inside templates, children need explicit
+ * declaration of the names in the parent class. */
+ using Flag = typename LSQRequest::Flag;
+ using State = typename LSQRequest::State;
+ using LSQRequest::_addr;
+ using LSQRequest::_size;
+ using LSQRequest::_byteEnable;
+ using LSQRequest::_requests;
+ using LSQRequest::_inst;
+ using LSQRequest::_taskId;
+ using LSQRequest::flags;
+ using LSQRequest::setState;
+ public:
+ /** Builds the command request for inst using the given request flags. */
+ HtmCmdRequest(LSQUnit* port, const DynInstPtr& inst,
+ const Request::Flags& flags_);
+ inline virtual ~HtmCmdRequest() {}
+ /** Marks translation as started and finished without doing a lookup. */
+ virtual void initiateTranslation();
+ /** Translation completion hook; the implementation panics if called. */
+ virtual void finish(const Fault &fault, const RequestPtr &req,
+ ThreadContext* tc, BaseTLB::Mode mode);
+ /** Returns the fixed label "HtmCmdRequest" for this request class. */
+ virtual std::string name() const { return "HtmCmdRequest"; }
};
class SplitDataRequest : public LSQRequest
virtual RequestPtr mainRequest();
virtual PacketPtr mainPacket();
+ /** Returns the fixed label "SplitDataRequest" for this request class. */
+ virtual std::string name() const { return "SplitDataRequest"; }
};
/** Constructs an LSQ with the given parameters. */
/** Returns the total number of stores for a single thread. */
int numStores(ThreadID tid) { return thread.at(tid).numStores(); }
+
+ // hardware transactional memory
+
+    /** HTM start count of the given thread's LSQ unit, or 0 when the
+     *  thread id is invalid. */
+    int
+    numHtmStarts(ThreadID tid) const
+    {
+        return (tid == InvalidThreadID) ? 0 : thread[tid].numHtmStarts();
+    }
+    /** HTM stop count of the given thread's LSQ unit, or 0 when the
+     *  thread id is invalid. */
+    int
+    numHtmStops(ThreadID tid) const
+    {
+        return (tid == InvalidThreadID) ? 0 : thread[tid].numHtmStops();
+    }
+
+    /** Asks the given thread's LSQ unit to reset its HTM start/stop
+     *  counters; ignored for an invalid thread id. */
+    void
+    resetHtmStartsStops(ThreadID tid)
+    {
+        if (tid == InvalidThreadID)
+            return;
+
+        thread[tid].resetHtmStartsStops();
+    }
+
+    /** Latest HTM uid reported by the given thread's LSQ unit, or 0 when
+     *  the thread id is invalid. */
+    uint64_t
+    getLatestHtmUid(ThreadID tid) const
+    {
+        return (tid == InvalidThreadID) ? 0 : thread[tid].getLatestHtmUid();
+    }
+
+    /** Passes the last retired HTM uid to the given thread's LSQ unit;
+     *  ignored for an invalid thread id. */
+    void
+    setLastRetiredHtmUid(ThreadID tid, uint64_t htmUid)
+    {
+        if (tid == InvalidThreadID)
+            return;
+
+        thread[tid].setLastRetiredHtmUid(htmUid);
+    }
+
/** Returns the number of free load entries. */
unsigned numFreeLoadEntries();
/*
- * Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited
+ * Copyright (c) 2011-2012, 2014, 2017-2019 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
#include "cpu/o3/lsq.hh"
#include "debug/Drain.hh"
#include "debug/Fetch.hh"
+#include "debug/HtmCpu.hh"
#include "debug/LSQ.hh"
#include "debug/Writeback.hh"
#include "params/DerivO3CPU.hh"
// lines. For now, such cross-line update is not supported.
assert(!isAtomic || (isAtomic && !needs_burst));
+ const bool htm_cmd = isLoad && (flags & Request::HTM_CMD);
+
if (inst->translationStarted()) {
req = inst->savedReq;
assert(req);
} else {
- if (needs_burst) {
+ if (htm_cmd) {
+ assert(addr == 0x0lu);
+ assert(size == 8);
+ req = new HtmCmdRequest(&thread[tid], inst, flags);
+ } else if (needs_burst) {
req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
size, flags, data, res);
} else {
: Packet::createWrite(request()));
_packets.back()->dataStatic(_inst->memData);
_packets.back()->senderState = _senderState;
+
+ // hardware transactional memory
+ // If request originates in a transaction (not necessarily a HtmCmd),
+ // then the packet should be marked as such.
+ if (_inst->inHtmTransactionalState()) {
+ _packets.back()->setHtmTransactional(
+ _inst->getHtmTransactionUid());
+
+ DPRINTF(HtmCpu,
+ "HTM %s pc=0x%lx - vaddr=0x%lx - paddr=0x%lx - htmUid=%u\n",
+ isLoad() ? "LD" : "ST",
+ _inst->instAddr(),
+ _packets.back()->req->hasVaddr() ?
+ _packets.back()->req->getVaddr() : 0lu,
+ _packets.back()->getAddr(),
+ _inst->getHtmTransactionUid());
+ }
}
assert(_packets.size() == 1);
}
if (isLoad()) {
_mainPacket = Packet::createRead(mainReq);
_mainPacket->dataStatic(_inst->memData);
+
+ // hardware transactional memory
+ // If request originates in a transaction,
+ // packet should be marked as such
+ if (_inst->inHtmTransactionalState()) {
+ _mainPacket->setHtmTransactional(
+ _inst->getHtmTransactionUid());
+ DPRINTF(HtmCpu,
+ "HTM LD.0 pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
+ _inst->instAddr(),
+ _mainPacket->req->hasVaddr() ?
+ _mainPacket->req->getVaddr() : 0lu,
+ _mainPacket->getAddr(),
+ _inst->getHtmTransactionUid());
+ }
}
for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
RequestPtr r = _requests[i];
}
pkt->senderState = _senderState;
_packets.push_back(pkt);
+
+ // hardware transactional memory
+ // If request originates in a transaction,
+ // packet should be marked as such
+ if (_inst->inHtmTransactionalState()) {
+ _packets.back()->setHtmTransactional(
+ _inst->getHtmTransactionUid());
+ DPRINTF(HtmCpu,
+ "HTM %s.%d pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
+ isLoad() ? "LD" : "ST",
+ i+1,
+ _inst->instAddr(),
+ _packets.back()->req->hasVaddr() ?
+ _packets.back()->req->getVaddr() : 0lu,
+ _packets.back()->getAddr(),
+ _inst->getHtmTransactionUid());
+ }
}
}
assert(_packets.size() > 0);
lsq->recvReqRetry();
}
+// Builds an HTM command request: a load of size 8 at the dummy address 0x0
+// carrying the caller's flags. Translation is bypassed, so the physical
+// address is set directly and the instruction is primed as if translation
+// had already succeeded.
+template<class Impl>
+LSQ<Impl>::HtmCmdRequest::HtmCmdRequest(LSQUnit* port,
+                                        const DynInstPtr& inst,
+                                        const Request::Flags& flags_) :
+    SingleDataRequest(port, inst, true, 0x0lu, 8, flags_,
+                      nullptr, nullptr, nullptr)
+{
+    assert(_requests.size() == 0);
+
+    this->addRequest(_addr, _size, _byteEnable);
+
+    if (_requests.size() == 0) {
+        // addRequest() is expected to have queued the command request.
+        panic("unexpected behaviour");
+    } else {
+        const auto &cmd_req = _requests.back();
+
+        cmd_req->setReqInstSeqNum(_inst->seqNum);
+        cmd_req->taskId(_taskId);
+        cmd_req->setPaddr(_addr);
+        cmd_req->setInstCount(_inst->getCpuPtr()->totalInsts());
+
+        // Mirror the request's properties onto the owning instruction.
+        _inst->strictlyOrdered(cmd_req->isStrictlyOrdered());
+        _inst->fault = NoFault;
+        _inst->physEffAddr = cmd_req->getPaddr();
+        _inst->memReqFlags = cmd_req->getFlags();
+        _inst->savedReq = this;
+
+        setState(State::Translation);
+    }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::HtmCmdRequest::initiateTranslation()
+{
+ // Transaction commands are implemented as loads to avoid significant
+ // changes to the cpu and memory interfaces.
+ // The virtual and physical addresses use a dummy value of 0x0, so no
+ // real address translation takes place. Instead, translation is marked
+ // as started and finished on both the request and the instruction, and
+ // the request moves directly to the Request state.
+
+ flags.set(Flag::TranslationStarted);
+ flags.set(Flag::TranslationFinished);
+
+ _inst->translationStarted(true);
+ _inst->translationCompleted(true);
+
+ setState(State::Request);
+}
+
+// Translation-finished callback. HTM command requests mark their translation
+// as complete in initiateTranslation(), so reaching this function is treated
+// as a fatal error: it unconditionally panics and ignores all arguments.
+template<class Impl>
+void
+LSQ<Impl>::HtmCmdRequest::finish(const Fault &fault, const RequestPtr &req,
+ ThreadContext* tc, BaseTLB::Mode mode)
+{
+ panic("unexpected behaviour");
+}
+
#endif//__CPU_O3_LSQ_IMPL_HH__
#include "config/the_isa.hh"
#include "cpu/inst_seq.hh"
#include "cpu/timebuf.hh"
+#include "debug/HtmCpu.hh"
#include "debug/LSQUnit.hh"
#include "mem/packet.hh"
#include "mem/port.hh"
/** Returns the number of stores in the SQ. */
int numStores() { return stores; }
+ // hardware transactional memory
+ /** Returns this unit's HTM start counter. */
+ int numHtmStarts() const { return htmStarts; }
+ /** Returns this unit's HTM stop counter. */
+ int numHtmStops() const { return htmStops; }
+ /** Sets both HTM counters of this unit back to zero. */
+ void resetHtmStartsStops() { htmStarts = htmStops = 0; }
+    /** Returns the HTM uid currently held by this thread's HTM checkpoint.
+     *  Requires the thread context to have a checkpoint installed. */
+    uint64_t getLatestHtmUid() const
+    {
+        const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
+        // The squash path of this unit guards the same pointer with a null
+        // check; state the non-null requirement here instead of
+        // dereferencing blindly.
+        assert(htm_cpt != nullptr);
+        return htm_cpt->getHtmUid();
+    }
+ /** Records htm_uid as the last retired HTM uid. Asserts that the new
+  * value is not smaller than the one recorded before. */
+ void setLastRetiredHtmUid(uint64_t htm_uid)
+ {
+ assert(htm_uid >= lastRetiredHtmUid);
+ lastRetiredHtmUid = htm_uid;
+ }
+
/** Returns if either the LQ or SQ is full. */
bool isFull() { return lqFull() || sqFull(); }
/** The number of store instructions in the SQ waiting to writeback. */
int storesToWB;
+ // hardware transactional memory
+ // nesting depth
+ int htmStarts;
+ int htmStops;
+ // sanity checks and debugging
+ uint64_t lastRetiredHtmUid;
+
/** The index of the first instruction that may be ready to be
* written back, and has not yet been written back.
*/
if (req->mainRequest()->isLocalAccess()) {
assert(!load_inst->memData);
+ assert(!load_inst->inHtmTransactionalState());
load_inst->memData = new uint8_t[MaxDataBytes];
ThreadContext *thread = cpu->tcBase(lsqID);
return NoFault;
}
+ // hardware transactional memory
+ if (req->mainRequest()->isHTMStart() || req->mainRequest()->isHTMCommit())
+ {
+ // don't want to send nested transactionStarts and
+ // transactionStops outside of core, e.g. to Ruby
+ if (req->mainRequest()->getFlags().isSet(Request::NO_ACCESS)) {
+ Cycles delay(0);
+ PacketPtr data_pkt =
+ new Packet(req->mainRequest(), MemCmd::ReadReq);
+
+ // Allocate memory if this is the first time a load is issued.
+ if (!load_inst->memData) {
+ load_inst->memData =
+ new uint8_t[req->mainRequest()->getSize()];
+ // sanity checks expect zero in request's data
+ memset(load_inst->memData, 0, req->mainRequest()->getSize());
+ }
+
+ data_pkt->dataStatic(load_inst->memData);
+ if (load_inst->inHtmTransactionalState()) {
+ data_pkt->setHtmTransactional(
+ load_inst->getHtmTransactionUid());
+ }
+ data_pkt->makeResponse();
+
+ WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
+ cpu->schedule(wb, cpu->clockEdge(delay));
+ return NoFault;
+ }
+ }
+
// Check the SQ for any previous stores that might lead to forwarding
auto store_it = load_inst->sqIt;
assert (store_it >= storeWBIt);
MemCmd::ReadReq);
data_pkt->dataStatic(load_inst->memData);
+ // hardware transactional memory
+ // Store to load forwarding within a transaction
+ // This should be okay because the store will be sent to
+ // the memory subsystem and subsequently get added to the
+ // write set of the transaction. The write set has a stronger
+ // property than the read set, so the load doesn't necessarily
+ // have to be there.
+ assert(!req->mainRequest()->isHTMCmd());
+ if (load_inst->inHtmTransactionalState()) {
+ assert (!storeQueue[store_it._idx].completed());
+ assert (
+ storeQueue[store_it._idx].instruction()->
+ inHtmTransactionalState());
+ assert (
+ load_inst->getHtmTransactionUid() ==
+ storeQueue[store_it._idx].instruction()->
+ getHtmTransactionUid());
+ data_pkt->setHtmTransactional(
+ load_inst->getHtmTransactionUid());
+ DPRINTF(HtmCpu, "HTM LD (ST2LDF) "
+ "pc=0x%lx - vaddr=0x%lx - "
+ "paddr=0x%lx - htmUid=%u\n",
+ load_inst->instAddr(),
+ data_pkt->req->hasVaddr() ?
+ data_pkt->req->getVaddr() : 0lu,
+ data_pkt->getAddr(),
+ load_inst->getHtmTransactionUid());
+ }
+
if (req->isAnyOutstandingRequest()) {
assert(req->_numOutstandingPackets > 0);
// There are memory requests packets in flight already.
load_inst->memData = new uint8_t[req->mainRequest()->getSize()];
}
+
+ // hardware transactional memory
+ if (req->mainRequest()->isHTMCmd()) {
+ // this is a simple sanity check
+ // the Ruby cache controller will set
+ // memData to 0x0ul if successful.
+ *load_inst->memData = (uint64_t) 0x1ull;
+ }
+
// For now, load throughput is constrained by the number of
// load FUs only, and loads do not consume a cache port (only
// stores do).
#include "cpu/o3/lsq.hh"
#include "cpu/o3/lsq_unit.hh"
#include "debug/Activity.hh"
+#include "debug/HtmCpu.hh"
#include "debug/IEW.hh"
#include "debug/LSQUnit.hh"
#include "debug/O3PipeView.hh"
LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
DynInstPtr inst = state->inst;
+ // hardware transactional memory
+ // sanity check
+ if (pkt->isHtmTransactional() && !inst->isSquashed()) {
+ assert(inst->getHtmTransactionUid() == pkt->getHtmTransactionUid());
+ }
+
+ // if in a HTM transaction, it's possible
+ // to abort within the cache hierarchy.
+ // This is signalled back to the processor
+ // through responses to memory requests.
+ if (pkt->htmTransactionFailedInCache()) {
+ // cannot do this for write requests because
+ // they cannot tolerate faults
+ const HtmCacheFailure htm_rc =
+ pkt->getHtmTransactionFailedInCacheRC();
+ if(pkt->isWrite()) {
+ DPRINTF(HtmCpu,
+ "store notification (ignored) of HTM transaction failure "
+ "in cache - addr=0x%lx - rc=%s - htmUid=%d\n",
+ pkt->getAddr(), htmFailureToStr(htm_rc),
+ pkt->getHtmTransactionUid());
+ } else {
+ HtmFailureFaultCause fail_reason =
+ HtmFailureFaultCause::INVALID;
+
+ if (htm_rc == HtmCacheFailure::FAIL_SELF) {
+ fail_reason = HtmFailureFaultCause::SIZE;
+ } else if (htm_rc == HtmCacheFailure::FAIL_REMOTE) {
+ fail_reason = HtmFailureFaultCause::MEMORY;
+ } else if (htm_rc == HtmCacheFailure::FAIL_OTHER) {
+ // these are likely loads that were issued out of order
+ // they are faulted here, but it's unlikely that these will
+ // ever reach the commit head.
+ fail_reason = HtmFailureFaultCause::OTHER;
+ } else {
+ panic("HTM error - unhandled return code from cache (%s)",
+ htmFailureToStr(htm_rc));
+ }
+
+ inst->fault =
+ std::make_shared<GenericHtmFailureFault>(
+ inst->getHtmTransactionUid(),
+ fail_reason);
+
+ DPRINTF(HtmCpu,
+ "load notification of HTM transaction failure "
+ "in cache - pc=%s - addr=0x%lx - "
+ "rc=%u - htmUid=%d\n",
+ inst->pcState(), pkt->getAddr(),
+ htmFailureToStr(htm_rc), pkt->getHtmTransactionUid());
+ }
+ }
+
cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt));
/* Notify the sender state that the access is complete (for ownership
// after receving the response from the memory
assert(inst->isLoad() || inst->isStoreConditional() ||
inst->isAtomic());
+
+ // hardware transactional memory
+ if (pkt->htmTransactionFailedInCache()) {
+ state->request()->mainPacket()->setHtmTransactionFailedInCache(
+ pkt->getHtmTransactionFailedInCacheRC() );
+ }
+
writeback(inst, state->request()->mainPacket());
if (inst->isStore() || inst->isAtomic()) {
auto ss = dynamic_cast<SQSenderState*>(state);
template <class Impl>
LSQUnit<Impl>::LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
: lsqID(-1), storeQueue(sqEntries+1), loadQueue(lqEntries+1),
- loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false),
+ loads(0), stores(0), storesToWB(0),
+ htmStarts(0), htmStops(0),
+ lastRetiredHtmUid(0),
+ cacheBlockMask(0), stalled(false),
isStoreBlocked(false), storeInFlight(false), hasPendingRequest(false),
pendingRequest(nullptr)
{
{
loads = stores = storesToWB = 0;
+ // hardware transactional memory
+ // nesting depth
+ htmStarts = htmStops = 0;
storeWBIt = storeQueue.begin();
load_inst->lqIt = loadQueue.getIterator(load_inst->lqIdx);
++loads;
+
+ // hardware transactional memory
+ // transactional state and nesting depth must be tracked
+ // in the in-order part of the core.
+ if (load_inst->isHtmStart()) {
+ htmStarts++;
+ DPRINTF(HtmCpu, ">> htmStarts++ (%d) : htmStops (%d)\n",
+ htmStarts, htmStops);
+
+ const int htm_depth = htmStarts - htmStops;
+ const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
+ auto htm_uid = htm_cpt->getHtmUid();
+
+ // for debugging purposes
+ if (!load_inst->inHtmTransactionalState()) {
+ htm_uid = htm_cpt->newHtmUid();
+ DPRINTF(HtmCpu, "generating new htmUid=%u\n", htm_uid);
+ if (htm_depth != 1) {
+ DPRINTF(HtmCpu,
+ "unusual HTM transactional depth (%d)"
+ " possibly caused by mispeculation - htmUid=%u\n",
+ htm_depth, htm_uid);
+ }
+ }
+ load_inst->setHtmTransactionalState(htm_uid, htm_depth);
+ }
+
+ if (load_inst->isHtmStop()) {
+ htmStops++;
+ DPRINTF(HtmCpu, ">> htmStarts (%d) : htmStops++ (%d)\n",
+ htmStarts, htmStops);
+
+ if (htmStops==1 && htmStarts==0) {
+ DPRINTF(HtmCpu,
+ "htmStops==1 && htmStarts==0. "
+ "This generally shouldn't happen "
+ "(unless due to misspeculation)\n");
+ }
+ }
}
template <class Impl>
if (req->request()->isLocalAccess()) {
assert(!inst->isStoreConditional());
+ assert(!inst->inHtmTransactionalState());
ThreadContext *thread = cpu->tcBase(lsqID);
PacketPtr main_pkt = new Packet(req->mainRequest(),
MemCmd::WriteReq);
stallingLoadIdx = 0;
}
+ // hardware transactional memory
+ // Squashing instructions can alter the transaction nesting depth
+ // and must be corrected before fetching resumes.
+ if (loadQueue.back().instruction()->isHtmStart())
+ {
+ htmStarts = (--htmStarts < 0) ? 0 : htmStarts;
+ DPRINTF(HtmCpu, ">> htmStarts-- (%d) : htmStops (%d)\n",
+ htmStarts, htmStops);
+ }
+ if (loadQueue.back().instruction()->isHtmStop())
+ {
+ htmStops = (--htmStops < 0) ? 0 : htmStops;
+ DPRINTF(HtmCpu, ">> htmStarts (%d) : htmStops-- (%d)\n",
+ htmStarts, htmStops);
+ }
// Clear the smart pointer to make sure it is decremented.
loadQueue.back().instruction()->setSquashed();
loadQueue.back().clear();
++lsqSquashedLoads;
}
+ // hardware transactional memory
+ // scan load queue (from oldest to youngest) for most recent valid htmUid
+ auto scan_it = loadQueue.begin();
+ uint64_t in_flight_uid = 0;
+ while (scan_it != loadQueue.end()) {
+ if (scan_it->instruction()->isHtmStart() &&
+ !scan_it->instruction()->isSquashed()) {
+ in_flight_uid = scan_it->instruction()->getHtmTransactionUid();
+ DPRINTF(HtmCpu, "loadQueue[%d]: found valid HtmStart htmUid=%u\n",
+ scan_it._idx, in_flight_uid);
+ }
+ scan_it++;
+ }
+ // If there's a HtmStart in the pipeline then use its htmUid,
+ // otherwise use the most recently committed uid
+ const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
+ if (htm_cpt) {
+ const uint64_t old_local_htm_uid = htm_cpt->getHtmUid();
+ uint64_t new_local_htm_uid;
+ if (in_flight_uid > 0)
+ new_local_htm_uid = in_flight_uid;
+ else
+ new_local_htm_uid = lastRetiredHtmUid;
+
+ if (old_local_htm_uid != new_local_htm_uid) {
+ DPRINTF(HtmCpu, "flush: lastRetiredHtmUid=%u\n",
+ lastRetiredHtmUid);
+ DPRINTF(HtmCpu, "flush: resetting localHtmUid=%u\n",
+ new_local_htm_uid);
+
+ htm_cpt->setHtmUid(new_local_htm_uid);
+ }
+ }
+
if (memDepViolator && squashed_num < memDepViolator->seqNum) {
memDepViolator = NULL;
}
// Squashed instructions do not need to complete their access.
if (inst->isSquashed()) {
- assert(!inst->isStore());
+ assert (!inst->isStore() || inst->isStoreConditional());
++lsqIgnoredResponses;
return;
}
// If we have an outstanding fault, the fault should only be of
// type ReExec or - in case of a SplitRequest - a partial
// translation fault
- assert(dynamic_cast<ReExec*>(inst->fault.get()) != nullptr ||
- inst->savedReq->isPartialFault());
+
+ // Unless it's a hardware transactional memory fault
+ auto htm_fault = std::dynamic_pointer_cast<
+ GenericHtmFailureFault>(inst->fault);
+
+ if (!htm_fault) {
+ assert(dynamic_cast<ReExec*>(inst->fault.get()) != nullptr ||
+ inst->savedReq->isPartialFault());
+
+ } else if (!pkt->htmTransactionFailedInCache()) {
+ // Situation in which the instruction has a hardware transactional
+ // memory fault but not the packet itself. This can occur with
+ // ldp_uop microops since access is spread over multiple packets.
+ DPRINTF(HtmCpu,
+ "%s writeback with HTM failure fault, "
+ "however, completing packet is not aware of "
+ "transaction failure. cause=%s htmUid=%u\n",
+ inst->staticInst->getName(),
+ htmFailureToStr(htm_fault->getHtmFailureFaultCause()),
+ htm_fault->getHtmUid());
+ }
DPRINTF(LSQUnit, "Not completing instruction [sn:%lli] access "
"due to pending fault.\n", inst->seqNum);
{
InstSeqNum barr_sn = barr_inst->seqNum;
// Memory barriers block loads and stores, write barriers only stores.
- if (barr_inst->isMemBarrier()) {
+ // Required also for hardware transactional memory commands which
+ // can have strict ordering semantics
+ if (barr_inst->isMemBarrier() || barr_inst->isHtmCmd()) {
loadBarrierSNs.insert(barr_sn);
storeBarrierSNs.insert(barr_sn);
DPRINTF(MemDepUnit, "Inserted a memory barrier %s SN:%lli\n",
DPRINTF(MemDepUnit, "Inserted a write barrier %s SN:%lli\n",
barr_inst->pcState(), barr_sn);
}
+
if (loadBarrierSNs.size() || storeBarrierSNs.size()) {
DPRINTF(MemDepUnit, "Outstanding load barriers = %d; "
"store barriers = %d\n",
wakeDependents(inst);
completed(inst);
InstSeqNum barr_sn = inst->seqNum;
- if (inst->isMemBarrier()) {
+
+ if (inst->isMemBarrier() || inst->isHtmCmd()) {
assert(hasLoadBarrier());
assert(hasStoreBarrier());
loadBarrierSNs.erase(barr_sn);
void
MemDepUnit<MemDepPred, Impl>::wakeDependents(const DynInstPtr &inst)
{
- // Only stores, atomics and barriers have dependents.
+ // Only stores, atomics, barriers and
+ // hardware transactional memory commands have dependents.
if (!inst->isStore() && !inst->isAtomic() && !inst->isMemBarrier() &&
- !inst->isWriteBarrier()) {
+ !inst->isWriteBarrier() && !inst->isHtmCmd()) {
return;
}
O3ThreadContext<Impl>::htmAbortTransaction(uint64_t htmUid,
HtmFailureFaultCause cause)
{
- panic("function not implemented\n");
+ cpu->htmSendAbortSignal(thread->threadId(), htmUid, cause);
+
+ conditionalSquash();
}
template <class Impl>
BaseHTMCheckpointPtr&
O3ThreadContext<Impl>::getHtmCheckpointPtr()
{
- panic("function not implemented\n");
+ // Returns a reference to the thread state's own htmCheckpoint member,
+ // not a copy of the pointer.
+ return thread->htmCheckpoint;
}
template <class Impl>
void
O3ThreadContext<Impl>::setHtmCheckpointPtr(BaseHTMCheckpointPtr new_cpt)
{
- panic("function not implemented\n");
+    // Installs new_cpt as this thread's HTM checkpoint, taking ownership.
+    // A checkpoint that is still valid must not be replaced. The thread may
+    // not have a checkpoint yet, so only query valid() when one exists
+    // instead of dereferencing an empty pointer.
+    assert(!thread->htmCheckpoint || !thread->htmCheckpoint->valid());
+    thread->htmCheckpoint = std::move(new_cpt);
}
#endif //__CPU_O3_THREAD_CONTEXT_IMPL_HH__
/*
- * Copyright (c) 2012 ARM Limited
+ * Copyright (c) 2012, 2019 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
*/
bool trapPending;
+ /** Pointer to the hardware transactional memory checkpoint. */
+ std::unique_ptr<BaseHTMCheckpoint> htmCheckpoint;
+
O3ThreadState(O3CPU *_cpu, int _thread_num, Process *_process)
: ThreadState(_cpu, _thread_num, _process), cpu(_cpu),
comInstEventQueue("instruction-based event queue"),