From: Timothy Hayes Date: Wed, 2 Sep 2020 10:33:15 +0000 (+0100) Subject: cpu: HTM Implementation for TimingCPU X-Git-Tag: v20.1.0.0~108 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=79df4341874cd053d124ed9fd7afb54e52a6e13c;p=gem5.git cpu: HTM Implementation for TimingCPU JIRA: https://gem5.atlassian.net/browse/GEM5-587 Change-Id: I3e1de639560ea5492e914470e31bacb321425f0a Signed-off-by: Giacomo Travaglini Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/30327 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index a597f0616..bf940ba98 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -63,6 +63,7 @@ #include "debug/Decode.hh" #include "debug/ExecFaulting.hh" #include "debug/Fetch.hh" +#include "debug/HtmCpu.hh" #include "debug/Quiesce.hh" #include "mem/packet.hh" #include "mem/request.hh" @@ -453,6 +454,17 @@ BaseSimpleCPU::checkForInterrupts() Fault interrupt = interrupts[curThread]->getInterrupt(); if (interrupt != NoFault) { + // hardware transactional memory + // Postpone taking interrupts while executing transactions. + assert(!std::dynamic_pointer_cast( + interrupt)); + if (t_info.inHtmTransactionalState()) { + DPRINTF(HtmCpu, "Deferring pending interrupt - %s -" + "due to transactional state\n", + interrupt->name()); + return; + } + t_info.fetchOffset = 0; interrupts[curThread]->updateIntrInfo(); interrupt->invoke(tc); diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh index 41e1d3dd1..2b2afd282 100644 --- a/src/cpu/simple/exec_context.hh +++ b/src/cpu/simple/exec_context.hh @@ -475,8 +475,7 @@ class SimpleExecContext : public ExecContext { Fault initiateHtmCmd(Request::Flags flags) override { - panic("Not yet supported\n"); - return NoFault; + return cpu->initiateHtmCmd(flags); } /** @@ -536,29 +535,26 @@ class SimpleExecContext : public ExecContext { uint64_t getHtmTransactionUid() const override { - panic("Not yet supported\n"); - return 0; + return tcBase()->getHtmCheckpointPtr()->getHtmUid(); } uint64_t newHtmTransactionUid() const override { - panic("Not yet supported\n"); - return 0; + return tcBase()->getHtmCheckpointPtr()->newHtmUid(); } bool inHtmTransactionalState() const override { - panic("Not yet supported\n"); - return false; + return (getHtmTransactionalDepth() > 0); } uint64_t getHtmTransactionalDepth() const override { - panic("Not yet supported\n"); - return 0; + assert(thread->htmTransactionStarts >= thread->htmTransactionStops); + return (thread->htmTransactionStarts - thread->htmTransactionStops); } /** diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index d3adbcc52..f22c58ddd 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -48,6 +48,7 @@ #include "debug/Config.hh" #include "debug/Drain.hh" #include "debug/ExecFaulting.hh" +#include "debug/HtmCpu.hh" #include "debug/Mwait.hh" #include "debug/SimpleCPU.hh" #include "mem/packet.hh" @@ -173,6 +174,10 @@ TimingSimpleCPU::switchOut() SimpleExecContext& t_info = *threadInfo[curThread]; M5_VAR_USED SimpleThread* thread = t_info.thread; + // hardware transactional memory + // Cannot switch out the CPU in the middle of a transaction + assert(!t_info.inHtmTransactionalState()); + BaseSimpleCPU::switchOut(); assert(!fetchEvent.scheduled()); @@ -234,6 +239,10 @@ TimingSimpleCPU::suspendContext(ThreadID thread_num) assert(thread_num < numThreads); activeThreads.remove(thread_num); + // hardware transactional memory + // Cannot suspend context in the middle of a transaction. + assert(!threadInfo[curThread]->inHtmTransactionalState()); + if (_status == Idle) return; @@ -260,6 +269,12 @@ TimingSimpleCPU::handleReadPacket(PacketPtr pkt) const RequestPtr &req = pkt->req; + // hardware transactional memory + // sanity check + if (req->isHTMCmd()) { + assert(!req->isLocalAccess()); + } + // We're about the issues a locked load, so tell the monitor // to start caring about this address if (pkt->isRead() && pkt->req->isLLSC()) { @@ -291,6 +306,17 @@ TimingSimpleCPU::sendData(const RequestPtr &req, uint8_t *data, uint64_t *res, PacketPtr pkt = buildPacket(req, read); pkt->dataDynamic(data); + // hardware transactional memory + // If the core is in transactional mode or if the request is HtmCMD + // to abort a transaction, the packet should reflect that it is + // transactional and also contain a HtmUid for debugging. + const bool is_htm_speculative = t_info.inHtmTransactionalState(); + if (is_htm_speculative || req->isHTMAbort()) { + pkt->setHtmTransactional(t_info.getHtmTransactionUid()); + } + if (req->isHTMAbort()) + DPRINTF(HtmCpu, "htmabort htmUid=%u\n", t_info.getHtmTransactionUid()); + if (req->getFlags().isSet(Request::NO_ACCESS)) { assert(!dcache_pkt); pkt->makeResponse(); @@ -322,8 +348,21 @@ void TimingSimpleCPU::sendSplitData(const RequestPtr &req1, const RequestPtr &req2, const RequestPtr &req, uint8_t *data, bool read) { + SimpleExecContext &t_info = *threadInfo[curThread]; PacketPtr pkt1, pkt2; buildSplitPacket(pkt1, pkt2, req1, req2, req, data, read); + + // hardware transactional memory + // HTM commands should never use SplitData + assert(!req1->isHTMCmd() && !req2->isHTMCmd()); + + // If the thread is executing transactionally, + // reflect this in the packets. + if (t_info.inHtmTransactionalState()) { + pkt1->setHtmTransactional(t_info.getHtmTransactionUid()); + pkt2->setHtmTransactional(t_info.getHtmTransactionUid()); + } + if (req->getFlags().isSet(Request::NO_ACCESS)) { assert(!dcache_pkt); pkt1->makeResponse(); @@ -724,6 +763,25 @@ TimingSimpleCPU::advanceInst(const Fault &fault) return; if (fault != NoFault) { + // hardware transactional memory + // If a fault occurred within a transaction + // ensure that the transaction aborts + if (t_info.inHtmTransactionalState() && + !std::dynamic_pointer_cast(fault)) { + DPRINTF(HtmCpu, "fault (%s) occurred - " + "replacing with HTM abort fault htmUid=%u\n", + fault->name(), t_info.getHtmTransactionUid()); + + Fault tmfault = std::make_shared( + t_info.getHtmTransactionUid(), + HtmFailureFaultCause::EXCEPTION); + + advancePC(tmfault); + reschedule(fetchEvent, clockEdge(), true); + _status = Faulting; + return; + } + DPRINTF(SimpleCPU, "Fault occured. Handling the fault\n"); advancePC(fault); @@ -783,6 +841,19 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt) preExecute(); + + // hardware transactional memory + if (curStaticInst && curStaticInst->isHtmStart()) { + // if this HtmStart is not within a transaction, + // then assign it a new htmTransactionUid + if (!t_info.inHtmTransactionalState()) + t_info.newHtmTransactionUid(); + SimpleThread* thread = t_info.thread; + thread->htmTransactionStarts++; + DPRINTF(HtmCpu, "htmTransactionStarts++=%u\n", + thread->htmTransactionStarts); + } + if (curStaticInst && curStaticInst->isMemRef()) { // load or store: just send to dcache Fault fault = curStaticInst->initiateAcc(&t_info, traceData); @@ -838,6 +909,15 @@ bool TimingSimpleCPU::IcachePort::recvTimingResp(PacketPtr pkt) { DPRINTF(SimpleCPU, "Received fetch response %#x\n", pkt->getAddr()); + + // hardware transactional memory + // Currently, there is no support for tracking instruction fetches + // in an transaction's read set. + if (pkt->htmTransactionFailedInCache()) { + panic("HTM transactional support for" + " instruction stream not yet supported\n"); + } + // we should only ever see one response per cycle since we only // issue a new request once this response is sunk assert(!tickEvent.scheduled()); @@ -864,6 +944,12 @@ TimingSimpleCPU::IcachePort::recvReqRetry() void TimingSimpleCPU::completeDataAccess(PacketPtr pkt) { + // hardware transactional memory + + SimpleExecContext *t_info = threadInfo[curThread]; + const bool is_htm_speculative = + t_info->inHtmTransactionalState(); + // received a response from the dcache: complete the load or store // instruction assert(!pkt->isError()); @@ -876,13 +962,35 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt) updateCycleCounters(BaseCPU::CPU_STATE_ON); if (pkt->senderState) { + // hardware transactional memory + // There shouldn't be HtmCmds occurring in multipacket requests + if (pkt->req->isHTMCmd()) { + panic("unexpected HTM case"); + } + SplitFragmentSenderState * send_state = dynamic_cast(pkt->senderState); assert(send_state); - delete pkt; PacketPtr big_pkt = send_state->bigPkt; delete send_state; + if (pkt->isHtmTransactional()) { + assert(is_htm_speculative); + + big_pkt->setHtmTransactional( + pkt->getHtmTransactionUid() + ); + } + + if (pkt->htmTransactionFailedInCache()) { + assert(is_htm_speculative); + big_pkt->setHtmTransactionFailedInCache( + pkt->getHtmTransactionFailedInCacheRC() + ); + } + + delete pkt; + SplitMainSenderState * main_send_state = dynamic_cast(big_pkt->senderState); assert(main_send_state); @@ -901,8 +1009,59 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt) _status = BaseSimpleCPU::Running; - Fault fault = curStaticInst->completeAcc(pkt, threadInfo[curThread], - traceData); + Fault fault; + + // hardware transactional memory + // sanity checks + // ensure htmTransactionUids are equivalent + if (pkt->isHtmTransactional()) + assert (pkt->getHtmTransactionUid() == + t_info->getHtmTransactionUid()); + + // can't have a packet that fails a transaction while not in a transaction + if (pkt->htmTransactionFailedInCache()) + assert(is_htm_speculative); + + // shouldn't fail through stores because this would be inconsistent w/ O3 + // which cannot fault after the store has been sent to memory + if (pkt->htmTransactionFailedInCache() && !pkt->isWrite()) { + const HtmCacheFailure htm_rc = + pkt->getHtmTransactionFailedInCacheRC(); + DPRINTF(HtmCpu, "HTM abortion in cache (rc=%s) detected htmUid=%u\n", + htmFailureToStr(htm_rc), pkt->getHtmTransactionUid()); + + // Currently there are only two reasons why a transaction would + // fail in the memory subsystem-- + // (1) A transactional line was evicted from the cache for + // space (or replacement policy) reasons. + // (2) Another core/device requested a cache line that is in this + // transaction's read/write set that is incompatible with the + // HTM's semantics, e.g. another core requesting exclusive access + // of a line in this core's read set. + if (htm_rc == HtmCacheFailure::FAIL_SELF) { + fault = std::make_shared( + t_info->getHtmTransactionUid(), + HtmFailureFaultCause::SIZE); + } else if (htm_rc == HtmCacheFailure::FAIL_REMOTE) { + fault = std::make_shared( + t_info->getHtmTransactionUid(), + HtmFailureFaultCause::MEMORY); + } else { + panic("HTM - unhandled rc %s", htmFailureToStr(htm_rc)); + } + } else { + fault = curStaticInst->completeAcc(pkt, t_info, + traceData); + } + + // hardware transactional memory + // Track HtmStop instructions, + // e.g. instructions which commit a transaction. + if (curStaticInst && curStaticInst->isHtmStop()) { + t_info->thread->htmTransactionStops++; + DPRINTF(HtmCpu, "htmTransactionStops++=%u\n", + t_info->thread->htmTransactionStops); + } // keep an instruction count if (fault == NoFault) @@ -1058,14 +1217,82 @@ TimingSimpleCPU::printAddr(Addr a) Fault TimingSimpleCPU::initiateHtmCmd(Request::Flags flags) { - panic("not yet supported!"); + SimpleExecContext &t_info = *threadInfo[curThread]; + SimpleThread* thread = t_info.thread; + + const Addr addr = 0x0ul; + const Addr pc = thread->instAddr(); + const int size = 8; + + if (traceData) + traceData->setMem(addr, size, flags); + + RequestPtr req = std::make_shared( + addr, size, flags, dataMasterId()); + + req->setPC(pc); + req->setContext(thread->contextId()); + req->taskId(taskId()); + req->setInstCount(t_info.numInst); + + assert(req->isHTMCmd()); + + // Use the payload as a sanity check, + // the memory subsystem will clear allocated data + uint8_t *data = new uint8_t[size]; + assert(data); + uint64_t rc = 0xdeadbeeflu; + memcpy (data, &rc, size); + + // debugging output + if (req->isHTMStart()) + DPRINTF(HtmCpu, "HTMstart htmUid=%u\n", t_info.getHtmTransactionUid()); + else if (req->isHTMCommit()) + DPRINTF(HtmCpu, "HTMcommit htmUid=%u\n", t_info.getHtmTransactionUid()); + else if (req->isHTMCancel()) + DPRINTF(HtmCpu, "HTMcancel htmUid=%u\n", t_info.getHtmTransactionUid()); + else + panic("initiateHtmCmd: unknown CMD"); + + sendData(req, data, nullptr, true); + return NoFault; } void TimingSimpleCPU::htmSendAbortSignal(HtmFailureFaultCause cause) { - panic("not yet supported!"); + SimpleExecContext& t_info = *threadInfo[curThread]; + SimpleThread* thread = t_info.thread; + + const Addr addr = 0x0ul; + const Addr pc = thread->instAddr(); + const int size = 8; + const Request::Flags flags = + Request::PHYSICAL|Request::STRICT_ORDER|Request::HTM_ABORT; + + if (traceData) + traceData->setMem(addr, size, flags); + + // notify l1 d-cache (ruby) that core has aborted transaction + + RequestPtr req = std::make_shared( + addr, size, flags, dataMasterId()); + + req->setPC(pc); + req->setContext(thread->contextId()); + req->taskId(taskId()); + req->setInstCount(t_info.numInst); + req->setHtmAbortCause(cause); + + assert(req->isHTMAbort()); + + uint8_t *data = new uint8_t[size]; + assert(data); + uint64_t rc = 0lu; + memcpy (data, &rc, size); + + sendData(req, data, nullptr, true); } diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc index b0ffc8264..28a1c8069 100644 --- a/src/cpu/simple_thread.cc +++ b/src/cpu/simple_thread.cc @@ -72,7 +72,8 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys, isa(dynamic_cast(_isa)), predicate(true), memAccPredicate(true), comInstEventQueue("instruction-based event queue"), - system(_sys), itb(_itb), dtb(_dtb), decoder(isa) + system(_sys), itb(_itb), dtb(_dtb), decoder(isa), + htmTransactionStarts(0), htmTransactionStops(0) { assert(isa); clearArchRegs(); @@ -84,7 +85,8 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys, isa(dynamic_cast(_isa)), predicate(true), memAccPredicate(true), comInstEventQueue("instruction-based event queue"), - system(_sys), itb(_itb), dtb(_dtb), decoder(isa) + system(_sys), itb(_itb), dtb(_dtb), decoder(isa), + htmTransactionStarts(0), htmTransactionStops(0) { assert(isa); @@ -175,17 +177,25 @@ SimpleThread::copyArchRegs(ThreadContext *src_tc) void SimpleThread::htmAbortTransaction(uint64_t htm_uid, HtmFailureFaultCause cause) { - panic("function not implemented\n"); + BaseSimpleCPU *baseSimpleCpu = dynamic_cast(baseCpu); + assert(baseSimpleCpu); + + baseSimpleCpu->htmSendAbortSignal(cause); + + // these must be reset after the abort signal has been sent + htmTransactionStarts = 0; + htmTransactionStops = 0; } BaseHTMCheckpointPtr& SimpleThread::getHtmCheckpointPtr() { - panic("function not implemented\n"); + return _htmCheckpoint; } void SimpleThread::setHtmCheckpointPtr(BaseHTMCheckpointPtr new_cpt) { - panic("function not implemented\n"); + assert(!_htmCheckpoint->valid()); + _htmCheckpoint = std::move(new_cpt); } diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index eb8810459..5fe52cbd9 100644 --- a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -106,6 +106,9 @@ class SimpleThread : public ThreadState, public ThreadContext TheISA::PCState _pcState; + // hardware transactional memory + std::unique_ptr _htmCheckpoint; + /** Did this instruction execute or is it predicated false */ bool predicate; @@ -132,6 +135,10 @@ class SimpleThread : public ThreadState, public ThreadContext TheISA::Decoder decoder; + // hardware transactional memory + int64_t htmTransactionStarts; + int64_t htmTransactionStops; + // constructor: initialize SimpleThread from given process structure // FS SimpleThread(BaseCPU *_cpu, int _thread_num, System *_system,