cpu: HTM Implementation for TimingCPU
authorTimothy Hayes <timothy.hayes@arm.com>
Wed, 2 Sep 2020 10:33:15 +0000 (11:33 +0100)
committerGiacomo Travaglini <giacomo.travaglini@arm.com>
Tue, 8 Sep 2020 09:13:30 +0000 (09:13 +0000)
JIRA: https://gem5.atlassian.net/browse/GEM5-587

Change-Id: I3e1de639560ea5492e914470e31bacb321425f0a
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/30327
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
src/cpu/simple/base.cc
src/cpu/simple/exec_context.hh
src/cpu/simple/timing.cc
src/cpu/simple_thread.cc
src/cpu/simple_thread.hh

index a597f0616d7b1d04ef75ac1c2be5071e8a697ce8..bf940ba988b43bec9a6ac0fcd0ad456fa2fb32bf 100644 (file)
@@ -63,6 +63,7 @@
 #include "debug/Decode.hh"
 #include "debug/ExecFaulting.hh"
 #include "debug/Fetch.hh"
+#include "debug/HtmCpu.hh"
 #include "debug/Quiesce.hh"
 #include "mem/packet.hh"
 #include "mem/request.hh"
@@ -453,6 +454,17 @@ BaseSimpleCPU::checkForInterrupts()
         Fault interrupt = interrupts[curThread]->getInterrupt();
 
         if (interrupt != NoFault) {
+            // hardware transactional memory
+            // Postpone taking interrupts while executing transactions.
+            assert(!std::dynamic_pointer_cast<GenericHtmFailureFault>(
+                interrupt));
+            if (t_info.inHtmTransactionalState()) {
+                DPRINTF(HtmCpu, "Deferring pending interrupt - %s -"
+                    "due to transactional state\n",
+                    interrupt->name());
+                return;
+            }
+
             t_info.fetchOffset = 0;
             interrupts[curThread]->updateIntrInfo();
             interrupt->invoke(tc);
index 41e1d3dd16c246c32d10b1696a59cd85b6b5b71d..2b2afd282f6090fb8199a9a0333b29bc4fd61aba 100644 (file)
@@ -475,8 +475,7 @@ class SimpleExecContext : public ExecContext {
 
     Fault initiateHtmCmd(Request::Flags flags) override
     {
-        panic("Not yet supported\n");
-        return NoFault;
+        return cpu->initiateHtmCmd(flags);
     }
 
     /**
@@ -536,29 +535,26 @@ class SimpleExecContext : public ExecContext {
     uint64_t
     getHtmTransactionUid() const override
     {
-        panic("Not yet supported\n");
-        return 0;
+        return tcBase()->getHtmCheckpointPtr()->getHtmUid();
     }
 
     uint64_t
     newHtmTransactionUid() const override
     {
-        panic("Not yet supported\n");
-        return 0;
+        return tcBase()->getHtmCheckpointPtr()->newHtmUid();
     }
 
     bool
     inHtmTransactionalState() const override
     {
-        panic("Not yet supported\n");
-        return false;
+        return (getHtmTransactionalDepth() > 0);
     }
 
     uint64_t
     getHtmTransactionalDepth() const override
     {
-        panic("Not yet supported\n");
-        return 0;
+        assert(thread->htmTransactionStarts >= thread->htmTransactionStops);
+        return (thread->htmTransactionStarts - thread->htmTransactionStops);
     }
 
     /**
index d3adbcc5259c0c28e382c718beae3d9283f046d4..f22c58ddd559126650f8cc88b6a8f7809ea9e813 100644 (file)
@@ -48,6 +48,7 @@
 #include "debug/Config.hh"
 #include "debug/Drain.hh"
 #include "debug/ExecFaulting.hh"
+#include "debug/HtmCpu.hh"
 #include "debug/Mwait.hh"
 #include "debug/SimpleCPU.hh"
 #include "mem/packet.hh"
@@ -173,6 +174,10 @@ TimingSimpleCPU::switchOut()
     SimpleExecContext& t_info = *threadInfo[curThread];
     M5_VAR_USED SimpleThread* thread = t_info.thread;
 
+    // hardware transactional memory
+    // Cannot switch out the CPU in the middle of a transaction
+    assert(!t_info.inHtmTransactionalState());
+
     BaseSimpleCPU::switchOut();
 
     assert(!fetchEvent.scheduled());
@@ -234,6 +239,10 @@ TimingSimpleCPU::suspendContext(ThreadID thread_num)
     assert(thread_num < numThreads);
     activeThreads.remove(thread_num);
 
+    // hardware transactional memory
+    // Cannot suspend context in the middle of a transaction.
+    assert(!threadInfo[curThread]->inHtmTransactionalState());
+
     if (_status == Idle)
         return;
 
@@ -260,6 +269,12 @@ TimingSimpleCPU::handleReadPacket(PacketPtr pkt)
 
     const RequestPtr &req = pkt->req;
 
+    // hardware transactional memory
+    // sanity check
+    if (req->isHTMCmd()) {
+        assert(!req->isLocalAccess());
+    }
+
     // We're about the issues a locked load, so tell the monitor
     // to start caring about this address
     if (pkt->isRead() && pkt->req->isLLSC()) {
@@ -291,6 +306,17 @@ TimingSimpleCPU::sendData(const RequestPtr &req, uint8_t *data, uint64_t *res,
     PacketPtr pkt = buildPacket(req, read);
     pkt->dataDynamic<uint8_t>(data);
 
+    // hardware transactional memory
+    // If the core is in transactional mode or if the request is HtmCMD
+    // to abort a transaction, the packet should reflect that it is
+    // transactional and also contain a HtmUid for debugging.
+    const bool is_htm_speculative = t_info.inHtmTransactionalState();
+    if (is_htm_speculative || req->isHTMAbort()) {
+        pkt->setHtmTransactional(t_info.getHtmTransactionUid());
+    }
+    if (req->isHTMAbort())
+        DPRINTF(HtmCpu, "htmabort htmUid=%u\n", t_info.getHtmTransactionUid());
+
     if (req->getFlags().isSet(Request::NO_ACCESS)) {
         assert(!dcache_pkt);
         pkt->makeResponse();
@@ -322,8 +348,21 @@ void
 TimingSimpleCPU::sendSplitData(const RequestPtr &req1, const RequestPtr &req2,
                                const RequestPtr &req, uint8_t *data, bool read)
 {
+    SimpleExecContext &t_info = *threadInfo[curThread];
     PacketPtr pkt1, pkt2;
     buildSplitPacket(pkt1, pkt2, req1, req2, req, data, read);
+
+    // hardware transactional memory
+    // HTM commands should never use SplitData
+    assert(!req1->isHTMCmd() && !req2->isHTMCmd());
+
+    // If the thread is executing transactionally,
+    // reflect this in the packets.
+    if (t_info.inHtmTransactionalState()) {
+        pkt1->setHtmTransactional(t_info.getHtmTransactionUid());
+        pkt2->setHtmTransactional(t_info.getHtmTransactionUid());
+    }
+
     if (req->getFlags().isSet(Request::NO_ACCESS)) {
         assert(!dcache_pkt);
         pkt1->makeResponse();
@@ -724,6 +763,25 @@ TimingSimpleCPU::advanceInst(const Fault &fault)
         return;
 
     if (fault != NoFault) {
+        // hardware transactional memory
+        // If a fault occurred within a transaction
+        // ensure that the transaction aborts
+        if (t_info.inHtmTransactionalState() &&
+            !std::dynamic_pointer_cast<GenericHtmFailureFault>(fault)) {
+            DPRINTF(HtmCpu, "fault (%s) occurred - "
+                "replacing with HTM abort fault htmUid=%u\n",
+                fault->name(), t_info.getHtmTransactionUid());
+
+            Fault tmfault = std::make_shared<GenericHtmFailureFault>(
+                t_info.getHtmTransactionUid(),
+                HtmFailureFaultCause::EXCEPTION);
+
+            advancePC(tmfault);
+            reschedule(fetchEvent, clockEdge(), true);
+            _status = Faulting;
+            return;
+        }
+
         DPRINTF(SimpleCPU, "Fault occured. Handling the fault\n");
 
         advancePC(fault);
@@ -783,6 +841,19 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt)
 
 
     preExecute();
+
+    // hardware transactional memory
+    if (curStaticInst && curStaticInst->isHtmStart()) {
+        // if this HtmStart is not within a transaction,
+        // then assign it a new htmTransactionUid
+        if (!t_info.inHtmTransactionalState())
+            t_info.newHtmTransactionUid();
+        SimpleThread* thread = t_info.thread;
+        thread->htmTransactionStarts++;
+        DPRINTF(HtmCpu, "htmTransactionStarts++=%u\n",
+            thread->htmTransactionStarts);
+    }
+
     if (curStaticInst && curStaticInst->isMemRef()) {
         // load or store: just send to dcache
         Fault fault = curStaticInst->initiateAcc(&t_info, traceData);
@@ -838,6 +909,15 @@ bool
 TimingSimpleCPU::IcachePort::recvTimingResp(PacketPtr pkt)
 {
     DPRINTF(SimpleCPU, "Received fetch response %#x\n", pkt->getAddr());
+
+    // hardware transactional memory
+    // Currently, there is no support for tracking instruction fetches
+    // in an transaction's read set.
+    if (pkt->htmTransactionFailedInCache()) {
+        panic("HTM transactional support for"
+              " instruction stream not yet supported\n");
+    }
+
     // we should only ever see one response per cycle since we only
     // issue a new request once this response is sunk
     assert(!tickEvent.scheduled());
@@ -864,6 +944,12 @@ TimingSimpleCPU::IcachePort::recvReqRetry()
 void
 TimingSimpleCPU::completeDataAccess(PacketPtr pkt)
 {
+    // hardware transactional memory
+
+    SimpleExecContext *t_info = threadInfo[curThread];
+    const bool is_htm_speculative =
+        t_info->inHtmTransactionalState();
+
     // received a response from the dcache: complete the load or store
     // instruction
     assert(!pkt->isError());
@@ -876,13 +962,35 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt)
     updateCycleCounters(BaseCPU::CPU_STATE_ON);
 
     if (pkt->senderState) {
+        // hardware transactional memory
+        // There shouldn't be HtmCmds occurring in multipacket requests
+        if (pkt->req->isHTMCmd()) {
+            panic("unexpected HTM case");
+        }
+
         SplitFragmentSenderState * send_state =
             dynamic_cast<SplitFragmentSenderState *>(pkt->senderState);
         assert(send_state);
-        delete pkt;
         PacketPtr big_pkt = send_state->bigPkt;
         delete send_state;
 
+        if (pkt->isHtmTransactional()) {
+            assert(is_htm_speculative);
+
+            big_pkt->setHtmTransactional(
+                pkt->getHtmTransactionUid()
+            );
+        }
+
+        if (pkt->htmTransactionFailedInCache()) {
+            assert(is_htm_speculative);
+            big_pkt->setHtmTransactionFailedInCache(
+                pkt->getHtmTransactionFailedInCacheRC()
+            );
+        }
+
+        delete pkt;
+
         SplitMainSenderState * main_send_state =
             dynamic_cast<SplitMainSenderState *>(big_pkt->senderState);
         assert(main_send_state);
@@ -901,8 +1009,59 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt)
 
     _status = BaseSimpleCPU::Running;
 
-    Fault fault = curStaticInst->completeAcc(pkt, threadInfo[curThread],
-                                             traceData);
+    Fault fault;
+
+    // hardware transactional memory
+    // sanity checks
+    // ensure htmTransactionUids are equivalent
+    if (pkt->isHtmTransactional())
+        assert (pkt->getHtmTransactionUid() ==
+                t_info->getHtmTransactionUid());
+
+    // can't have a packet that fails a transaction while not in a transaction
+    if (pkt->htmTransactionFailedInCache())
+        assert(is_htm_speculative);
+
+    // shouldn't fail through stores because this would be inconsistent w/ O3
+    // which cannot fault after the store has been sent to memory
+    if (pkt->htmTransactionFailedInCache() && !pkt->isWrite()) {
+        const HtmCacheFailure htm_rc =
+            pkt->getHtmTransactionFailedInCacheRC();
+        DPRINTF(HtmCpu, "HTM abortion in cache (rc=%s) detected htmUid=%u\n",
+            htmFailureToStr(htm_rc), pkt->getHtmTransactionUid());
+
+        // Currently there are only two reasons why a transaction would
+        // fail in the memory subsystem--
+        // (1) A transactional line was evicted from the cache for
+        //     space (or replacement policy) reasons.
+        // (2) Another core/device requested a cache line that is in this
+        //     transaction's read/write set that is incompatible with the
+        //     HTM's semantics, e.g. another core requesting exclusive access
+        //     of a line in this core's read set.
+        if (htm_rc == HtmCacheFailure::FAIL_SELF) {
+            fault = std::make_shared<GenericHtmFailureFault>(
+                t_info->getHtmTransactionUid(),
+                HtmFailureFaultCause::SIZE);
+        } else if (htm_rc == HtmCacheFailure::FAIL_REMOTE) {
+            fault = std::make_shared<GenericHtmFailureFault>(
+                t_info->getHtmTransactionUid(),
+                HtmFailureFaultCause::MEMORY);
+        } else {
+            panic("HTM - unhandled rc %s", htmFailureToStr(htm_rc));
+        }
+    } else {
+        fault = curStaticInst->completeAcc(pkt, t_info,
+                                     traceData);
+    }
+
+    // hardware transactional memory
+    // Track HtmStop instructions,
+    // e.g. instructions which commit a transaction.
+    if (curStaticInst && curStaticInst->isHtmStop()) {
+        t_info->thread->htmTransactionStops++;
+        DPRINTF(HtmCpu, "htmTransactionStops++=%u\n",
+            t_info->thread->htmTransactionStops);
+    }
 
     // keep an instruction count
     if (fault == NoFault)
@@ -1058,14 +1217,82 @@ TimingSimpleCPU::printAddr(Addr a)
 Fault
 TimingSimpleCPU::initiateHtmCmd(Request::Flags flags)
 {
-    panic("not yet supported!");
+    SimpleExecContext &t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
+    const Addr addr = 0x0ul;
+    const Addr pc = thread->instAddr();
+    const int size = 8;
+
+    if (traceData)
+        traceData->setMem(addr, size, flags);
+
+    RequestPtr req = std::make_shared<Request>(
+        addr, size, flags, dataMasterId());
+
+    req->setPC(pc);
+    req->setContext(thread->contextId());
+    req->taskId(taskId());
+    req->setInstCount(t_info.numInst);
+
+    assert(req->isHTMCmd());
+
+    // Use the payload as a sanity check,
+    // the memory subsystem will clear allocated data
+    uint8_t *data = new uint8_t[size];
+    assert(data);
+    uint64_t rc = 0xdeadbeeflu;
+    memcpy (data, &rc, size);
+
+    // debugging output
+    if (req->isHTMStart())
+        DPRINTF(HtmCpu, "HTMstart htmUid=%u\n", t_info.getHtmTransactionUid());
+    else if (req->isHTMCommit())
+        DPRINTF(HtmCpu, "HTMcommit htmUid=%u\n", t_info.getHtmTransactionUid());
+    else if (req->isHTMCancel())
+        DPRINTF(HtmCpu, "HTMcancel htmUid=%u\n", t_info.getHtmTransactionUid());
+    else
+        panic("initiateHtmCmd: unknown CMD");
+
+    sendData(req, data, nullptr, true);
+
     return NoFault;
 }
 
 void
 TimingSimpleCPU::htmSendAbortSignal(HtmFailureFaultCause cause)
 {
-    panic("not yet supported!");
+    SimpleExecContext& t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
+    const Addr addr = 0x0ul;
+    const Addr pc = thread->instAddr();
+    const int size = 8;
+    const Request::Flags flags =
+        Request::PHYSICAL|Request::STRICT_ORDER|Request::HTM_ABORT;
+
+    if (traceData)
+        traceData->setMem(addr, size, flags);
+
+    // notify l1 d-cache (ruby) that core has aborted transaction
+
+    RequestPtr req = std::make_shared<Request>(
+        addr, size, flags, dataMasterId());
+
+    req->setPC(pc);
+    req->setContext(thread->contextId());
+    req->taskId(taskId());
+    req->setInstCount(t_info.numInst);
+    req->setHtmAbortCause(cause);
+
+    assert(req->isHTMAbort());
+
+    uint8_t *data = new uint8_t[size];
+    assert(data);
+    uint64_t rc = 0lu;
+    memcpy (data, &rc, size);
+
+    sendData(req, data, nullptr, true);
 }
 
 
index b0ffc82644053dc698112ea60218c8d72bcc1d4c..28a1c80692ea084efca4ea43b1138b7b74121090 100644 (file)
@@ -72,7 +72,8 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys,
       isa(dynamic_cast<TheISA::ISA *>(_isa)),
       predicate(true), memAccPredicate(true),
       comInstEventQueue("instruction-based event queue"),
-      system(_sys), itb(_itb), dtb(_dtb), decoder(isa)
+      system(_sys), itb(_itb), dtb(_dtb), decoder(isa),
+      htmTransactionStarts(0), htmTransactionStops(0)
 {
     assert(isa);
     clearArchRegs();
@@ -84,7 +85,8 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys,
       isa(dynamic_cast<TheISA::ISA *>(_isa)),
       predicate(true), memAccPredicate(true),
       comInstEventQueue("instruction-based event queue"),
-      system(_sys), itb(_itb), dtb(_dtb), decoder(isa)
+      system(_sys), itb(_itb), dtb(_dtb), decoder(isa),
+      htmTransactionStarts(0), htmTransactionStops(0)
 {
     assert(isa);
 
@@ -175,17 +177,25 @@ SimpleThread::copyArchRegs(ThreadContext *src_tc)
 void
 SimpleThread::htmAbortTransaction(uint64_t htm_uid, HtmFailureFaultCause cause)
 {
-    panic("function not implemented\n");
+    BaseSimpleCPU *baseSimpleCpu = dynamic_cast<BaseSimpleCPU*>(baseCpu);
+    assert(baseSimpleCpu);
+
+    baseSimpleCpu->htmSendAbortSignal(cause);
+
+    // these must be reset after the abort signal has been sent
+    htmTransactionStarts = 0;
+    htmTransactionStops = 0;
 }
 
 BaseHTMCheckpointPtr&
 SimpleThread::getHtmCheckpointPtr()
 {
-    panic("function not implemented\n");
+    return _htmCheckpoint;
 }
 
 void
 SimpleThread::setHtmCheckpointPtr(BaseHTMCheckpointPtr new_cpt)
 {
-    panic("function not implemented\n");
+    assert(!_htmCheckpoint->valid());
+    _htmCheckpoint = std::move(new_cpt);
 }
index eb88104599ec90751389e942b06493f0ab6caeb5..5fe52cbd9f3a719ea475b5a5d265e4828e9a9b91 100644 (file)
@@ -106,6 +106,9 @@ class SimpleThread : public ThreadState, public ThreadContext
 
     TheISA::PCState _pcState;
 
+    // hardware transactional memory
+    std::unique_ptr<BaseHTMCheckpoint> _htmCheckpoint;
+
     /** Did this instruction execute or is it predicated false */
     bool predicate;
 
@@ -132,6 +135,10 @@ class SimpleThread : public ThreadState, public ThreadContext
 
     TheISA::Decoder decoder;
 
+    // hardware transactional memory
+    int64_t htmTransactionStarts;
+    int64_t htmTransactionStops;
+
     // constructor: initialize SimpleThread from given process structure
     // FS
     SimpleThread(BaseCPU *_cpu, int _thread_num, System *_system,