From b12422c79b1b5d30afb89d006cf777424729652d Mon Sep 17 00:00:00 2001
From: Gabe Black
Date: Wed, 4 Nov 2020 01:03:25 -0800
Subject: [PATCH] cpu: Make the NonCachingSimpleCPU use a back door for fetch.

If the memory system can provide a back door to memory, store that, and
use it for subsequent accesses to the range it covers.

For now, this covers only fetch. That's because fetch will generally
happen more often than loads and stores, and because it's relatively
simple to implement since we can ignore atomic operations, etc.

Some limited benchmarking suggests that this speeds up x86 Linux boot
by about 20%, although my modifications to the config to remove caching
(which blocks the back door mechanism) also made gem5 crash, so it's
hard to say for sure if that's a valid result. The crash happened in
the same way before and after, so it's probably at least relatively
representative.

While this gives a pretty substantial performance boost, it will
prevent statistics from being collected at the memory, or on
intermediate objects in the interconnect like the bus. That is to be
expected with this memory mode, however.

Change-Id: I73f73017e454300fd4d61f58462eb4ec719b8d85
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/36979
Reviewed-by: Jason Lowe-Power
Reviewed-by: Andreas Sandberg
Maintainer: Jason Lowe-Power
Tested-by: kokoro
---
 src/cpu/simple/atomic.cc     | 25 ++++++++++++++++---------
 src/cpu/simple/atomic.hh     |  2 +-
 src/cpu/simple/noncaching.cc | 37 ++++++++++++++++++++++++++++++++-----
 src/cpu/simple/noncaching.hh |  5 +++++
 4 files changed, 54 insertions(+), 15 deletions(-)

diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index bceed391f..e20b33b01 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -674,15 +674,7 @@ AtomicSimpleCPU::tick()
                 //if (decoder.needMoreBytes())
                 //{
                     icache_access = true;
-                    Packet ifetch_pkt = Packet(ifetch_req, MemCmd::ReadReq);
-                    ifetch_pkt.dataStatic(&inst);
-
-                    icache_latency = sendPacket(icachePort, &ifetch_pkt);
-
-                    assert(!ifetch_pkt.isError());
-
-                    // ifetch_req is initialized to read the instruction
-                    // directly into the CPU object's inst field.
+                    icache_latency = fetchInstMem();
                 //}
             }
 
@@ -747,6 +739,21 @@ AtomicSimpleCPU::tick()
         reschedule(tickEvent, curTick() + latency, true);
 }
 
+Tick
+AtomicSimpleCPU::fetchInstMem()
+{
+    Packet pkt = Packet(ifetch_req, MemCmd::ReadReq);
+
+    // ifetch_req is initialized to read the instruction
+    // directly into the CPU object's inst field.
+    pkt.dataStatic(&inst);
+
+    Tick latency = sendPacket(icachePort, &pkt);
+    assert(!pkt.isError());
+
+    return latency;
+}
+
 void
 AtomicSimpleCPU::regProbePoints()
 {
diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh
index 3ff25800a..febba9eb5 100644
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -57,7 +57,6 @@ class AtomicSimpleCPU : public BaseSimpleCPU
     void init() override;
 
   protected:
-
     EventFunctionWrapper tickEvent;
 
     const int width;
@@ -102,6 +101,7 @@ class AtomicSimpleCPU : public BaseSimpleCPU
     bool tryCompleteDrain();
 
     virtual Tick sendPacket(RequestPort &port, const PacketPtr &pkt);
+    virtual Tick fetchInstMem();
 
     /**
      * An AtomicCPUPort overrides the default behaviour of the
diff --git a/src/cpu/simple/noncaching.cc b/src/cpu/simple/noncaching.cc
index 44d57fb94..28878e2de 100644
--- a/src/cpu/simple/noncaching.cc
+++ b/src/cpu/simple/noncaching.cc
@@ -59,10 +59,37 @@ NonCachingSimpleCPU::verifyMemoryMode() const
 Tick
 NonCachingSimpleCPU::sendPacket(RequestPort &port, const PacketPtr &pkt)
 {
-    if (system->isMemAddr(pkt->getAddr())) {
-        system->getPhysMem().access(pkt);
-        return 0;
-    } else {
-        return port.sendAtomic(pkt);
+    MemBackdoorPtr bd = nullptr;
+    Tick latency = port.sendAtomicBackdoor(pkt, bd);
+
+    // If the target gave us a backdoor for next time and we didn't
+    // already have it, record it.
+    if (bd && memBackdoors.insert(bd->range(), bd) != memBackdoors.end()) {
+        // Install a callback to erase this backdoor if it goes away.
+        auto callback = [this](const MemBackdoor &backdoor) {
+            for (auto it = memBackdoors.begin();
+                    it != memBackdoors.end(); it++) {
+                if (it->second == &backdoor) {
+                    memBackdoors.erase(it);
+                    return;
+                }
+            }
+            panic("Got invalidation for unknown memory backdoor.");
+        };
+        bd->addInvalidationCallback(callback);
     }
+    return latency;
+}
+
+Tick
+NonCachingSimpleCPU::fetchInstMem()
+{
+    auto bd_it = memBackdoors.contains(ifetch_req->getPaddr());
+    if (bd_it == memBackdoors.end())
+        return AtomicSimpleCPU::fetchInstMem();
+
+    auto *bd = bd_it->second;
+    Addr offset = ifetch_req->getPaddr() - bd->range().start();
+    memcpy(&inst, bd->ptr() + offset, ifetch_req->getSize());
+    return 0;
 }
diff --git a/src/cpu/simple/noncaching.hh b/src/cpu/simple/noncaching.hh
index 1bc87184b..4cb9638be 100644
--- a/src/cpu/simple/noncaching.hh
+++ b/src/cpu/simple/noncaching.hh
@@ -38,7 +38,9 @@
 #ifndef __CPU_SIMPLE_NONCACHING_HH__
 #define __CPU_SIMPLE_NONCACHING_HH__
 
+#include "base/addr_range_map.hh"
 #include "cpu/simple/atomic.hh"
+#include "mem/backdoor.hh"
 #include "params/NonCachingSimpleCPU.hh"
 
 /**
@@ -53,7 +55,10 @@ class NonCachingSimpleCPU : public AtomicSimpleCPU
     void verifyMemoryMode() const override;
 
   protected:
+    AddrRangeMap<MemBackdoorPtr> memBackdoors;
+
     Tick sendPacket(RequestPort &port, const PacketPtr &pkt) override;
+    Tick fetchInstMem() override;
 };
 
 #endif // __CPU_SIMPLE_NONCACHING_HH__
-- 
2.30.2
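
The pattern the patch implements is: on the slow path, ask the target for a
back door along with the normal atomic access; cache any back door it
returns, keyed by the address range it covers; service later fetches to that
range with a plain memcpy through the cached host pointer; and drop the
cached entry if the target revokes it. The sketch below shows that pattern
in miniature as a standalone program. It is plain standard C++, not gem5
code; the names Backdoor, BackdoorCache, slowFetch, and fetch are invented
for illustration, while the real code above uses gem5's AddrRangeMap,
MemBackdoor, and invalidation callbacks.

// Minimal, self-contained sketch of the back door caching pattern;
// standard C++ only. None of these names exist in gem5.
#include <cassert>
#include <cstdint>
#include <cstring>
#include <functional>
#include <map>
#include <vector>

struct Backdoor
{
    uint64_t start, size;               // address range the back door covers
    uint8_t *ptr;                       // direct host pointer into that range
    std::function<void()> invalidate;   // filled in by the requester
};

// Requester-side cache of back doors, keyed by the start of their range.
class BackdoorCache
{
    std::map<uint64_t, Backdoor *> byStart;

  public:
    Backdoor *
    find(uint64_t addr)
    {
        auto it = byStart.upper_bound(addr);
        if (it == byStart.begin())
            return nullptr;
        --it;
        Backdoor *bd = it->second;
        return addr < bd->start + bd->size ? bd : nullptr;
    }

    void
    insert(Backdoor *bd)
    {
        byStart[bd->start] = bd;
        // Drop the entry if the target later revokes this back door.
        bd->invalidate = [this, bd] { byStart.erase(bd->start); };
    }
};

int
main()
{
    std::vector<uint8_t> mem(0x1000, 0xAB);     // stand-in for physical memory
    Backdoor bd{0, mem.size(), mem.data(), {}};
    BackdoorCache cache;
    uint32_t inst = 0;

    // Slow path: "send a packet"; the target also hands back a back door.
    auto slowFetch = [&](uint64_t addr) {
        std::memcpy(&inst, mem.data() + addr, sizeof(inst));
        cache.insert(&bd);                      // remember it for next time
    };

    // Fetch: use a cached back door when the address hits one.
    auto fetch = [&](uint64_t addr) {
        if (Backdoor *hit = cache.find(addr))
            std::memcpy(&inst, hit->ptr + (addr - hit->start), sizeof(inst));
        else
            slowFetch(addr);
    };

    fetch(0x100);       // first access takes the packet path and caches bd
    fetch(0x104);       // later accesses bypass it through the back door
    assert(inst == 0xABABABABu);

    bd.invalidate();    // revocation removes the cached entry
    assert(cache.find(0x104) == nullptr);
    return 0;
}

In the patch itself, AddrRangeMap plays the role of BackdoorCache and the
MemBackdoor invalidation callback plays the role of invalidate: the first
fetch to a range goes through AtomicSimpleCPU::fetchInstMem()'s packet path,
and later fetches hit the memcpy fast path in
NonCachingSimpleCPU::fetchInstMem().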