From b904bd5437ead0dfc2c4c0977f3d29d63299c601 Mon Sep 17 00:00:00 2001 From: Andreas Sandberg Date: Fri, 15 Feb 2013 17:40:09 -0500 Subject: [PATCH] sim: Add a system-global option to bypass caches Virtualized CPUs and the fastmem mode of the atomic CPU require direct access to physical memory. We currently require caches to be disabled when using them to prevent chaos. This is not ideal when switching between hardware virtualized CPUs and other CPU models as it would require a configuration change on each switch. This changeset introduces a new version of the atomic memory mode, 'atomic_noncaching', where memory accesses are inserted into the memory system as atomic accesses, but bypass caches. To make memory mode tests cleaner, the following methods are added to the System class: * isAtomicMode() -- True if the memory mode is 'atomic' or 'atomic_noncaching'. * isTimingMode() -- True if the memory mode is 'timing'. * bypassCaches() -- True if caches should be bypassed. The old getMemoryMode() and setMemoryMode() methods should never be used from the C++ world anymore. 
--- src/arch/arm/table_walker.cc | 2 +- src/arch/x86/interrupts.cc | 2 +- src/arch/x86/pagetable_walker.cc | 2 +- src/cpu/inorder/cpu.cc | 2 +- src/cpu/o3/cpu.cc | 2 +- src/cpu/simple/atomic.cc | 2 +- src/cpu/simple/timing.cc | 2 +- src/cpu/testers/traffic_gen/traffic_gen.cc | 6 +-- src/dev/dma_device.cc | 5 +- src/dev/x86/i82094aa.cc | 3 +- src/mem/Bus.py | 4 ++ src/mem/cache/cache_impl.hh | 29 ++++++++++ src/mem/coherent_bus.cc | 19 +++++-- src/mem/coherent_bus.hh | 6 +++ src/python/m5/simulate.py | 8 +++ src/sim/System.py | 5 +- src/sim/system.cc | 4 +- src/sim/system.hh | 61 ++++++++++++++++++---- 18 files changed, 131 insertions(+), 33 deletions(-) diff --git a/src/arch/arm/table_walker.cc b/src/arch/arm/table_walker.cc index 99a7592c1..44f12833b 100644 --- a/src/arch/arm/table_walker.cc +++ b/src/arch/arm/table_walker.cc @@ -104,7 +104,7 @@ void TableWalker::drainResume() { Drainable::drainResume(); - if ((params()->sys->getMemoryMode() == Enums::timing) && currState) { + if (params()->sys->isTimingMode() && currState) { delete currState; currState = NULL; } diff --git a/src/arch/x86/interrupts.cc b/src/arch/x86/interrupts.cc index b34124ce7..8eae2d390 100644 --- a/src/arch/x86/interrupts.cc +++ b/src/arch/x86/interrupts.cc @@ -510,7 +510,7 @@ X86ISA::Interrupts::setReg(ApicRegIndex reg, uint32_t val) message.destMode = low.destMode; message.level = low.level; message.trigger = low.trigger; - bool timing = sys->getMemoryMode() == Enums::timing; + bool timing(sys->isTimingMode()); // Be careful no updates of the delivery status bit get lost. regs[APIC_INTERRUPT_COMMAND_LOW] = low; ApicList apics; diff --git a/src/arch/x86/pagetable_walker.cc b/src/arch/x86/pagetable_walker.cc index 1e42e5593..b096fbfe8 100644 --- a/src/arch/x86/pagetable_walker.cc +++ b/src/arch/x86/pagetable_walker.cc @@ -88,7 +88,7 @@ Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation, // outstanding requests, see if this request can be coalesced with // another one (i.e. 
either coalesce or start walk) WalkerState * newState = new WalkerState(this, _translation, _req); - newState->initState(_tc, _mode, sys->getMemoryMode() == Enums::timing); + newState->initState(_tc, _mode, sys->isTimingMode()); if (currStates.size()) { assert(newState->isTiming()); DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size()); diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 5490cb3f2..1ba8e55b6 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -812,7 +812,7 @@ InOrderCPU::init() void InOrderCPU::verifyMemoryMode() const { - if (system->getMemoryMode() != Enums::timing) { + if (!system->isTimingMode()) { fatal("The in-order CPU requires the memory system to be in " "'timing' mode.\n"); } diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 53250d495..9caa49ad6 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -1316,7 +1316,7 @@ template void FullO3CPU::verifyMemoryMode() const { - if (system->getMemoryMode() != Enums::timing) { + if (!system->isTimingMode()) { fatal("The O3 CPU requires the memory system to be in " "'timing' mode.\n"); } diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 7a0778961..d7c4190ee 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -212,7 +212,7 @@ AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU) void AtomicSimpleCPU::verifyMemoryMode() const { - if (system->getMemoryMode() != Enums::atomic) { + if (!system->isAtomicMode()) { fatal("The atomic CPU requires the memory system to be in " "'atomic' mode.\n"); } diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 7423d082c..ab4ea9256 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -191,7 +191,7 @@ TimingSimpleCPU::takeOverFrom(BaseCPU *oldCPU) void TimingSimpleCPU::verifyMemoryMode() const { - if (system->getMemoryMode() != Enums::timing) { + if (!system->isTimingMode()) { fatal("The timing CPU requires the memory system to be in " 
"'timing' mode.\n"); } diff --git a/src/cpu/testers/traffic_gen/traffic_gen.cc b/src/cpu/testers/traffic_gen/traffic_gen.cc index 34e3b2c1e..d9d040858 100644 --- a/src/cpu/testers/traffic_gen/traffic_gen.cc +++ b/src/cpu/testers/traffic_gen/traffic_gen.cc @@ -83,10 +83,8 @@ TrafficGen::init() if (!port.isConnected()) fatal("The port of %s is not connected!\n", name()); - Enums::MemoryMode mode = system->getMemoryMode(); - // if the system is in timing mode active the request generator - if (mode == Enums::timing) { + if (system->isTimingMode()) { DPRINTF(TrafficGen, "Timing mode, activating request generator\n"); // enter initial state @@ -101,7 +99,7 @@ void TrafficGen::initState() { // when not restoring from a checkpoint, make sure we kick things off - if (system->getMemoryMode() == Enums::timing) { + if (system->isTimingMode()) { Tick nextStateGraphEvent = stateGraph.nextEventTick(); schedule(updateStateGraphEvent, nextStateGraphEvent); } else { diff --git a/src/dev/dma_device.cc b/src/dev/dma_device.cc index 770370320..f25f52334 100644 --- a/src/dev/dma_device.cc +++ b/src/dev/dma_device.cc @@ -232,8 +232,7 @@ DmaPort::sendDma() // switching actually work assert(transmitList.size()); - Enums::MemoryMode state = sys->getMemoryMode(); - if (state == Enums::timing) { + if (sys->isTimingMode()) { // if we are either waiting for a retry or are still waiting // after sending the last packet, then do not proceed if (inRetry || sendEvent.scheduled()) { @@ -242,7 +241,7 @@ DmaPort::sendDma() } trySendTimingReq(); - } else if (state == Enums::atomic) { + } else if (sys->isAtomicMode()) { // send everything there is to send in zero time while (!transmitList.empty()) { PacketPtr pkt = transmitList.front(); diff --git a/src/dev/x86/i82094aa.cc b/src/dev/x86/i82094aa.cc index 54824c778..0692718bf 100644 --- a/src/dev/x86/i82094aa.cc +++ b/src/dev/x86/i82094aa.cc @@ -222,8 +222,7 @@ X86ISA::I82094AA::signalInterrupt(int line) apics.push_back(selected); } } - 
intMasterPort.sendMessage(apics, message, - sys->getMemoryMode() == Enums::timing); + intMasterPort.sendMessage(apics, message, sys->isTimingMode()); } } diff --git a/src/mem/Bus.py b/src/mem/Bus.py index 4637b0ebc..ca0f40e1e 100644 --- a/src/mem/Bus.py +++ b/src/mem/Bus.py @@ -40,7 +40,9 @@ # Andreas Hansson from MemObject import MemObject +from System import System from m5.params import * +from m5.proxy import * class BaseBus(MemObject): type = 'BaseBus' @@ -72,3 +74,5 @@ class NoncoherentBus(BaseBus): class CoherentBus(BaseBus): type = 'CoherentBus' cxx_header = "mem/coherent_bus.hh" + + system = Param.System(Parent.any, "System that the bus belongs to.") diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index a1d945103..21c8e16d6 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -390,6 +390,7 @@ Cache::timingAccess(PacketPtr pkt) // must be cache-to-cache response from upper to lower level ForwardResponseRecord *rec = dynamic_cast(pkt->senderState); + assert(!system->bypassCaches()); if (rec == NULL) { assert(pkt->cmd == MemCmd::HardPFResp); @@ -409,6 +410,12 @@ Cache::timingAccess(PacketPtr pkt) assert(pkt->isRequest()); + // Just forward the packet if caches are disabled. + if (system->bypassCaches()) { + memSidePort->sendTimingReq(pkt); + return true; + } + if (pkt->memInhibitAsserted()) { DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n", pkt->getAddr()); @@ -629,6 +636,10 @@ Cache::atomicAccess(PacketPtr pkt) // @TODO: make this a parameter bool last_level_cache = false; + // Forward the request if the system is in cache bypass mode. 
+ if (system->bypassCaches()) + return memSidePort->sendAtomic(pkt); + if (pkt->memInhibitAsserted()) { assert(!pkt->req->isUncacheable()); // have to invalidate ourselves and any lower caches even if @@ -744,6 +755,17 @@ template void Cache::functionalAccess(PacketPtr pkt, bool fromCpuSide) { + if (system->bypassCaches()) { + // Packets from the memory side are snoop request and + // shouldn't happen in bypass mode. + assert(fromCpuSide); + + // The cache should be flushed if we are in cache bypass mode, + // so we don't need to check if we need to update anything. + memSidePort->sendFunctional(pkt); + return; + } + Addr blk_addr = blockAlign(pkt->getAddr()); BlkType *blk = tags->findBlock(pkt->getAddr()); MSHR *mshr = mshrQueue.findMatch(blk_addr); @@ -1354,6 +1376,9 @@ template void Cache::snoopTiming(PacketPtr pkt) { + // Snoops shouldn't happen when bypassing caches + assert(!system->bypassCaches()); + // Note that some deferred snoops don't have requests, since the // original access may have already completed if ((pkt->req && pkt->req->isUncacheable()) || @@ -1438,6 +1463,9 @@ template Cycles Cache::snoopAtomic(PacketPtr pkt) { + // Snoops shouldn't happen when bypassing caches + assert(!system->bypassCaches()); + if (pkt->req->isUncacheable() || pkt->cmd == MemCmd::Writeback) { // Can't get a hit on an uncacheable address // Revisit this for multi level coherence @@ -1683,6 +1711,7 @@ Cache::CpuSidePort::recvTimingReq(PacketPtr pkt) { // always let inhibited requests through even if blocked if (!pkt->memInhibitAsserted() && blocked) { + assert(!cache->system->bypassCaches()); DPRINTF(Cache,"Scheduling a retry while blocked\n"); mustSendRetry = true; return false; diff --git a/src/mem/coherent_bus.cc b/src/mem/coherent_bus.cc index b1ac6dbcf..f74ca48e9 100644 --- a/src/mem/coherent_bus.cc +++ b/src/mem/coherent_bus.cc @@ -52,11 +52,13 @@ #include "debug/BusAddrRanges.hh" #include "debug/CoherentBus.hh" #include "mem/coherent_bus.hh" +#include "sim/system.hh" 
CoherentBus::CoherentBus(const CoherentBusParams *p) : BaseBus(p), reqLayer(*this, ".reqLayer", p->clock), respLayer(*this, ".respLayer", p->clock), - snoopRespLayer(*this, ".snoopRespLayer", p->clock) + snoopRespLayer(*this, ".snoopRespLayer", p->clock), + system(p->system) { // create the ports based on the size of the master and slave // vector ports, and the presence of the default port, the ports @@ -137,7 +139,7 @@ CoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id) Tick packetFinishTime = is_express_snoop ? 0 : pkt->finishTime; // uncacheable requests need never be snooped - if (!pkt->req->isUncacheable()) { + if (!pkt->req->isUncacheable() && !system->bypassCaches()) { // the packet is a memory-mapped request and should be // broadcasted to our snoopers but the source forwardTiming(pkt, slave_port_id); @@ -323,6 +325,9 @@ CoherentBus::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id) void CoherentBus::forwardTiming(PacketPtr pkt, PortID exclude_slave_port_id) { + // snoops should only happen if the system isn't bypassing caches + assert(!system->bypassCaches()); + for (SlavePortIter s = snoopPorts.begin(); s != snoopPorts.end(); ++s) { SlavePort *p = *s; // we could have gotten this request from a snooping master @@ -357,7 +362,7 @@ CoherentBus::recvAtomic(PacketPtr pkt, PortID slave_port_id) Tick snoop_response_latency = 0; // uncacheable requests need never be snooped - if (!pkt->req->isUncacheable()) { + if (!pkt->req->isUncacheable() && !system->bypassCaches()) { // forward to all snoopers but the source std::pair snoop_result = forwardAtomic(pkt, slave_port_id); @@ -414,6 +419,9 @@ CoherentBus::forwardAtomic(PacketPtr pkt, PortID exclude_slave_port_id) MemCmd snoop_response_cmd = MemCmd::InvalidCmd; Tick snoop_response_latency = 0; + // snoops should only happen if the system isn't bypassing caches + assert(!system->bypassCaches()); + for (SlavePortIter s = snoopPorts.begin(); s != snoopPorts.end(); ++s) { SlavePort *p = *s; // we 
could have gotten this request from a snooping master @@ -458,7 +466,7 @@ CoherentBus::recvFunctional(PacketPtr pkt, PortID slave_port_id) } // uncacheable requests need never be snooped - if (!pkt->req->isUncacheable()) { + if (!pkt->req->isUncacheable() && !system->bypassCaches()) { // forward to all snoopers but the source forwardFunctional(pkt, slave_port_id); } @@ -490,6 +498,9 @@ CoherentBus::recvFunctionalSnoop(PacketPtr pkt, PortID master_port_id) void CoherentBus::forwardFunctional(PacketPtr pkt, PortID exclude_slave_port_id) { + // snoops should only happen if the system isn't bypassing caches + assert(!system->bypassCaches()); + for (SlavePortIter s = snoopPorts.begin(); s != snoopPorts.end(); ++s) { SlavePort *p = *s; // we could have gotten this request from a snooping master diff --git a/src/mem/coherent_bus.hh b/src/mem/coherent_bus.hh index 61406608b..05c45f69a 100644 --- a/src/mem/coherent_bus.hh +++ b/src/mem/coherent_bus.hh @@ -224,6 +224,12 @@ class CoherentBus : public BaseBus */ std::set outstandingReq; + /** + * Keep a pointer to the system to allow querying memory system + * properties. + */ + System *system; + /** Function called by the port when the bus is recieving a Timing request packet.*/ virtual bool recvTimingReq(PacketPtr pkt, PortID slave_port_id); diff --git a/src/python/m5/simulate.py b/src/python/m5/simulate.py index 3583e8264..682104c26 100644 --- a/src/python/m5/simulate.py +++ b/src/python/m5/simulate.py @@ -63,6 +63,7 @@ MaxTick = 2**63 - 1 _memory_modes = { "atomic" : objects.params.atomic, "timing" : objects.params.timing, + "atomic_noncaching" : objects.params.atomic_noncaching, } # The final hook to generate .ini files. Called from the user script @@ -288,6 +289,13 @@ def switchCpus(system, cpuList, do_drain=True): # Change the memory mode if required. We check if this is needed # to avoid printing a warning if no switch was performed. 
if system.getMemoryMode() != memory_mode: + # Flush the memory system if we are switching to a memory mode + # that disables caches. This typically happens when switching to a + # hardware virtualized CPU. + if memory_mode == objects.params.atomic_noncaching: + memWriteback(system) + memInvalidate(system) + _changeMemoryMode(system, memory_mode) for old_cpu, new_cpu in cpuList: diff --git a/src/sim/System.py b/src/sim/System.py index 69ae61e8f..031331375 100644 --- a/src/sim/System.py +++ b/src/sim/System.py @@ -35,7 +35,8 @@ from m5.proxy import * from SimpleMemory import * -class MemoryMode(Enum): vals = ['invalid', 'atomic', 'timing'] +class MemoryMode(Enum): vals = ['invalid', 'atomic', 'timing', + 'atomic_noncaching'] class System(MemObject): type = 'System' @@ -55,7 +56,7 @@ class System(MemObject): @classmethod def export_methods(cls, code): code(''' - Enums::MemoryMode getMemoryMode(); + Enums::MemoryMode getMemoryMode() const; void setMemoryMode(Enums::MemoryMode mode); ''') diff --git a/src/sim/system.cc b/src/sim/system.cc index 259ed3e88..03f8f8180 100644 --- a/src/sim/system.cc +++ b/src/sim/system.cc @@ -454,8 +454,8 @@ System::getMasterName(MasterID master_id) return masterIds[master_id]; } -const char *System::MemoryModeStrings[3] = {"invalid", "atomic", - "timing"}; +const char *System::MemoryModeStrings[4] = {"invalid", "atomic", "timing", + "atomic_noncaching"}; System * SystemParams::create() diff --git a/src/sim/system.hh b/src/sim/system.hh index d1b79bbf4..05b1f2077 100644 --- a/src/sim/system.hh +++ b/src/sim/system.hh @@ -120,20 +120,63 @@ class System : public MemObject BaseMasterPort& getMasterPort(const std::string &if_name, PortID idx = InvalidPortID); - static const char *MemoryModeStrings[3]; + static const char *MemoryModeStrings[4]; - Enums::MemoryMode - getMemoryMode() - { - assert(memoryMode); - return memoryMode; + /** @{ */ + /** + * Is the system in atomic mode? 
+ * + * There are currently two different atomic memory modes: + * 'atomic', which supports caches; and 'atomic_noncaching', which + * bypasses caches. The latter is used by hardware virtualized + * CPUs. SimObjects are expected to use Port::sendAtomic() and + * Port::recvAtomic() when accessing memory in this mode. + */ + bool isAtomicMode() const { + return memoryMode == Enums::atomic || + memoryMode == Enums::atomic_noncaching; } - /** Change the memory mode of the system. This should only be called by the - * python!! - * @param mode Mode to change to (atomic/timing) + /** + * Is the system in timing mode? + * + * SimObjects are expected to use Port::sendTiming() and + * Port::recvTiming() when accessing memory in this mode. + */ + bool isTimingMode() const { + return memoryMode == Enums::timing; + } + + /** + * Should caches be bypassed? + * + * Some CPUs need to bypass caches to allow direct memory + * accesses, which is required for hardware virtualization. + */ + bool bypassCaches() const { + return memoryMode == Enums::atomic_noncaching; + } + /** @} */ + + /** @{ */ + /** + * Get the memory mode of the system. + * + * \warning This should only be used by the Python world. The C++ + * world should use one of the query functions above + * (isAtomicMode(), isTimingMode(), bypassCaches()). + */ + Enums::MemoryMode getMemoryMode() const { return memoryMode; } + + /** + * Change the memory mode of the system. + * + * \warning This should only be called by the Python world! + * + * @param mode Mode to change to (atomic/timing/...) */ void setMemoryMode(Enums::MemoryMode mode); + /** @} */ PCEventQueue pcEventQueue; -- 2.30.2