From 45f881a4ced25105267799432c0f526400f0ba9e Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Thu, 5 Oct 2006 21:10:03 -0400 Subject: [PATCH] First pass at snooping stuff that compiles and doesn't break. Still need: -Handle NACK's on the recieve side -Distinguish top level caches -Handle repsonses from caches failing the fast path -Handle BusError and propogate it -Fix the invalidate packet associated with snooping in the cache src/mem/bus.cc: Make sure to snoop on functional accesses src/mem/cache/base_cache.cc: Wait to make a request into a response until it is ready to be issued src/mem/cache/base_cache.hh: Support range changes for snoops Set up snoop responses for cache->cache transfers src/mem/cache/cache_impl.hh: Only access the cache if it wasn't satisfied by cache->cache transfer Handle snoop phases (detect block, then snoop) Fix functional access to work properly (still need to fix snoop path for functional accesses) --HG-- extra : convert_revision : 4c25f11d7a996c1f56f4f7b55dde87a344e5fdf8 --- src/mem/bus.cc | 1 + src/mem/cache/base_cache.cc | 4 +- src/mem/cache/base_cache.hh | 42 +++++++++++---- src/mem/cache/cache_impl.hh | 34 ++++++++---- tests/configs/o3-timing-mp.py | 90 +++++++++++++++++++++++++++++++ tests/configs/simple-atomic-mp.py | 86 +++++++++++++++++++++++++++++ 6 files changed, 237 insertions(+), 20 deletions(-) create mode 100644 tests/configs/o3-timing-mp.py create mode 100644 tests/configs/simple-atomic-mp.py diff --git a/src/mem/bus.cc b/src/mem/bus.cc index cf9e54e62..3c5283a77 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -252,6 +252,7 @@ Bus::recvFunctional(Packet *pkt) DPRINTF(Bus, "recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n", pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); assert(pkt->getDest() == Packet::Broadcast); + atomicSnoop(pkt); findPort(pkt->getAddr(), pkt->getSrc())->sendFunctional(pkt); } diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index a172847df..e6138e320 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -199,7 +199,9 @@ BaseCache::CacheEvent::process() return; } //Response - //Know the packet to send, no need to mark in service (must succed) + //Know the packet to send + pkt->result = Packet::Success; + pkt->makeTimingResponse(); assert(cachePort->sendTiming(pkt)); } diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index 069dbab58..49999dcb4 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -127,6 +127,8 @@ class BaseCache : public MemObject CachePort *cpuSidePort; CachePort *memSidePort; + bool snoopRangesSent; + public: virtual Port *getPort(const std::string &if_name, int idx = -1); @@ -149,17 +151,22 @@ class BaseCache : public MemObject void recvStatusChange(Port::Status status, bool isCpuSide) { - if (status == Port::RangeChange) - { - if (!isCpuSide) - { + if (status == Port::RangeChange){ + if (!isCpuSide) { cpuSidePort->sendStatusChange(Port::RangeChange); + if (topLevelCache && !snoopRangesSent) { + snoopRangesSent = true; + memSidePort->sendStatusChange(Port::RangeChange); + } } - else - { + else { memSidePort->sendStatusChange(Port::RangeChange); } } + else if (status == Port::SnoopSquash) { + assert(snoopPhase2); + snoopPhase2 = false; + } } virtual Packet *getPacket() @@ -205,6 +212,10 @@ class BaseCache : public MemObject /** True if this cache is connected to the CPU. */ bool topLevelCache; + + /** True if we are now in phase 2 of the snoop process. */ + bool snoopPhase2; + /** Stores time the cache blocked for statistics. */ Tick blockedCycle; @@ -332,6 +343,7 @@ class BaseCache : public MemObject //Start ports at null if more than one is created we should panic cpuSidePort = NULL; memSidePort = NULL; + snoopRangesSent = false; } virtual void init(); @@ -519,8 +531,6 @@ class BaseCache : public MemObject if (!pkt->req->isUncacheable()) { missLatency[pkt->cmdToIndex()][pkt->req->getThreadNum()] += time - pkt->time; } - pkt->makeTimingResponse(); - pkt->result = Packet::Success; CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); reqCpu->schedule(time); } @@ -529,10 +539,12 @@ class BaseCache : public MemObject * Suppliess the data if cache to cache transfers are enabled. * @param pkt The bus transaction to fulfill. */ - void respondToSnoop(Packet *pkt) + void respondToSnoop(Packet *pkt, Tick time) { - assert("Implement\n" && 0); +// assert("Implement\n" && 0); // mi->respond(pkt,curTick + hitLatency); + CacheEvent *reqMem = new CacheEvent(memSidePort, pkt); + reqMem->schedule(time); } /** @@ -551,6 +563,16 @@ class BaseCache : public MemObject else { //This is where snoops get updated + AddrRangeList dummy; + if (!topLevelCache) + { + cpuSidePort->getPeerAddressRanges(dummy, snoop); + } + else + { + snoop.push_back(RangeSize(0,-1)); + } + return; } } diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 11cd84e88..bea495f9f 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -63,14 +63,26 @@ doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) { pkt->req->setScResult(1); } - access(pkt); + if (!(pkt->flags & SATISFIED)) { + access(pkt); + } } else { if (pkt->isResponse()) handleResponse(pkt); - else - snoop(pkt); + else { + //Check if we are in phase1 + if (!snoopPhase2) { + snoopPhase2 = true; + } + else { + //Check if we should do the snoop + if (pkt->flags && SNOOP_COMMIT) + snoop(pkt); + snoopPhase2 = false; + } + } } return true; } @@ -117,7 +129,7 @@ doFunctionalAccess(Packet *pkt, bool isCpuSide) assert("Can't handle LL/SC on functional path\n"); } - probe(pkt, true); + probe(pkt, false); //TEMP ALWAYS SUCCESFUL FOR NOW pkt->result = Packet::Success; } @@ -126,7 +138,7 @@ doFunctionalAccess(Packet *pkt, bool isCpuSide) if (pkt->isResponse()) handleResponse(pkt); else - snoopProbe(pkt, true); + snoopProbe(pkt, false); } } @@ -372,7 +384,7 @@ template void Cache::snoop(Packet * &pkt) { - + DPRINTF(Cache, "SNOOPING"); Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); BlkType *blk = tags->findBlock(pkt); MSHR *mshr = missQueue->findMSHR(blk_addr); @@ -385,7 +397,10 @@ Cache::snoop(Packet * &pkt) //If the outstanding request was an invalidate (upgrade,readex,..) //Then we need to ACK the request until we get the data //Also NACK if the outstanding request is not a cachefill (writeback) + pkt->flags |= SATISFIED; pkt->flags |= NACKED_LINE; + assert("Don't detect these on the other side yet\n"); + respondToSnoop(pkt, curTick + hitLatency); return; } else { @@ -398,6 +413,7 @@ Cache::snoop(Packet * &pkt) //@todo Make it so that a read to a pending read can't be exclusive now. //Set the address so find match works + assert("Don't have invalidates yet\n"); invalidatePkt->addrOverride(pkt->getAddr()); //Append the invalidate on @@ -433,7 +449,7 @@ Cache::snoop(Packet * &pkt) assert(offset + pkt->getSize() <=blkSize); memcpy(pkt->getPtr(), mshr->pkt->getPtr() + offset, pkt->getSize()); - respondToSnoop(pkt); + respondToSnoop(pkt, curTick + hitLatency); } if (pkt->isInvalidate()) { @@ -449,7 +465,7 @@ Cache::snoop(Packet * &pkt) bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state); if (satisfy) { tags->handleSnoop(blk, new_state, pkt); - respondToSnoop(pkt); + respondToSnoop(pkt, curTick + hitLatency); return; } tags->handleSnoop(blk, new_state); @@ -517,7 +533,7 @@ Cache::probe(Packet * &pkt, bool update) missQueue->findWrites(blk_addr, writes); if (!update) { - memSidePort->sendFunctional(pkt); + memSidePort->sendFunctional(pkt); // Check for data in MSHR and writebuffer. if (mshr) { warn("Found outstanding miss on an non-update probe"); diff --git a/tests/configs/o3-timing-mp.py b/tests/configs/o3-timing-mp.py new file mode 100644 index 000000000..881c23156 --- /dev/null +++ b/tests/configs/o3-timing-mp.py @@ -0,0 +1,90 @@ +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Ron Dreslinski + +import m5 +from m5.objects import * +m5.AddToPath('../configs/common') +from FullO3Config import * + +# -------------------- +# Base L1 Cache +# ==================== + +class L1(BaseCache): + latency = 1 + block_size = 64 + mshrs = 4 + tgts_per_mshr = 8 + protocol = CoherenceProtocol(protocol='moesi') + +# ---------------------- +# Base L2 Cache +# ---------------------- + +class L2(BaseCache): + block_size = 64 + latency = 100 + mshrs = 92 + tgts_per_mshr = 16 + write_buffers = 8 + +nb_cores = 4 +cpus = [ DetailedO3CPU() for i in xrange(nb_cores) ] + +# system simulated +system = System(cpu = cpus, physmem = PhysicalMemory(), membus = +Bus()) + +# l2cache & bus +system.toL2Bus = Bus() +system.l2c = L2(size='4MB', assoc=8) +system.l2c.cpu_side = system.toL2Bus.port + +# connect l2c to membus +system.l2c.mem_side = system.membus.port + +# add L1 caches +for cpu in cpus: + cpu.addPrivateSplitL1Caches(L1(size = '32kB', assoc = 1), + L1(size = '32kB', assoc = 4)) + cpu.mem = cpu.dcache + # connect cpu level-1 caches to shared level-2 cache + cpu.connectMemPorts(system.toL2Bus) + +# connect memory to membus +system.physmem.port = system.membus.port + + +# ----------------------- +# run simulation +# ----------------------- + +root = Root( system = system ) +root.system.mem_mode = 'timing' +root.trace.flags="Bus Cache" +#root.trace.flags = "BusAddrRanges" diff --git a/tests/configs/simple-atomic-mp.py b/tests/configs/simple-atomic-mp.py new file mode 100644 index 000000000..cc1a36dda --- /dev/null +++ b/tests/configs/simple-atomic-mp.py @@ -0,0 +1,86 @@ +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Ron Dreslinski + +import m5 +from m5.objects import * + +# -------------------- +# Base L1 Cache +# ==================== + +class L1(BaseCache): + latency = 1 + block_size = 64 + mshrs = 4 + tgts_per_mshr = 8 + protocol = CoherenceProtocol(protocol='moesi') + +# ---------------------- +# Base L2 Cache +# ---------------------- + +class L2(BaseCache): + block_size = 64 + latency = 100 + mshrs = 92 + tgts_per_mshr = 16 + write_buffers = 8 + +nb_cores = 4 +cpus = [ AtomicSimpleCPU() for i in xrange(nb_cores) ] + +# system simulated +system = System(cpu = cpus, physmem = PhysicalMemory(), membus = +Bus()) + +# l2cache & bus +system.toL2Bus = Bus() +system.l2c = L2(size='4MB', assoc=8) +system.l2c.cpu_side = system.toL2Bus.port + +# connect l2c to membus +system.l2c.mem_side = system.membus.port + +# add L1 caches +for cpu in cpus: + cpu.addPrivateSplitL1Caches(L1(size = '32kB', assoc = 1), + L1(size = '32kB', assoc = 4)) + cpu.mem = cpu.dcache + # connect cpu level-1 caches to shared level-2 cache + cpu.connectMemPorts(system.toL2Bus) + +# connect memory to membus +system.physmem.port = system.membus.port + + +# ----------------------- +# run simulation +# ----------------------- + +root = Root( system = system ) +root.system.mem_mode = 'atomic' -- 2.30.2