Fixes for functional accesses to use the snoop path.
authorRon Dreslinski <rdreslin@umich.edu>
Fri, 6 Oct 2006 03:28:03 +0000 (23:28 -0400)
committerRon Dreslinski <rdreslin@umich.edu>
Fri, 6 Oct 2006 03:28:03 +0000 (23:28 -0400)
And small other tweaks to snooping coherence.

src/mem/cache/base_cache.hh:
    Make timing response at the time of send.
src/mem/cache/cache.hh:
src/mem/cache/cache_impl.hh:
    Update probe interface to be bi-directional for functional accesses
src/mem/packet.hh:
    Add the function to create an atomic response to a given request

--HG--
extra : convert_revision : 04075a117cf30a7df16e6d3ce485543cc77d4ca6

src/mem/cache/base_cache.hh
src/mem/cache/cache.hh
src/mem/cache/cache_impl.hh
src/mem/packet.hh
tests/configs/simple-timing-mp.py [new file with mode: 0644]

index 49999dcb46dd237e3468040f2ce1ad2e6e72abfe..19cfe1335d9f13887d906349f2860e350a3f7946 100644 (file)
@@ -515,8 +515,6 @@ class BaseCache : public MemObject
      */
     void respond(Packet *pkt, Tick time)
     {
-        pkt->makeTimingResponse();
-        pkt->result = Packet::Success;
         CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
         reqCpu->schedule(time);
     }
index 989b8743e6a1594431826ce2f5f9dde9a7859319..4b8870c959b2a6ca028148d91ba28a2b87a6554b 100644 (file)
@@ -251,7 +251,7 @@ class Cache : public BaseCache
      * request.
      * @return The estimated completion time.
      */
-    Tick probe(Packet * &pkt, bool update);
+    Tick probe(Packet * &pkt, bool update, CachePort * otherSidePort);
 
     /**
      * Snoop for the provided request in the cache and return the estimated
@@ -262,7 +262,7 @@ class Cache : public BaseCache
      * request.
      * @return The estimated completion time.
      */
-    Tick snoopProbe(Packet * &pkt, bool update);
+    Tick snoopProbe(Packet * &pkt);
 };
 
 #endif // __CACHE_HH__
index bea495f9fbffef02b0751db556d0723668d8b6bc..228cd2d7360afa1019ed1710ac2b4b96e369426b 100644 (file)
@@ -99,7 +99,7 @@ doAtomicAccess(Packet *pkt, bool isCpuSide)
             pkt->req->setScResult(1);
         }
 
-        probe(pkt, true);
+        probe(pkt, true, NULL);
         //TEMP ALWAYS SUCCES FOR NOW
         pkt->result = Packet::Success;
     }
@@ -108,7 +108,7 @@ doAtomicAccess(Packet *pkt, bool isCpuSide)
         if (pkt->isResponse())
             handleResponse(pkt);
         else
-            snoopProbe(pkt, true);
+            snoopProbe(pkt);
     }
     //Fix this timing info
     return hitLatency;
@@ -129,16 +129,13 @@ doFunctionalAccess(Packet *pkt, bool isCpuSide)
             assert("Can't handle LL/SC on functional path\n");
         }
 
-        probe(pkt, false);
+        probe(pkt, false, memSidePort);
         //TEMP ALWAYS SUCCESFUL FOR NOW
         pkt->result = Packet::Success;
     }
     else
     {
-        if (pkt->isResponse())
-            handleResponse(pkt);
-        else
-            snoopProbe(pkt, false);
+            probe(pkt, false, cpuSidePort);
     }
 }
 
@@ -251,7 +248,7 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt)
             pkt->getAddr() & ~((Addr)blkSize - 1), pkt->req->getPC());
     if (blk) {
         // Hit
-        hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
+        hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
         // clear dirty bit if write through
         if (pkt->needsResponse())
             respond(pkt, curTick+lat);
@@ -261,7 +258,7 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt)
 
     // Miss
     if (!pkt->req->isUncacheable()) {
-        misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
+        misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
         /** @todo Move miss count code into BaseCache */
         if (missCount) {
             --missCount;
@@ -282,7 +279,7 @@ Cache<TagStore,Buffering,Coherence>::getPacket()
     Packet * pkt = missQueue->getPacket();
     if (pkt) {
         if (!pkt->req->isUncacheable()) {
-            if (pkt->cmd == Packet::HardPFReq) misses[Packet::HardPFReq][pkt->req->getThreadNum()]++;
+            if (pkt->cmd == Packet::HardPFReq) misses[Packet::HardPFReq][0/*pkt->req->getThreadNum()*/]++;
             BlkType *blk = tags->findBlock(pkt);
             Packet::Command cmd = coherence->getBusCmd(pkt->cmd,
                                               (blk)? blk->status : 0);
@@ -326,7 +323,7 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt)
             PacketList writebacks;
             blk = tags->handleFill(blk, (MSHR*)pkt->senderState,
                                    coherence->getNewState(pkt,old_state),
-                                   writebacks);
+                                   writebacks, pkt);
             while (!writebacks.empty()) {
                     missQueue->doWriteback(writebacks.front());
             }
@@ -384,7 +381,6 @@ template<class TagStore, class Buffering, class Coherence>
 void
 Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
 {
-    DPRINTF(Cache, "SNOOPING");
     Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
     BlkType *blk = tags->findBlock(pkt);
     MSHR *mshr = missQueue->findMSHR(blk_addr);
@@ -502,7 +498,7 @@ Cache<TagStore,Buffering,Coherence>::invalidateBlk(Addr addr)
  */
 template<class TagStore, class Buffering, class Coherence>
 Tick
-Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update)
+Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort* otherSidePort)
 {
 //    MemDebug::cacheProbe(pkt);
     if (!pkt->req->isUncacheable()) {
@@ -533,7 +529,8 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update)
         missQueue->findWrites(blk_addr, writes);
 
         if (!update) {
-                memSidePort->sendFunctional(pkt);
+                otherSidePort->sendFunctional(pkt);
+
             // Check for data in MSHR and writebuffer.
             if (mshr) {
                 warn("Found outstanding miss on an non-update probe");
@@ -628,12 +625,15 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update)
 
                 lat = memSidePort->sendAtomic(busPkt);
 
+                //Be sure to flip the response to a request for coherence
+                busPkt->makeAtomicResponse();
+
 /*             if (!(busPkt->flags & SATISFIED)) {
                     // blocked at a higher level, just return
                     return 0;
                 }
 
-*/             misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
+*/             misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
 
                 CacheBlk::State old_state = (blk) ? blk->status : 0;
                 tags->handleFill(blk, busPkt,
@@ -658,10 +658,10 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update)
         }
 
         if (update) {
-            hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
+            hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
         } else if (pkt->isWrite()) {
             // Still need to change data in all locations.
-            return memSidePort->sendAtomic(pkt);
+            return otherSidePort->sendAtomic(pkt);
         }
         return curTick + lat;
     }
@@ -671,18 +671,18 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update)
 
 template<class TagStore, class Buffering, class Coherence>
 Tick
-Cache<TagStore,Buffering,Coherence>::snoopProbe(PacketPtr &pkt, bool update)
+Cache<TagStore,Buffering,Coherence>::snoopProbe(PacketPtr &pkt)
 {
-    Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
-    BlkType *blk = tags->findBlock(pkt);
-    MSHR *mshr = missQueue->findMSHR(blk_addr);
-    CacheBlk::State new_state = 0;
-    bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
-    if (satisfy) {
-        tags->handleSnoop(blk, new_state, pkt);
-        return hitLatency;
-    }
-    tags->handleSnoop(blk, new_state);
-    return 0;
+        Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
+        BlkType *blk = tags->findBlock(pkt);
+        MSHR *mshr = missQueue->findMSHR(blk_addr);
+        CacheBlk::State new_state = 0;
+        bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
+        if (satisfy) {
+            tags->handleSnoop(blk, new_state, pkt);
+            return hitLatency;
+        }
+        tags->handleSnoop(blk, new_state);
+        return 0;
 }
 
index c7d28010cb453e3a1c9d2d18ed34d1cd5c16ae5b..be9bf5f57e8c7b641dfbfdd2e8d927d116e69598 100644 (file)
@@ -312,7 +312,7 @@ class Packet
      *   for returning as a response to that request.  Used for timing
      *   accesses only.  For atomic and functional accesses, the
      *   request packet is always implicitly passed back *without*
-     *   modifying the command or destination fields, so this function
+     *   modifying the destination fields, so this function
      *   should not be called. */
     void makeTimingResponse() {
         assert(needsResponse());
@@ -325,6 +325,18 @@ class Packet
         srcValid = false;
     }
 
+    /** Take a request packet and modify it in place to be suitable
+     *   for returning as a response to that request.
+     */
+    void makeAtomicResponse() {
+        assert(needsResponse());
+        assert(isRequest());
+        int icmd = (int)cmd;
+        icmd &= ~(IsRequest);
+        icmd |= IsResponse;
+        cmd = (Command)icmd;
+    }
+
     /** Take a request packet that has been returned as NACKED and modify it so
      * that it can be sent out again. Only packets that need a response can be
      * NACKED, so verify that that is true. */
diff --git a/tests/configs/simple-timing-mp.py b/tests/configs/simple-timing-mp.py
new file mode 100644 (file)
index 0000000..9fc5f38
--- /dev/null
@@ -0,0 +1,86 @@
+# Copyright (c) 2006 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Ron Dreslinski
+
+import m5
+from m5.objects import *
+
+# --------------------
+# Base L1 Cache
+# ====================
+
+class L1(BaseCache):
+    latency = 1
+    block_size = 64
+    mshrs = 4
+    tgts_per_mshr = 8
+    protocol = CoherenceProtocol(protocol='moesi')
+
+# ----------------------
+# Base L2 Cache
+# ----------------------
+
+class L2(BaseCache):
+    block_size = 64
+    latency = 100
+    mshrs = 92
+    tgts_per_mshr = 16
+    write_buffers = 8
+
+nb_cores = 4
+cpus = [ TimingSimpleCPU() for i in xrange(nb_cores) ]
+
+# system simulated
+system = System(cpu = cpus, physmem = PhysicalMemory(), membus =
+Bus())
+
+# l2cache & bus
+system.toL2Bus = Bus()
+system.l2c = L2(size='4MB', assoc=8)
+system.l2c.cpu_side = system.toL2Bus.port
+
+# connect l2c to membus
+system.l2c.mem_side = system.membus.port
+
+# add L1 caches
+for cpu in cpus:
+    cpu.addPrivateSplitL1Caches(L1(size = '32kB', assoc = 1),
+                                L1(size = '32kB', assoc = 4))
+    cpu.mem = cpu.dcache
+    # connect cpu level-1 caches to shared level-2 cache
+    cpu.connectMemPorts(system.toL2Bus)
+
+# connect memory to membus
+system.physmem.port = system.membus.port
+
+
+# -----------------------
+# run simulation
+# -----------------------
+
+root = Root( system = system )
+root.system.mem_mode = 'timing'