Mem: Fix issue with a dirty block being lost when the entire block is transferred to a non-cache requester.

author Ali Saidi <Ali.Saidi@ARM.com>

Fri, 18 Mar 2011 00:20:19 +0000 (19:20 -0500)

committer Ali Saidi <Ali.Saidi@ARM.com>

Fri, 18 Mar 2011 00:20:19 +0000 (19:20 -0500)
author Ali Saidi <Ali.Saidi@ARM.com>
Fri, 18 Mar 2011 00:20:19 +0000 (19:20 -0500)
committer Ali Saidi <Ali.Saidi@ARM.com>
Fri, 18 Mar 2011 00:20:19 +0000 (19:20 -0500)
diff --git a/configs/common/Caches.py b/configs/common/Caches.py

index 3adc7e5c9b91b3d2e64e6c637ba5fa9fd5df1a75..ffcd63c49696f6560663dca3714bc738aaad4ad3 100644 (file)
--- a/configs/common/Caches.py
+++ b/configs/common/Caches.py
@@ -34,6 +34,7 @@ class L1Cache(BaseCache):
      latency = '1ns'
      mshrs = 10
      tgts_per_mshr = 5
+    is_top_level = True
  
  class L2Cache(BaseCache):
      assoc = 8
@@ -49,6 +50,7 @@ class PageTableWalkerCache(BaseCache):
      mshrs = 10
      size = '1kB'
      tgts_per_mshr = 12
+    is_top_level = True
  
  class IOCache(BaseCache):
      assoc = 8
@@ -58,3 +60,4 @@ class IOCache(BaseCache):
      size = '1kB'
      tgts_per_mshr = 12
      forward_snoops = False
+    is_top_level = True
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh

index a2f2b4f8a1bfb2df618055196e466de67c39df04..3092bd937d97cb8e5108204b6790c7faf1ce1802 100644 (file)
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -112,6 +112,9 @@ DefaultFetch<Impl>::IcachePort::recvTiming(PacketPtr pkt)
  {
      DPRINTF(Fetch, "Received timing\n");
      if (pkt->isResponse()) {
+        // We shouldn't ever get a block in ownership state
+        assert(!(pkt->memInhibitAsserted() && !pkt->sharedAsserted()));
+
          fetch->processCacheCompletion(pkt);
      }
      //else Snooped a coherence request, just return
diff --git a/src/dev/io_device.cc b/src/dev/io_device.cc

index be97bc4ad49ae65161afca5688c09d9646170ce2..ffe8fdf06d1e7d211842fbdfec94c292244bc051 100644 (file)
--- a/src/dev/io_device.cc
+++ b/src/dev/io_device.cc
@@ -139,6 +139,9 @@ DmaPort::recvTiming(PacketPtr pkt)
          assert(pendingCount >= 0);
          assert(state);
  
+        // We shouldn't ever get a block in ownership state
+        assert(!(pkt->memInhibitAsserted() && !pkt->sharedAsserted()));
+
          state->numBytes += pkt->req->getSize();
          assert(state->totBytes >= state->numBytes);
          if (state->totBytes == state->numBytes) {
diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py

index dffac2234f224e19edff14e9cd81a6dd8ef99a6f..5c7ae527416aedae00d254f4c02d58b62c56d13b 100644 (file)
--- a/src/mem/cache/BaseCache.py
+++ b/src/mem/cache/BaseCache.py
@@ -48,6 +48,7 @@ class BaseCache(MemObject):
      size = Param.MemorySize("capacity in bytes")
      forward_snoops = Param.Bool(True,
          "forward snoops from mem side to cpu side")
+    is_top_level = Param.Bool(False, "Is this cache at the top level (e.g. L1)")
      subblock_size = Param.Int(0,
          "Size of subblock in IIC used for compression")
      tgts_per_mshr = Param.Int("max number of accesses per MSHR")
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc

index 9166e1a0993829f85112afab61e47fcb58f95881..b7e331d5427205d1ba93e17d840e34c830b271c8 100644 (file)
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -58,6 +58,7 @@ BaseCache::BaseCache(const Params *p)
        hitLatency(p->latency),
        numTarget(p->tgts_per_mshr),
        forwardSnoops(p->forward_snoops),
+      isTopLevel(p->is_top_level),
        blocked(0),
        noTargetMSHR(NULL),
        missCount(p->max_miss_count),
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh

index e8a6442966e8a8007a347f0946ec590108c00bea..28ddf5054d5f268f145405b406b3e930a916413b 100644 (file)
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -194,6 +194,11 @@ class BaseCache : public MemObject
      /** Do we forward snoops from mem side port through to cpu side port? */
      bool forwardSnoops;
  
+    /** Is this cache a top-level cache (e.g. L1, I/O cache)? If so, we should
+     * never forward ownership, or apply similar optimizations, toward the
+     * cpu side. */
+    bool isTopLevel;
+
      /**
       * Bit vector of the blocking reasons for the access path.
       * @sa #BlockedCause
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh

index e4e4a3c9231a19f86cf41450f47f4d2543d2f410..0b2b273f9683ba9057ade605ff75f6955efe20f9 100644 (file)
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -216,7 +216,7 @@ Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk,
                  
                  if (blk->isDirty()) {
                      // special considerations if we're owner:
-                    if (!deferred_response) {
+                    if (!deferred_response && !isTopLevel) {
                          // if we are responding immediately and can
                          // signal that we're transferring ownership
                          // along with exclusivity, do so
diff --git a/tests/configs/inorder-timing.py b/tests/configs/inorder-timing.py

index af58cafa59a5657fa996fbd32996a1125a9c353e..ddf37b5ec093f618236b71bcdc3a2828c46b859b 100644 (file)
--- a/tests/configs/inorder-timing.py
+++ b/tests/configs/inorder-timing.py
@@ -37,8 +37,12 @@ class MyCache(BaseCache):
      mshrs = 10
      tgts_per_mshr = 5
  
+class MyL1Cache(MyCache):
+    is_top_level = True
+
  cpu = InOrderCPU(cpu_id=0)
-cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'),
+cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'),
+                              MyL1Cache(size = '256kB'),
                                MyCache(size = '2MB', latency='10ns'))
  
  cpu.clock = '2GHz'
diff --git a/tests/configs/memtest.py b/tests/configs/memtest.py

index d75bd3d8c6cb0ee35f5bab008d0d0bb035bad41c..f6238147391640f8754a6cd1a7eef8831af31e1e 100644 (file)
--- a/tests/configs/memtest.py
+++ b/tests/configs/memtest.py
@@ -38,6 +38,7 @@ class L1(BaseCache):
      block_size = 64
      mshrs = 12
      tgts_per_mshr = 8
+    is_top_level = True
  
  # ----------------------
  # Base L2 Cache
diff --git a/tests/configs/o3-timing-mp.py b/tests/configs/o3-timing-mp.py

index 5c770cdbc967bee76d50a892f61c7884737b58fe..35811282c47ad47beeea4d8edaf7ac556e2a4650 100644 (file)
--- a/tests/configs/o3-timing-mp.py
+++ b/tests/configs/o3-timing-mp.py
@@ -39,6 +39,7 @@ class L1(BaseCache):
      block_size = 64
      mshrs = 4
      tgts_per_mshr = 8
+    is_top_level = True
  
  # ----------------------
  # Base L2 Cache
diff --git a/tests/configs/o3-timing.py b/tests/configs/o3-timing.py

index a4c054122e1079d80e323b78b28ca7f24c7de86f..d4a69d94ae4b93f31744dd05e1c58906a9467bf5 100644 (file)
--- a/tests/configs/o3-timing.py
+++ b/tests/configs/o3-timing.py
@@ -37,8 +37,12 @@ class MyCache(BaseCache):
      mshrs = 10
      tgts_per_mshr = 5
  
+class MyL1Cache(MyCache):
+    is_top_level = True
+
  cpu = DerivO3CPU(cpu_id=0)
-cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'),
+cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'),
+                              MyL1Cache(size = '256kB'),
                                MyCache(size = '2MB'))
  cpu.clock = '2GHz'
  
diff --git a/tests/configs/pc-simple-atomic.py b/tests/configs/pc-simple-atomic.py

index 382899eb598e4b207905136c7ce8f01f06390475..1c35ff2d9ae7c45dc2fd0268d2d899cb519dce18 100644 (file)
--- a/tests/configs/pc-simple-atomic.py
+++ b/tests/configs/pc-simple-atomic.py
@@ -43,6 +43,7 @@ class L1(BaseCache):
      block_size = 64
      mshrs = 4
      tgts_per_mshr = 8
+    is_top_level = True
  
  # ----------------------
  # Base L2 Cache
@@ -65,6 +66,7 @@ class PageTableWalkerCache(BaseCache):
      mshrs = 10
      size = '1kB'
      tgts_per_mshr = 12
+    is_top_level = True
  
  # ---------------------
  # I/O Cache
@@ -78,6 +80,7 @@ class IOCache(BaseCache):
      tgts_per_mshr = 12
      addr_range = AddrRange(0, size=mem_size)
      forward_snoops = False
+    is_top_level = True
  
  #cpu
  cpu = AtomicSimpleCPU(cpu_id=0)
diff --git a/tests/configs/pc-simple-timing.py b/tests/configs/pc-simple-timing.py

index 7452e25423b4773cc9751ec138e7a65ec33b7d80..9c9f4aeca425a27bf0ecfdfd669bc3a0b9f4df03 100644 (file)
--- a/tests/configs/pc-simple-timing.py
+++ b/tests/configs/pc-simple-timing.py
@@ -44,6 +44,7 @@ class L1(BaseCache):
      block_size = 64
      mshrs = 4
      tgts_per_mshr = 8
+    is_top_level = True
  
  # ----------------------
  # Base L2 Cache
diff --git a/tests/configs/realview-simple-atomic.py b/tests/configs/realview-simple-atomic.py

index ab6d612d46899047906a9640a841ec71c259e361..7340be7a4a20fea2dbd94c5150d66a774b073ed0 100644 (file)
--- a/tests/configs/realview-simple-atomic.py
+++ b/tests/configs/realview-simple-atomic.py
@@ -40,6 +40,7 @@ class L1(BaseCache):
      block_size = 64
      mshrs = 4
      tgts_per_mshr = 8
+    is_top_level = True
  
  # ----------------------
  # Base L2 Cache
diff --git a/tests/configs/realview-simple-timing.py b/tests/configs/realview-simple-timing.py

index 53b6ab2b23c4ef37ec65da825586a7c3808e4212..83b643c5285be8166e9fa2e275ca06b7dff6d1b1 100644 (file)
--- a/tests/configs/realview-simple-timing.py
+++ b/tests/configs/realview-simple-timing.py
@@ -41,6 +41,7 @@ class L1(BaseCache):
      block_size = 64
      mshrs = 4
      tgts_per_mshr = 8
+    is_top_level = True
  
  # ----------------------
  # Base L2 Cache
diff --git a/tests/configs/simple-atomic-mp.py b/tests/configs/simple-atomic-mp.py

index d88a9b395f2a737eef8aa2fcbe233381f4e47c78..4db741b8ac0048b0a606c56268b39fb8d6881279 100644 (file)
--- a/tests/configs/simple-atomic-mp.py
+++ b/tests/configs/simple-atomic-mp.py
@@ -38,6 +38,7 @@ class L1(BaseCache):
      block_size = 64
      mshrs = 4
      tgts_per_mshr = 8
+    is_top_level = True
  
  # ----------------------
  # Base L2 Cache
diff --git a/tests/configs/simple-timing-mp.py b/tests/configs/simple-timing-mp.py

index f5793b282879c07d43bebcf167d607455eb6becd..6f4090ec2aa80f632df450b4d97f9f88797800d9 100644 (file)
--- a/tests/configs/simple-timing-mp.py
+++ b/tests/configs/simple-timing-mp.py
@@ -38,6 +38,7 @@ class L1(BaseCache):
      block_size = 64
      mshrs = 4
      tgts_per_mshr = 8
+    is_top_level = True
  
  # ----------------------
  # Base L2 Cache
diff --git a/tests/configs/simple-timing.py b/tests/configs/simple-timing.py

index 739e11e5547e79c3274e32417de98822925ea4c4..bc9d016c5b07e917badd2fe2ea9c47b8182c25e9 100644 (file)
--- a/tests/configs/simple-timing.py
+++ b/tests/configs/simple-timing.py
@@ -36,8 +36,12 @@ class MyCache(BaseCache):
      mshrs = 10
      tgts_per_mshr = 5
  
+class MyL1Cache(MyCache):
+    is_top_level = True
+
  cpu = TimingSimpleCPU(cpu_id=0)
-cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'),
+cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'),
+                              MyL1Cache(size = '256kB'),
                                MyCache(size = '2MB', latency='10ns'))
  system = System(cpu = cpu,
                  physmem = PhysicalMemory(),
diff --git a/tests/configs/tsunami-o3-dual.py b/tests/configs/tsunami-o3-dual.py

index 7744560f9985139033cb97ec2d02529fd77072de..125e228a7191a37dc6a25c1615834fa181fd93be 100644 (file)
--- a/tests/configs/tsunami-o3-dual.py
+++ b/tests/configs/tsunami-o3-dual.py
@@ -41,6 +41,7 @@ class L1(BaseCache):
      block_size = 64
      mshrs = 4
      tgts_per_mshr = 8
+    is_top_level = True
  
  # ----------------------
  # Base L2 Cache
@@ -65,6 +66,7 @@ class IOCache(BaseCache):
      tgts_per_mshr = 12
      addr_range=AddrRange(0, size='8GB')
      forward_snoops = False
+    is_top_level = True
  
  #cpu
  cpus = [ DerivO3CPU(cpu_id=i) for i in xrange(2) ]
diff --git a/tests/configs/tsunami-o3.py b/tests/configs/tsunami-o3.py

index fd2d6643134643c77257cda872e19b085c436320..13212d5d94b35ab97c5dbbd344907c24f3c83c76 100644 (file)
--- a/tests/configs/tsunami-o3.py
+++ b/tests/configs/tsunami-o3.py
@@ -41,6 +41,7 @@ class L1(BaseCache):
      block_size = 64
      mshrs = 4
      tgts_per_mshr = 8
+    is_top_level = True
  
  # ----------------------
  # Base L2 Cache
@@ -65,6 +66,7 @@ class IOCache(BaseCache):
      tgts_per_mshr = 12
      addr_range=AddrRange(0, size='8GB')
      forward_snoops = False
+    is_top_level = True
  
  #cpu
  cpu = DerivO3CPU(cpu_id=0)
diff --git a/tests/configs/tsunami-simple-atomic-dual.py b/tests/configs/tsunami-simple-atomic-dual.py

index 9d3dbaa918738bbfdca0994ddd48fff6d9128c7f..2e56ce8510ea737cad1a8c7a605f6eabcf548919 100644 (file)
--- a/tests/configs/tsunami-simple-atomic-dual.py
+++ b/tests/configs/tsunami-simple-atomic-dual.py
@@ -40,6 +40,7 @@ class L1(BaseCache):
      block_size = 64
      mshrs = 4
      tgts_per_mshr = 8
+    is_top_level = True
  
  # ----------------------
  # Base L2 Cache
@@ -64,6 +65,7 @@ class IOCache(BaseCache):
      tgts_per_mshr = 12
      addr_range=AddrRange(0, size='8GB')
      forward_snoops = False
+    is_top_level = True
  
  #cpu
  cpus = [ AtomicSimpleCPU(cpu_id=i) for i in xrange(2) ]
diff --git a/tests/configs/tsunami-simple-atomic.py b/tests/configs/tsunami-simple-atomic.py

index cbacf1995ea2a899d1530265baf9bfe21ea51d3b..3c1981464e59a19e1ae1f14a50e6768783efb35d 100644 (file)
--- a/tests/configs/tsunami-simple-atomic.py
+++ b/tests/configs/tsunami-simple-atomic.py
@@ -40,6 +40,7 @@ class L1(BaseCache):
      block_size = 64
      mshrs = 4
      tgts_per_mshr = 8
+    is_top_level = True
  
  # ----------------------
  # Base L2 Cache
@@ -64,6 +65,7 @@ class IOCache(BaseCache):
      tgts_per_mshr = 12
      addr_range=AddrRange(0, size='8GB')
      forward_snoops = False
+    is_top_level = True
  
  #cpu
  cpu = AtomicSimpleCPU(cpu_id=0)
diff --git a/tests/configs/tsunami-simple-timing-dual.py b/tests/configs/tsunami-simple-timing-dual.py

index f0105461d55a2850a767fba1f24d123bda67f132..747cdac180ddbadfc79b99b1711e0a1b6169f776 100644 (file)
--- a/tests/configs/tsunami-simple-timing-dual.py
+++ b/tests/configs/tsunami-simple-timing-dual.py
@@ -40,6 +40,7 @@ class L1(BaseCache):
      block_size = 64
      mshrs = 4
      tgts_per_mshr = 8
+    is_top_level = True
  
  # ----------------------
  # Base L2 Cache
@@ -64,6 +65,7 @@ class IOCache(BaseCache):
      tgts_per_mshr = 12
      addr_range=AddrRange(0, size='8GB')
      forward_snoops = False
+    is_top_level = True
  
  #cpu
  cpus = [ TimingSimpleCPU(cpu_id=i) for i in xrange(2) ]
diff --git a/tests/configs/tsunami-simple-timing.py b/tests/configs/tsunami-simple-timing.py

index 9a262b3b228e49b2d0afa3f9bcad9f279459edd0..110e6ee74cbe8a1481de4e5b65c47208c6450465 100644 (file)
--- a/tests/configs/tsunami-simple-timing.py
+++ b/tests/configs/tsunami-simple-timing.py
@@ -41,6 +41,7 @@ class L1(BaseCache):
      block_size = 64
      mshrs = 4
      tgts_per_mshr = 8
+    is_top_level = True
  
  # ----------------------
  # Base L2 Cache
@@ -65,6 +66,7 @@ class IOCache(BaseCache):
      tgts_per_mshr = 12
      addr_range=AddrRange(0, size='8GB')
      forward_snoops = False
+    is_top_level = True
  
  #cpu
  cpu = TimingSimpleCPU(cpu_id=0)
author	Ali Saidi <Ali.Saidi@ARM.com>
	Fri, 18 Mar 2011 00:20:19 +0000 (19:20 -0500)
committer	Ali Saidi <Ali.Saidi@ARM.com>
	Fri, 18 Mar 2011 00:20:19 +0000 (19:20 -0500)
configs/common/Caches.py		patch \| blob \| history
src/cpu/o3/fetch_impl.hh		patch \| blob \| history
src/dev/io_device.cc		patch \| blob \| history
src/mem/cache/BaseCache.py		patch \| blob \| history
src/mem/cache/base.cc		patch \| blob \| history
src/mem/cache/base.hh		patch \| blob \| history
src/mem/cache/cache_impl.hh		patch \| blob \| history
tests/configs/inorder-timing.py		patch \| blob \| history
tests/configs/memtest.py		patch \| blob \| history
tests/configs/o3-timing-mp.py		patch \| blob \| history
tests/configs/o3-timing.py		patch \| blob \| history
tests/configs/pc-simple-atomic.py		patch \| blob \| history
tests/configs/pc-simple-timing.py		patch \| blob \| history
tests/configs/realview-simple-atomic.py		patch \| blob \| history
tests/configs/realview-simple-timing.py		patch \| blob \| history
tests/configs/simple-atomic-mp.py		patch \| blob \| history
tests/configs/simple-timing-mp.py		patch \| blob \| history
tests/configs/simple-timing.py		patch \| blob \| history
tests/configs/tsunami-o3-dual.py		patch \| blob \| history
tests/configs/tsunami-o3.py		patch \| blob \| history
tests/configs/tsunami-simple-atomic-dual.py		patch \| blob \| history
tests/configs/tsunami-simple-atomic.py		patch \| blob \| history
tests/configs/tsunami-simple-timing-dual.py		patch \| blob \| history
tests/configs/tsunami-simple-timing.py		patch \| blob \| history