From d4273cc9a6f3c00566e97ebcd71509ed14477b37 Mon Sep 17 00:00:00 2001
From: Andreas Hansson <andreas.hansson@arm.com>
Date: Thu, 18 Jul 2013 08:31:16 -0400
Subject: [PATCH] mem: Set the cache line size on a system level

This patch removes the notion of a peer block size and instead sets
the cache line size on the system level.

Previously the size was set per cache, and communicated through the
interconnect. There were plenty of checks to ensure that everyone had
the same size specified, and these checks are now removed. Another
benefit that is not yet harnessed is that the cache line size is now
known at construction time, rather than after the port binding. Hence,
the block size can be locally stored and does not have to be queried
every time it is used.

A follow-on patch updates the configuration scripts accordingly.
---
 src/arch/arm/isa.cc                        |  4 +--
 src/cpu/base.cc                            |  2 +-
 src/cpu/base.hh                            | 10 +++++-
 src/cpu/base_dyn_inst.hh                   |  4 +--
 src/cpu/checker/cpu.cc                     |  8 ++---
 src/cpu/inorder/resources/cache_unit.cc    |  2 +-
 src/cpu/o3/fetch.hh                        |  5 +--
 src/cpu/o3/fetch_impl.hh                   | 40 ++++++----------------
 src/cpu/o3/lsq_unit_impl.hh                | 13 ++-----
 src/cpu/simple/atomic.cc                   |  8 ++---
 src/cpu/simple/timing.cc                   |  4 +--
 src/cpu/testers/memtest/memtest.cc         | 11 +++---
 src/cpu/testers/traffic_gen/traffic_gen.cc |  7 ++--
 src/cpu/thread_state.cc                    |  3 +-
 src/dev/dma_device.cc                      |  2 +-
 src/dev/dma_device.hh                      |  6 ++--
 src/mem/Bus.py                             |  2 --
 src/mem/SimpleDRAM.py                      |  2 +-
 src/mem/addr_mapper.cc                     | 18 ----------
 src/mem/addr_mapper.hh                     | 14 --------
 src/mem/bridge.cc                          |  5 ---
 src/mem/bus.cc                             | 32 +----------------
 src/mem/bus.hh                             | 11 ------
 src/mem/cache/BaseCache.py                 |  1 -
 src/mem/cache/base.cc                      |  4 +--
 src/mem/cache/cache.hh                     |  6 ----
 src/mem/cache/tags/Tags.py                 |  4 +--
 src/mem/coherent_bus.hh                    | 12 -------
 src/mem/comm_monitor.cc                    | 12 -------
 src/mem/comm_monitor.hh                    | 14 --------
 src/mem/fs_translating_port_proxy.cc       | 11 +++---
 src/mem/fs_translating_port_proxy.hh       |  2 +-
 src/mem/noncoherent_bus.hh                 | 12 -------
 src/mem/port.cc                            | 12 -------
 src/mem/port.hh                            | 22 ------------
 src/mem/port_proxy.cc                      |  2 +-
 src/mem/port_proxy.hh                      |  8 +++--
 src/mem/ruby/system/RubyPort.cc            |  6 ----
 src/mem/ruby/system/RubyPort.hh            |  1 -
 src/mem/se_translating_port_proxy.cc       |  5 +--
 src/mem/simple_dram.cc                     |  3 +-
 src/sim/System.py                          |  2 ++
 src/sim/system.cc                          | 12 +++++--
 src/sim/system.hh                          |  8 +++++
 44 files changed, 94 insertions(+), 278 deletions(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index 117873450..2b67e6cf6 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2012 ARM Limited
+ * Copyright (c) 2010-2013 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -238,7 +238,7 @@ ISA::readMiscReg(int misc_reg, ThreadContext *tc)
             //all caches have the same line size in gem5
             //4 byte words in ARM
             unsigned lineSizeWords =
-                tc->getCpuPtr()->getInstPort().peerBlockSize() / 4;
+                tc->getSystemPtr()->cacheLineSize() / 4;
             unsigned log2LineSizeWords = 0;
 
             while (lineSizeWords >>= 1) {
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 5b76f3f7c..25fe9bf97 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -119,7 +119,7 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
       _instMasterId(p->system->getMasterId(name() + ".inst")),
       _dataMasterId(p->system->getMasterId(name() + ".data")),
       _taskId(ContextSwitchTaskId::Unknown), _pid(Request::invldPid),
-      _switchedOut(p->switched_out),
+      _switchedOut(p->switched_out), _cacheLineSize(p->system->cacheLineSize()),
       interrupts(p->interrupts), profileEvent(NULL),
       numThreads(p->numThreads), system(p->system)
 {
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 0a3600764..a91ba862d 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -57,12 +57,12 @@
 #include "sim/eventq.hh"
 #include "sim/full_system.hh"
 #include "sim/insttracer.hh"
+#include "sim/system.hh"
 
 struct BaseCPUParams;
 class BranchPred;
 class CheckerCPU;
 class ThreadContext;
-class System;
 
 class CPUProgressEvent : public Event
 {
@@ -117,6 +117,9 @@ class BaseCPU : public MemObject
     /** Is the CPU switched out or active? */
     bool _switchedOut;
 
+    /** Cache the cache line size that we get from the system */
+    const unsigned int _cacheLineSize;
+
   public:
 
     /**
@@ -342,6 +345,11 @@ class BaseCPU : public MemObject
 
     System *system;
 
+    /**
+     * Get the cache line size of the system.
+     */
+    inline unsigned int cacheLineSize() const { return _cacheLineSize; }
+
     /**
      * Serialize this object to the given output stream.
      *
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 20278bd30..8989a438a 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 ARM Limited
+ * Copyright (c) 2011,2013 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -976,7 +976,7 @@ BaseDynInst<Impl>::splitRequest(RequestPtr req, RequestPtr &sreqLow,
                                 RequestPtr &sreqHigh)
 {
     // Check to see if the request crosses the next level block boundary.
-    unsigned block_size = cpu->getDataPort().peerBlockSize();
+    unsigned block_size = cpu->cacheLineSize();
     Addr addr = req->getVaddr();
     Addr split_addr = roundDown(addr + req->getSize() - 1, block_size);
     assert(split_addr <= addr || split_addr - addr < block_size);
diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc
index c824121be..5cb1ccf18 100644
--- a/src/cpu/checker/cpu.cc
+++ b/src/cpu/checker/cpu.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 ARM Limited
+ * Copyright (c) 2011,2013 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -142,9 +142,8 @@ Fault
 CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size, unsigned flags)
 {
     Fault fault = NoFault;
-    unsigned blockSize = dcachePort->peerBlockSize();
     int fullSize = size;
-    Addr secondAddr = roundDown(addr + size - 1, blockSize);
+    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
     bool checked_flags = false;
     bool flags_match = true;
     Addr pAddr = 0x0;
@@ -236,10 +235,9 @@ CheckerCPU::writeMem(uint8_t *data, unsigned size,
     bool flags_match = true;
     Addr pAddr = 0x0;
 
-    unsigned blockSize = dcachePort->peerBlockSize();
     int fullSize = size;
 
-    Addr secondAddr = roundDown(addr + size - 1, blockSize);
+    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
 
     if (secondAddr > addr)
         size = secondAddr - addr;
diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc
index e380c79d4..9a46641ac 100644
--- a/src/cpu/inorder/resources/cache_unit.cc
+++ b/src/cpu/inorder/resources/cache_unit.cc
@@ -112,7 +112,7 @@ CacheUnit::init()
         reqs[i] = new CacheRequest(this);
     }
 
-    cacheBlkSize = cachePort->peerBlockSize();
+    cacheBlkSize = cpu->cacheLineSize();
     cacheBlkMask = cacheBlkSize  - 1;
 
     initSlots();
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 23245d496..35f58ff74 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -216,9 +216,6 @@ class DefaultFetch
     /** Initialize stage. */
     void startupStage();
 
-    /** Tells the fetch stage that the Icache is set. */
-    void setIcache();
-
     /** Handles retrying the fetch access. */
     void recvRetry();
 
@@ -464,7 +461,7 @@ class DefaultFetch
     ThreadID retryTid;
 
     /** Cache block size. */
-    int cacheBlkSize;
+    unsigned int cacheBlkSize;
 
     /** Mask to get a cache block's address. */
     Addr cacheBlkMask;
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 43effa9d7..0445de921 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2012 ARM Limited
+ * Copyright (c) 2010-2013 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -81,6 +81,8 @@ DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
       fetchWidth(params->fetchWidth),
       retryPkt(NULL),
       retryTid(InvalidThreadID),
+      cacheBlkSize(cpu->cacheLineSize()),
+      cacheBlkMask(cacheBlkSize - 1),
       numThreads(params->numThreads),
       numFetchingThreads(params->smtNumFetchingThreads),
       finishTranslationEvent(this)
@@ -126,11 +128,17 @@ DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
     instSize = sizeof(TheISA::MachInst);
 
     for (int i = 0; i < Impl::MaxThreads; i++) {
-        cacheData[i] = NULL;
         decoder[i] = new TheISA::Decoder;
     }
 
     branchPred = params->branchPred;
+
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        // Create space to store a cache line.
+        cacheData[tid] = new uint8_t[cacheBlkSize];
+        cacheDataPC[tid] = 0;
+        cacheDataValid[tid] = false;
+    }
 }
 
 template <class Impl>
@@ -336,34 +344,6 @@ DefaultFetch<Impl>::resetStage()
 
     wroteToTimeBuffer = false;
     _status = Inactive;
-
-    // this CPU could still be unconnected if we are restoring from a
-    // checkpoint and this CPU is to be switched in, thus we can only
-    // do this here if the instruction port is actually connected, if
-    // not we have to do it as part of takeOverFrom.
-    if (cpu->getInstPort().isConnected())
-        setIcache();
-}
-
-template<class Impl>
-void
-DefaultFetch<Impl>::setIcache()
-{
-    assert(cpu->getInstPort().isConnected());
-
-    // Size of cache block.
-    cacheBlkSize = cpu->getInstPort().peerBlockSize();
-
-    // Create mask to get rid of offset bits.
-    cacheBlkMask = (cacheBlkSize - 1);
-
-    for (ThreadID tid = 0; tid < numThreads; tid++) {
-        // Create space to store a cache line.
-        if (!cacheData[tid])
-            cacheData[tid] = new uint8_t[cacheBlkSize];
-        cacheDataPC[tid] = 0;
-        cacheDataValid[tid] = false;
-    }
 }
 
 template<class Impl>
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index f0b27ba41..077af1dd7 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -1,3 +1,4 @@
+
 /*
  * Copyright (c) 2010-2012 ARM Limited
  * All rights reserved
@@ -190,7 +191,7 @@ LSQUnit<Impl>::resetState()
     isLoadBlocked = false;
     loadBlockedHandled = false;
 
-    cacheBlockMask = 0;
+    cacheBlockMask = ~(cpu->cacheLineSize() - 1);
 }
 
 template<class Impl>
@@ -419,16 +420,6 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
 {
     int load_idx = loadHead;
 
-    if (!cacheBlockMask) {
-        assert(dcachePort);
-        Addr bs = dcachePort->peerBlockSize();
-
-        // Make sure we actually got a size
-        assert(bs != 0);
-
-        cacheBlockMask = ~(bs - 1);
-    }
-
     // Unlock the cpu-local monitor when the CPU sees a snoop to a locked
     // address. The CPU can speculatively execute a LL operation after a pending
     // SC operation in the pipeline and that can make the cache monitor the CPU
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 1dd9675f9..ffd1c4d43 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -287,14 +287,12 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
         traceData->setAddr(addr);
     }
 
-    //The block size of our peer.
-    unsigned blockSize = dcachePort.peerBlockSize();
     //The size of the data we're trying to read.
     int fullSize = size;
 
     //The address of the second part of this access if it needs to be split
     //across a cache line boundary.
-    Addr secondAddr = roundDown(addr + size - 1, blockSize);
+    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
 
     if (secondAddr > addr)
         size = secondAddr - addr;
@@ -375,14 +373,12 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
         traceData->setAddr(addr);
     }
 
-    //The block size of our peer.
-    unsigned blockSize = dcachePort.peerBlockSize();
     //The size of the data we're trying to read.
     int fullSize = size;
 
     //The address of the second part of this access if it needs to be split
     //across a cache line boundary.
-    Addr secondAddr = roundDown(addr + size - 1, blockSize);
+    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
 
     if(secondAddr > addr)
         size = secondAddr - addr;
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 1f453ca63..87a5245b2 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -404,7 +404,7 @@ TimingSimpleCPU::readMem(Addr addr, uint8_t *data,
     const int asid = 0;
     const ThreadID tid = 0;
     const Addr pc = thread->instAddr();
-    unsigned block_size = dcachePort.peerBlockSize();
+    unsigned block_size = cacheLineSize();
     BaseTLB::Mode mode = BaseTLB::Read;
 
     if (traceData) {
@@ -473,7 +473,7 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
     const int asid = 0;
     const ThreadID tid = 0;
     const Addr pc = thread->instAddr();
-    unsigned block_size = dcachePort.peerBlockSize();
+    unsigned block_size = cacheLineSize();
     BaseTLB::Mode mode = BaseTLB::Write;
 
     if (traceData) {
diff --git a/src/cpu/testers/memtest/memtest.cc b/src/cpu/testers/memtest/memtest.cc
index 37684ec5d..7f3ff0d03 100644
--- a/src/cpu/testers/memtest/memtest.cc
+++ b/src/cpu/testers/memtest/memtest.cc
@@ -95,7 +95,7 @@ MemTest::MemTest(const Params *p)
       tickEvent(this),
       cachePort("test", this),
       funcPort("functional", this),
-      funcProxy(funcPort),
+      funcProxy(funcPort, p->sys->cacheLineSize()),
       retryPkt(NULL),
 //      mainMem(main_mem),
 //      checkMem(check_mem),
@@ -105,6 +105,7 @@ MemTest::MemTest(const Params *p)
       percentUncacheable(p->percent_uncacheable),
       issueDmas(p->issue_dmas),
       masterId(p->sys->getMasterId(name())),
+      blockSize(p->sys->cacheLineSize()),
       progressInterval(p->progress_interval),
       nextProgressMessage(p->progress_interval),
       percentSourceUnaligned(p->percent_source_unaligned),
@@ -121,6 +122,9 @@ MemTest::MemTest(const Params *p)
     baseAddr2 = 0x400000;
     uncacheAddr = 0x800000;
 
+    blockAddrMask = blockSize - 1;
+    traceBlockAddr = blockAddr(traceBlockAddr);
+
     // set up counters
     noResponseCycles = 0;
     numReads = 0;
@@ -145,11 +149,6 @@ MemTest::getMasterPort(const std::string &if_name, PortID idx)
 void
 MemTest::init()
 {
-    // By the time init() is called, the ports should be hooked up.
-    blockSize = cachePort.peerBlockSize();
-    blockAddrMask = blockSize - 1;
-    traceBlockAddr = blockAddr(traceBlockAddr);
-
     // initial memory contents for both physical memory and functional
     // memory should be 0; no need to initialize them.
 }
diff --git a/src/cpu/testers/traffic_gen/traffic_gen.cc b/src/cpu/testers/traffic_gen/traffic_gen.cc
index ed3518bb4..0f006e67e 100644
--- a/src/cpu/testers/traffic_gen/traffic_gen.cc
+++ b/src/cpu/testers/traffic_gen/traffic_gen.cc
@@ -268,11 +268,10 @@ TrafficGen::parseConfig()
                             max_period, read_percent);
 
 
-                    if (port.deviceBlockSize() &&
-                        blocksize > port.deviceBlockSize())
+                    if (blocksize > system->cacheLineSize())
                         fatal("TrafficGen %s block size (%d) is larger than "
-                              "port block size (%d)\n", name(),
-                              blocksize, port.deviceBlockSize());
+                              "system block size (%d)\n", name(),
+                              blocksize, system->cacheLineSize());
 
                     if (read_percent > 100)
                         fatal("%s cannot have more than 100% reads", name());
diff --git a/src/cpu/thread_state.cc b/src/cpu/thread_state.cc
index d0f946989..7953b53c8 100644
--- a/src/cpu/thread_state.cc
+++ b/src/cpu/thread_state.cc
@@ -110,7 +110,8 @@ ThreadState::initMemProxies(ThreadContext *tc)
         // This cannot be done in the constructor as the thread state
         // itself is created in the base cpu constructor and the
         // getDataPort is a virtual function
-        physProxy = new PortProxy(baseCpu->getDataPort());
+        physProxy = new PortProxy(baseCpu->getDataPort(),
+                                  baseCpu->cacheLineSize());
 
         assert(virtProxy == NULL);
         virtProxy = new FSTranslatingPortProxy(tc);
diff --git a/src/dev/dma_device.cc b/src/dev/dma_device.cc
index f25f52334..5eb5e1f9d 100644
--- a/src/dev/dma_device.cc
+++ b/src/dev/dma_device.cc
@@ -163,7 +163,7 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
 
     DPRINTF(DMA, "Starting DMA for addr: %#x size: %d sched: %d\n", addr, size,
             event ? event->scheduled() : -1);
-    for (ChunkGenerator gen(addr, size, peerBlockSize());
+    for (ChunkGenerator gen(addr, size, sys->cacheLineSize());
          !gen.done(); gen.next()) {
         Request *req = new Request(gen.addr(), gen.size(), flag, masterId);
         PacketPtr pkt = new Packet(req, cmd);
diff --git a/src/dev/dma_device.hh b/src/dev/dma_device.hh
index 3b4bb522d..d1245b977 100644
--- a/src/dev/dma_device.hh
+++ b/src/dev/dma_device.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012 ARM Limited
+ * Copyright (c) 2012-2013 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -49,6 +49,7 @@
 #include "dev/io_device.hh"
 #include "params/DmaDevice.hh"
 #include "sim/drain.hh"
+#include "sim/system.hh"
 
 class DmaPort : public MasterPort
 {
@@ -146,7 +147,6 @@ class DmaPort : public MasterPort
 
     bool dmaPending() const { return pendingCount > 0; }
 
-    unsigned cacheBlockSize() const { return peerBlockSize(); }
     unsigned int drain(DrainManager *drainManger);
 };
 
@@ -178,7 +178,7 @@ class DmaDevice : public PioDevice
 
     unsigned int drain(DrainManager *drainManger);
 
-    unsigned cacheBlockSize() const { return dmaPort.cacheBlockSize(); }
+    unsigned int cacheBlockSize() const { return sys->cacheLineSize(); }
 
     virtual BaseMasterPort &getMasterPort(const std::string &if_name,
                                           PortID idx = InvalidPortID);
diff --git a/src/mem/Bus.py b/src/mem/Bus.py
index ca0f40e1e..e8e03ccb4 100644
--- a/src/mem/Bus.py
+++ b/src/mem/Bus.py
@@ -52,8 +52,6 @@ class BaseBus(MemObject):
     master = VectorMasterPort("vector port for connecting slaves")
     header_cycles = Param.Cycles(1, "cycles of overhead per transaction")
     width = Param.Unsigned(8, "bus width (bytes)")
-    block_size = Param.Unsigned(64, "The default block size if not set by " \
-                                    "any connected module")
 
     # The default port can be left unconnected, or be used to connect
     # a default slave port
diff --git a/src/mem/SimpleDRAM.py b/src/mem/SimpleDRAM.py
index 64e9f272b..ec76542d8 100644
--- a/src/mem/SimpleDRAM.py
+++ b/src/mem/SimpleDRAM.py
@@ -170,7 +170,7 @@ class SimpleDRAM(AbstractMemory):
 
     # tRC  - assumed to be 4 * tRP
 
-    # burst length for an access derived from peerBlockSize
+    # burst length for an access derived from the cache line size
 
 # A single DDR3 x64 interface (one command and address bus), with
 # default timings based on DDR3-1600 4 Gbit parts in an 8x8
diff --git a/src/mem/addr_mapper.cc b/src/mem/addr_mapper.cc
index 4aff9dcd8..0cc2e9c2f 100644
--- a/src/mem/addr_mapper.cc
+++ b/src/mem/addr_mapper.cc
@@ -51,12 +51,6 @@ AddrMapper::init()
 {
     if (!slavePort.isConnected() || !masterPort.isConnected())
         fatal("Address mapper is not connected on both sides.\n");
-
-    if ((slavePort.peerBlockSize() != masterPort.peerBlockSize()) &&
-        slavePort.peerBlockSize() && masterPort.peerBlockSize())
-        fatal("Slave port size %d, master port size %d \n "
-              "don't have the same block size... Not supported.\n",
-              slavePort.peerBlockSize(), masterPort.peerBlockSize());
 }
 
 BaseMasterPort&
@@ -195,18 +189,6 @@ AddrMapper::isSnooping() const
     return false;
 }
 
-unsigned
-AddrMapper::deviceBlockSizeMaster()
-{
-    return slavePort.peerBlockSize();
-}
-
-unsigned
-AddrMapper::deviceBlockSizeSlave()
-{
-    return masterPort.peerBlockSize();
-}
-
 void
 AddrMapper::recvRetryMaster()
 {
diff --git a/src/mem/addr_mapper.hh b/src/mem/addr_mapper.hh
index 6604096bd..6564a7490 100644
--- a/src/mem/addr_mapper.hh
+++ b/src/mem/addr_mapper.hh
@@ -143,11 +143,6 @@ class AddrMapper : public MemObject
             return mapper.isSnooping();
         }
 
-        unsigned deviceBlockSize() const
-        {
-            return mapper.deviceBlockSizeMaster();
-        }
-
         void recvRetry()
         {
             mapper.recvRetryMaster();
@@ -193,11 +188,6 @@ class AddrMapper : public MemObject
             return mapper.recvTimingSnoopResp(pkt);
         }
 
-        unsigned deviceBlockSize() const
-        {
-            return mapper.deviceBlockSizeSlave();
-        }
-
         AddrRangeList getAddrRanges() const
         {
             return mapper.getAddrRanges();
@@ -233,10 +223,6 @@ class AddrMapper : public MemObject
 
     bool recvTimingSnoopResp(PacketPtr pkt);
 
-    unsigned deviceBlockSizeMaster();
-
-    unsigned deviceBlockSizeSlave();
-
     virtual AddrRangeList getAddrRanges() const = 0;
 
     bool isSnooping() const;
diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc
index f8258086c..3b101ceab 100644
--- a/src/mem/bridge.cc
+++ b/src/mem/bridge.cc
@@ -110,11 +110,6 @@ Bridge::init()
     if (!slavePort.isConnected() || !masterPort.isConnected())
         fatal("Both ports of bus bridge are not connected to a bus.\n");
 
-    if (slavePort.peerBlockSize() != masterPort.peerBlockSize())
-        fatal("Slave port size %d, master port size %d \n " \
-              "Busses don't have the same block size... Not supported.\n",
-              slavePort.peerBlockSize(), masterPort.peerBlockSize());
-
     // notify the master side  of our address ranges
     slavePort.sendRangeChange();
 }
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index d94bd6a28..5ab7e5b04 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -60,8 +60,7 @@ BaseBus::BaseBus(const BaseBusParams *p)
       gotAddrRanges(p->port_default_connection_count +
                           p->port_master_connection_count, false),
       gotAllAddrRanges(false), defaultPortID(InvalidPortID),
-      useDefaultRange(p->use_default_range),
-      blockSize(p->block_size)
+      useDefaultRange(p->use_default_range)
 {}
 
 BaseBus::~BaseBus()
@@ -80,29 +79,6 @@ BaseBus::~BaseBus()
 void
 BaseBus::init()
 {
-    // determine the maximum peer block size, look at both the
-    // connected master and slave modules
-    uint32_t peer_block_size = 0;
-
-    for (MasterPortConstIter m = masterPorts.begin(); m != masterPorts.end();
-         ++m) {
-        peer_block_size = std::max((*m)->peerBlockSize(), peer_block_size);
-    }
-
-    for (SlavePortConstIter s = slavePorts.begin(); s != slavePorts.end();
-         ++s) {
-        peer_block_size = std::max((*s)->peerBlockSize(), peer_block_size);
-    }
-
-    // if the peers do not have a block size, use the default value
-    // set through the bus parameters
-    if (peer_block_size != 0)
-        blockSize = peer_block_size;
-
-    // check if the block size is a value known to work
-    if (!(blockSize == 16 || blockSize == 32 || blockSize == 64 ||
-          blockSize == 128))
-        warn_once("Block size is neither 16, 32, 64 or 128 bytes.\n");
 }
 
 BaseMasterPort &
@@ -559,12 +535,6 @@ BaseBus::getAddrRanges() const
     return busRanges;
 }
 
-unsigned
-BaseBus::deviceBlockSize() const
-{
-    return blockSize;
-}
-
 void
 BaseBus::regStats()
 {
diff --git a/src/mem/bus.hh b/src/mem/bus.hh
index fc240a22b..bdcf319f4 100644
--- a/src/mem/bus.hh
+++ b/src/mem/bus.hh
@@ -344,15 +344,6 @@ class BaseBus : public MemObject
      */
     void calcPacketTiming(PacketPtr pkt);
 
-    /**
-     * Ask everyone on the bus what their size is and determine the
-     * bus size as either the maximum, or if no device specifies a
-     * block size return the default.
-     *
-     * @return the max of all the sizes or the default if none is set
-     */
-    unsigned deviceBlockSize() const;
-
     /**
      * Remember for each of the master ports of the bus if we got an
      * address range from the connected slave. For convenience, also
@@ -381,8 +372,6 @@ class BaseBus : public MemObject
        addresses not handled by another port to default device. */
     const bool useDefaultRange;
 
-    uint32_t blockSize;
-
     BaseBus(const BaseBusParams *p);
 
     virtual ~BaseBus();
diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py
index 7f2c1cc6f..df4602199 100644
--- a/src/mem/cache/BaseCache.py
+++ b/src/mem/cache/BaseCache.py
@@ -48,7 +48,6 @@ class BaseCache(MemObject):
     type = 'BaseCache'
     cxx_header = "mem/cache/base.hh"
     assoc = Param.Int("associativity")
-    block_size = Param.Int("block size in bytes")
     hit_latency = Param.Cycles("The hit latency for this cache")
     response_latency = Param.Cycles(
             "Additional cache latency for the return path to core on a miss");
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index 62f1bb21b..03b4d5dc1 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -71,7 +71,7 @@ BaseCache::BaseCache(const Params *p)
       mshrQueue("MSHRs", p->mshrs, 4, MSHRQueue_MSHRs),
       writeBuffer("write buffer", p->write_buffers, p->mshrs+1000,
                   MSHRQueue_WriteBuffer),
-      blkSize(p->block_size),
+      blkSize(p->system->cacheLineSize()),
       hitLatency(p->hit_latency),
       responseLatency(p->response_latency),
       numTarget(p->tgts_per_mshr),
@@ -773,7 +773,7 @@ BaseCache::drain(DrainManager *dm)
 BaseCache *
 BaseCacheParams::create()
 {
-    unsigned numSets = size / (assoc * block_size);
+    unsigned numSets = size / (assoc * system->cacheLineSize());
 
     assert(tags);
 
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 729e1f32c..ab884372c 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -99,9 +99,6 @@ class Cache : public BaseCache
 
         virtual void recvFunctional(PacketPtr pkt);
 
-        virtual unsigned deviceBlockSize() const
-        { return cache->getBlockSize(); }
-
         virtual AddrRangeList getAddrRanges() const;
 
       public:
@@ -163,9 +160,6 @@ class Cache : public BaseCache
 
         virtual void recvFunctionalSnoop(PacketPtr pkt);
 
-        virtual unsigned deviceBlockSize() const
-        { return cache->getBlockSize(); }
-
       public:
 
         MemSidePort(const std::string &_name, Cache<TagStore> *_cache,
diff --git a/src/mem/cache/tags/Tags.py b/src/mem/cache/tags/Tags.py
index c1aad2444..c5beff4a7 100644
--- a/src/mem/cache/tags/Tags.py
+++ b/src/mem/cache/tags/Tags.py
@@ -46,8 +46,8 @@ class BaseTags(ClockedObject):
     # Get the size from the parent (cache)
     size = Param.MemorySize(Parent.size, "capacity in bytes")
 
-    # Get the block size from the parent (cache)
-    block_size = Param.Int(Parent.block_size, "block size in bytes")
+    # Get the block size from the parent (system)
+    block_size = Param.Int(Parent.cache_line_size, "block size in bytes")
 
     # Get the hit latency from the parent (cache)
     hit_latency = Param.Cycles(Parent.hit_latency,
diff --git a/src/mem/coherent_bus.hh b/src/mem/coherent_bus.hh
index 4908ba034..034b855ec 100644
--- a/src/mem/coherent_bus.hh
+++ b/src/mem/coherent_bus.hh
@@ -140,12 +140,6 @@ class CoherentBus : public BaseBus
         virtual AddrRangeList getAddrRanges() const
         { return bus.getAddrRanges(); }
 
-        /**
-         * Get the maximum block size as seen by the bus.
-         */
-        virtual unsigned deviceBlockSize() const
-        { return bus.deviceBlockSize(); }
-
     };
 
     /**
@@ -211,12 +205,6 @@ class CoherentBus : public BaseBus
         virtual void recvRetry()
         { bus.recvRetry(id); }
 
-        // Ask the bus to ask everyone on the bus what their block size is and
-        // take the max of it. This might need to be changed a bit if we ever
-        // support multiple block sizes.
-        virtual unsigned deviceBlockSize() const
-        { return bus.deviceBlockSize(); }
-
     };
 
     /**
diff --git a/src/mem/comm_monitor.cc b/src/mem/comm_monitor.cc
index 562f572d8..5db6f5e9a 100644
--- a/src/mem/comm_monitor.cc
+++ b/src/mem/comm_monitor.cc
@@ -378,18 +378,6 @@ CommMonitor::isSnooping() const
     return slavePort.isSnooping();
 }
 
-unsigned
-CommMonitor::deviceBlockSizeMaster()
-{
-    return slavePort.peerBlockSize();
-}
-
-unsigned
-CommMonitor::deviceBlockSizeSlave()
-{
-    return masterPort.peerBlockSize();
-}
-
 AddrRangeList
 CommMonitor::getAddrRanges() const
 {
diff --git a/src/mem/comm_monitor.hh b/src/mem/comm_monitor.hh
index d386eeba0..9dcd065ca 100644
--- a/src/mem/comm_monitor.hh
+++ b/src/mem/comm_monitor.hh
@@ -171,11 +171,6 @@ class CommMonitor : public MemObject
             return mon.isSnooping();
         }
 
-        unsigned deviceBlockSize() const
-        {
-            return mon.deviceBlockSizeMaster();
-        }
-
         void recvRetry()
         {
             mon.recvRetryMaster();
@@ -227,11 +222,6 @@ class CommMonitor : public MemObject
             return mon.recvTimingSnoopResp(pkt);
         }
 
-        unsigned deviceBlockSize() const
-        {
-            return mon.deviceBlockSizeSlave();
-        }
-
         AddrRangeList getAddrRanges() const
         {
             return mon.getAddrRanges();
@@ -267,10 +257,6 @@ class CommMonitor : public MemObject
 
     bool recvTimingSnoopResp(PacketPtr pkt);
 
-    unsigned deviceBlockSizeMaster();
-
-    unsigned deviceBlockSizeSlave();
-
     AddrRangeList getAddrRanges() const;
 
     bool isSnooping() const;
diff --git a/src/mem/fs_translating_port_proxy.cc b/src/mem/fs_translating_port_proxy.cc
index 9f1f7d019..8fb9b91ef 100644
--- a/src/mem/fs_translating_port_proxy.cc
+++ b/src/mem/fs_translating_port_proxy.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 ARM Limited
+ * Copyright (c) 2011,2013 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -50,16 +50,19 @@
 #include "cpu/base.hh"
 #include "cpu/thread_context.hh"
 #include "mem/fs_translating_port_proxy.hh"
+#include "sim/system.hh"
 
 using namespace TheISA;
 
 FSTranslatingPortProxy::FSTranslatingPortProxy(ThreadContext *tc)
-    : PortProxy(tc->getCpuPtr()->getDataPort()), _tc(tc)
+    : PortProxy(tc->getCpuPtr()->getDataPort(),
+                tc->getSystemPtr()->cacheLineSize()), _tc(tc)
 {
 }
 
-FSTranslatingPortProxy::FSTranslatingPortProxy(MasterPort &port)
-    : PortProxy(port), _tc(NULL)
+FSTranslatingPortProxy::FSTranslatingPortProxy(MasterPort &port,
+                                               unsigned int cacheLineSize)
+    : PortProxy(port, cacheLineSize), _tc(NULL)
 {
 }
 
diff --git a/src/mem/fs_translating_port_proxy.hh b/src/mem/fs_translating_port_proxy.hh
index c022f3d09..2a9223050 100644
--- a/src/mem/fs_translating_port_proxy.hh
+++ b/src/mem/fs_translating_port_proxy.hh
@@ -78,7 +78,7 @@ class FSTranslatingPortProxy : public PortProxy
 
     FSTranslatingPortProxy(ThreadContext* tc);
 
-    FSTranslatingPortProxy(MasterPort &port);
+    FSTranslatingPortProxy(MasterPort &port, unsigned int cacheLineSize);
 
     virtual ~FSTranslatingPortProxy();
 
diff --git a/src/mem/noncoherent_bus.hh b/src/mem/noncoherent_bus.hh
index e2148c60e..a1b9da6e3 100644
--- a/src/mem/noncoherent_bus.hh
+++ b/src/mem/noncoherent_bus.hh
@@ -132,12 +132,6 @@ class NoncoherentBus : public BaseBus
         virtual AddrRangeList getAddrRanges() const
         { return bus.getAddrRanges(); }
 
-        /**
-         * Get the maximum block size as seen by the bus.
-         */
-        virtual unsigned deviceBlockSize() const
-        { return bus.deviceBlockSize(); }
-
     };
 
     /**
@@ -177,12 +171,6 @@ class NoncoherentBus : public BaseBus
         virtual void recvRetry()
         { bus.recvRetry(id); }
 
-        /**
-         * Get the maximum block size as seen by the bus.
-         */
-        virtual unsigned deviceBlockSize() const
-        { return bus.deviceBlockSize(); }
-
     };
 
     /** Function called by the port when the bus is recieving a Timing
diff --git a/src/mem/port.cc b/src/mem/port.cc
index 45045f40e..898f19c08 100644
--- a/src/mem/port.cc
+++ b/src/mem/port.cc
@@ -155,12 +155,6 @@ MasterPort::unbind()
     _baseSlavePort = NULL;
 }
 
-unsigned
-MasterPort::peerBlockSize() const
-{
-    return _slavePort->deviceBlockSize();
-}
-
 AddrRangeList
 MasterPort::getAddrRanges() const
 {
@@ -238,12 +232,6 @@ SlavePort::bind(MasterPort& master_port)
     _masterPort = &master_port;
 }
 
-unsigned
-SlavePort::peerBlockSize() const
-{
-    return _masterPort->deviceBlockSize();
-}
-
 Tick
 SlavePort::sendAtomicSnoop(PacketPtr pkt)
 {
diff --git a/src/mem/port.hh b/src/mem/port.hh
index 18db800b6..8fefb2f3a 100644
--- a/src/mem/port.hh
+++ b/src/mem/port.hh
@@ -264,17 +264,6 @@ class MasterPort : public BaseMasterPort
      */
     virtual bool isSnooping() const { return false; }
 
-    /**
-     * Called by a peer port in order to determine the block size of
-     * the owner of this port.
-     */
-    virtual unsigned deviceBlockSize() const { return 0; }
-
-    /** Called by the associated device if it wishes to find out the blocksize
-        of the device on attached to the peer port.
-    */
-    unsigned peerBlockSize() const;
-
     /**
      * Get the address ranges of the connected slave port.
      */
@@ -405,17 +394,6 @@ class SlavePort : public BaseSlavePort
      */
     void sendRetry();
 
-    /**
-     * Called by a peer port in order to determine the block size of
-     * the owner of this port.
-     */
-    virtual unsigned deviceBlockSize() const { return 0; }
-
-    /** Called by the associated device if it wishes to find out the blocksize
-        of the device on attached to the peer port.
-    */
-    unsigned peerBlockSize() const;
-
     /**
      * Find out if the peer master port is snooping or not.
      *
diff --git a/src/mem/port_proxy.cc b/src/mem/port_proxy.cc
index 38162e1bf..ea6dd0fc2 100644
--- a/src/mem/port_proxy.cc
+++ b/src/mem/port_proxy.cc
@@ -45,7 +45,7 @@ PortProxy::blobHelper(Addr addr, uint8_t *p, int size, MemCmd cmd) const
 {
     Request req;
 
-    for (ChunkGenerator gen(addr, size, _port.peerBlockSize());
+    for (ChunkGenerator gen(addr, size, _cacheLineSize);
          !gen.done(); gen.next()) {
         req.setPhys(gen.addr(), gen.size(), 0, Request::funcMasterId);
         Packet pkt(&req, cmd);
diff --git a/src/mem/port_proxy.hh b/src/mem/port_proxy.hh
index 4600cd407..f1e01c385 100644
--- a/src/mem/port_proxy.hh
+++ b/src/mem/port_proxy.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012 ARM Limited
+ * Copyright (c) 2011-2013 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -88,10 +88,14 @@ class PortProxy
     /** The actual physical port used by this proxy. */
     MasterPort &_port;
 
+    /** Granularity of any transactions issued through this proxy. */
+    const unsigned int _cacheLineSize;
+
     void blobHelper(Addr addr, uint8_t *p, int size, MemCmd cmd) const;
 
   public:
-    PortProxy(MasterPort &port) : _port(port) { }
+    PortProxy(MasterPort &port, unsigned int cacheLineSize) :
+        _port(port), _cacheLineSize(cacheLineSize) { }
     virtual ~PortProxy() { }
 
     /**
diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc
index 72dc1447a..719e2f73f 100644
--- a/src/mem/ruby/system/RubyPort.cc
+++ b/src/mem/ruby/system/RubyPort.cc
@@ -476,12 +476,6 @@ RubyPort::M5Port::isPhysMemAddress(Addr addr)
     return ruby_port->system->isMemAddr(addr);
 }
 
-unsigned
-RubyPort::M5Port::deviceBlockSize() const
-{
-    return (unsigned) RubySystem::getBlockSizeBytes();
-}
-
 void
 RubyPort::ruby_eviction_callback(const Address& address)
 {
diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh
index 70f74d83b..1e9336d76 100644
--- a/src/mem/ruby/system/RubyPort.hh
+++ b/src/mem/ruby/system/RubyPort.hh
@@ -73,7 +73,6 @@ class RubyPort : public MemObject
                RubySystem*_system, bool _access_phys_mem);
         void hitCallback(PacketPtr pkt);
         void evictionCallback(const Address& address);
-        unsigned deviceBlockSize() const;
         
         bool onRetryList() 
         { return _onRetryList; }
diff --git a/src/mem/se_translating_port_proxy.cc b/src/mem/se_translating_port_proxy.cc
index 7857217f6..1060a3270 100644
--- a/src/mem/se_translating_port_proxy.cc
+++ b/src/mem/se_translating_port_proxy.cc
@@ -50,13 +50,14 @@
 #include "mem/page_table.hh"
 #include "mem/se_translating_port_proxy.hh"
 #include "sim/process.hh"
+#include "sim/system.hh"
 
 using namespace TheISA;
 
 SETranslatingPortProxy::SETranslatingPortProxy(MasterPort& port, Process *p,
                                            AllocType alloc)
-    : PortProxy(port), pTable(p->pTable), process(p),
-      allocating(alloc)
+    : PortProxy(port, p->system->cacheLineSize()), pTable(p->pTable),
+      process(p), allocating(alloc)
 { }
 
 SETranslatingPortProxy::~SETranslatingPortProxy()
diff --git a/src/mem/simple_dram.cc b/src/mem/simple_dram.cc
index bd3a70d25..2bb1cec9d 100644
--- a/src/mem/simple_dram.cc
+++ b/src/mem/simple_dram.cc
@@ -43,6 +43,7 @@
 #include "debug/DRAM.hh"
 #include "debug/DRAMWR.hh"
 #include "mem/simple_dram.hh"
+#include "sim/system.hh"
 
 using namespace std;
 
@@ -94,7 +95,7 @@ SimpleDRAM::init()
 
-    // get the burst size from the connected port as it is currently
+    // get the burst size from the system as it is currently
     // assumed to be equal to the cache line size
-    bytesPerCacheLine = port.peerBlockSize();
+    bytesPerCacheLine = _system->cacheLineSize();
 
     // we could deal with plenty options here, but for now do a quick
     // sanity check
diff --git a/src/sim/System.py b/src/sim/System.py
index 2cc171881..302e2fa60 100644
--- a/src/sim/System.py
+++ b/src/sim/System.py
@@ -63,6 +63,8 @@ class System(MemObject):
     # I/O bridge or cache
     mem_ranges = VectorParam.AddrRange([], "Ranges that constitute main memory")
 
+    cache_line_size = Param.Unsigned(64, "Cache line size in bytes")
+
     work_item_id = Param.Int(-1, "specific work item id")
     num_work_ids = Param.Int(16, "Number of distinct work item types")
     work_begin_cpu_id_exit = Param.Int(-1,
diff --git a/src/sim/system.cc b/src/sim/system.cc
index 03f8f8180..f9799d26f 100644
--- a/src/sim/system.cc
+++ b/src/sim/system.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012 ARM Limited
+ * Copyright (c) 2011-2013 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -78,12 +78,13 @@ System::System(Params *p)
       _numContexts(0),
       pagePtr(0),
       init_param(p->init_param),
-      physProxy(_systemPort),
-      virtProxy(_systemPort),
+      physProxy(_systemPort, p->cache_line_size),
+      virtProxy(_systemPort, p->cache_line_size),
       loadAddrMask(p->load_addr_mask),
       nextPID(0),
       physmem(name() + ".physmem", p->memories),
       memoryMode(p->mem_mode),
+      _cacheLineSize(p->cache_line_size),
       workItemsBegin(0),
       workItemsEnd(0),
       numWorkIds(p->num_work_ids),
@@ -100,6 +101,11 @@ System::System(Params *p)
             debugSymbolTable = new SymbolTable;
     }
 
+    // check if the cache line size is a value known to work
+    if (!(_cacheLineSize == 16 || _cacheLineSize == 32 ||
+          _cacheLineSize == 64 || _cacheLineSize == 128))
+        warn_once("Cache line size is neither 16, 32, 64 nor 128 bytes.\n");
+
     // Get the generic system master IDs
     MasterID tmp_id M5_VAR_USED;
     tmp_id = getMasterId("writebacks");
diff --git a/src/sim/system.hh b/src/sim/system.hh
index e7407105a..6e44d0ab5 100644
--- a/src/sim/system.hh
+++ b/src/sim/system.hh
@@ -179,6 +179,11 @@ class System : public MemObject
     void setMemoryMode(Enums::MemoryMode mode);
     /** @} */
 
+    /**
+     * Get the cache line size of the system.
+     */
+    unsigned int cacheLineSize() const { return _cacheLineSize; }
+
     PCEventQueue pcEventQueue;
 
     std::vector<ThreadContext *> threadContexts;
@@ -263,6 +268,9 @@ class System : public MemObject
     PhysicalMemory physmem;
 
     Enums::MemoryMode memoryMode;
+
+    const unsigned int _cacheLineSize;
+
     uint64_t workItemsBegin;
     uint64_t workItemsEnd;
     uint32_t numWorkIds;
-- 
2.30.2