From 88554790c34f6fef4ba6285927fb9742b90ab258 Mon Sep 17 00:00:00 2001 From: Andreas Hansson Date: Mon, 15 Oct 2012 08:10:54 -0400 Subject: [PATCH] Mem: Use cycles to express cache-related latencies This patch changes the cache-related latencies from an absolute time expressed in Ticks, to a number of cycles that can be scaled with the clock period of the caches. Ultimately this patch serves to enable future work that involves dynamic frequency scaling. As an immediate benefit it also makes it more convenient to specify cache performance without implicitly assuming a specific CPU core operating frequency. The stat blocked_cycles, which previously counted in ticks, is now updated to count in cycles. As the timing is now rounded to the clock edges of the cache, some regression results change. Most of them see only very minor changes, whereas some regressions with a short run-time are perturbed quite significantly. A follow-on patch updates all the statistics for the regressions. --- configs/common/Caches.py | 28 ++++++++---- configs/common/O3_ARM_v7a.py | 21 ++++----- configs/example/fs.py | 3 +- src/mem/cache/BaseCache.py | 16 ++++++- src/mem/cache/base.cc | 4 ++ src/mem/cache/base.hh | 10 ++--- src/mem/cache/cache.hh | 8 ++-- src/mem/cache/cache_impl.hh | 26 +++++------ src/mem/cache/prefetch/Prefetcher.py | 47 ++++++++++++++++++-- src/mem/cache/prefetch/base.cc | 8 ++-- src/mem/cache/prefetch/base.hh | 8 ++-- src/mem/cache/prefetch/ghb.cc | 2 +- src/mem/cache/prefetch/ghb.hh | 2 +- src/mem/cache/prefetch/stride.cc | 2 +- src/mem/cache/prefetch/stride.hh | 2 +- src/mem/cache/prefetch/tagged.cc | 2 +- src/mem/cache/prefetch/tagged.hh | 2 +- src/mem/cache/tags/fa_lru.cc | 4 +- src/mem/cache/tags/fa_lru.hh | 8 ++-- src/mem/cache/tags/iic.cc | 17 ++++--- src/mem/cache/tags/iic.hh | 8 ++-- src/mem/cache/tags/lru.cc | 6 +-- src/mem/cache/tags/lru.hh | 6 +-- tests/configs/inorder-timing.py | 8 ++-- tests/configs/memtest.py | 10 ++--- tests/configs/o3-timing-checker.py | 7 ++- tests/configs/o3-timing-mp.py | 12 ++--- tests/configs/o3-timing.py | 7 ++- tests/configs/pc-o3-timing.py | 18 ++++---- tests/configs/pc-simple-atomic.py | 18 ++++---- tests/configs/pc-simple-timing.py | 18 ++++---- tests/configs/realview-o3-checker.py | 14 +++--- tests/configs/realview-o3-dual.py | 18 ++++---- tests/configs/realview-o3.py | 14 +++--- tests/configs/realview-simple-atomic-dual.py | 18 ++++---- tests/configs/realview-simple-atomic.py | 14 +++--- tests/configs/realview-simple-timing-dual.py | 18 ++++---- tests/configs/realview-simple-timing.py | 14 +++--- tests/configs/simple-atomic-mp.py | 12 ++--- tests/configs/simple-timing-mp.py | 12 ++--- tests/configs/simple-timing.py | 7 +-- tests/configs/tsunami-inorder.py | 14 +++--- tests/configs/tsunami-o3-dual.py | 18 ++++---- tests/configs/tsunami-o3.py | 14 +++--- tests/configs/tsunami-simple-atomic-dual.py | 18 ++++---- tests/configs/tsunami-simple-atomic.py | 14 +++--- tests/configs/tsunami-simple-timing-dual.py | 18 ++++---- tests/configs/tsunami-simple-timing.py | 14 +++--- 48 files changed, 330 insertions(+), 259 deletions(-) diff --git a/configs/common/Caches.py b/configs/common/Caches.py index f16a83559..0b5f9e182 100644 --- a/configs/common/Caches.py +++ b/configs/common/Caches.py @@ -1,3 +1,15 @@ +# Copyright (c) 2012 ARM Limited +# All rights reserved.
+# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# # Copyright (c) 2006-2007 The Regents of The University of Michigan # All rights reserved. # @@ -31,8 +43,8 @@ from m5.objects import * class L1Cache(BaseCache): assoc = 2 block_size = 64 - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 mshrs = 10 tgts_per_mshr = 20 is_top_level = True @@ -40,16 +52,16 @@ class L1Cache(BaseCache): class L2Cache(BaseCache): assoc = 8 block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 20 tgts_per_mshr = 12 class PageTableWalkerCache(BaseCache): assoc = 2 block_size = 64 - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 mshrs = 10 size = '1kB' tgts_per_mshr = 12 @@ -58,8 +70,8 @@ class PageTableWalkerCache(BaseCache): class IOCache(BaseCache): assoc = 8 block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 50 + response_latency = 50 mshrs = 20 size = '1kB' tgts_per_mshr = 12 diff --git a/configs/common/O3_ARM_v7a.py b/configs/common/O3_ARM_v7a.py index 20ef10ebc..c971df7fb 100644 --- a/configs/common/O3_ARM_v7a.py +++ b/configs/common/O3_ARM_v7a.py @@ -145,10 +145,9 @@ class O3_ARM_v7a_3(DerivO3CPU): defer_registration= False # Instruction Cache -# All latencys assume a 1GHz clock rate, with a faster clock they would be faster class O3_ARM_v7a_ICache(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 1 + response_latency = 1 block_size = 64 mshrs = 2 tgts_per_mshr = 8 @@ -157,10 +156,9 @@ class O3_ARM_v7a_ICache(BaseCache): is_top_level = 'true' # Data Cache -# All latencys assume a 1GHz clock rate, with a faster clock they would be faster class O3_ARM_v7a_DCache(BaseCache): - hit_latency = '2ns' - response_latency = '2ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 6 tgts_per_mshr = 8 @@ -172,8 +170,8 @@ class O3_ARM_v7a_DCache(BaseCache): # TLB Cache # Use a cache as a L2 TLB class O3_ARM_v7aWalkCache(BaseCache): - hit_latency = '4ns' - response_latency = '4ns' + hit_latency = 4 + response_latency = 4 block_size = 64 mshrs = 6 tgts_per_mshr = 8 @@ -184,10 +182,9 @@ class O3_ARM_v7aWalkCache(BaseCache): # L2 Cache -# All latencys assume a 1GHz clock rate, with a faster clock they would be faster class O3_ARM_v7aL2(BaseCache): - hit_latency = '12ns' - response_latency = '12ns' + hit_latency = 12 + response_latency = 12 block_size = 64 mshrs = 16 tgts_per_mshr = 8 @@ -196,5 +193,5 @@ class O3_ARM_v7aL2(BaseCache): write_buffers = 8 prefetch_on_access = 'true' # Simple stride prefetcher - prefetcher = StridePrefetcher(degree=8, latency='1.0ns') + prefetcher = StridePrefetcher(degree=8, latency = 1) diff --git a/configs/example/fs.py b/configs/example/fs.py index 724e32813..ddba8554a 100644 --- a/configs/example/fs.py +++ b/configs/example/fs.py @@ -122,7 +122,8 @@ if bm[0]: else: mem_size = SysConfig().mem() if options.caches or options.l2cache: - test_sys.iocache = 
IOCache(addr_ranges=[test_sys.physmem.range]) + test_sys.iocache = IOCache(clock = '1GHz', + addr_ranges=[test_sys.physmem.range]) test_sys.iocache.cpu_side = test_sys.iobus.master test_sys.iocache.mem_side = test_sys.membus.slave else: diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py index fde0aa492..6fe73a9c2 100644 --- a/src/mem/cache/BaseCache.py +++ b/src/mem/cache/BaseCache.py @@ -1,3 +1,15 @@ +# Copyright (c) 2012 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# # Copyright (c) 2005-2007 The Regents of The University of Michigan # All rights reserved. # @@ -36,8 +48,8 @@ class BaseCache(MemObject): type = 'BaseCache' assoc = Param.Int("associativity") block_size = Param.Int("block size in bytes") - hit_latency = Param.Latency("The hit latency for this cache") - response_latency = Param.Latency( + hit_latency = Param.Cycles("The hit latency for this cache") + response_latency = Param.Cycles( "Additional cache latency for the return path to core on a miss"); hash_delay = Param.Cycles(1, "time in cycles of hash access") max_miss_count = Param.Counter(0, diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc index 4dd428a2e..c95999f3e 100644 --- a/src/mem/cache/base.cc +++ b/src/mem/cache/base.cc @@ -81,6 +81,10 @@ BaseCache::BaseCache(const Params *p) addrRanges(p->addr_ranges.begin(), p->addr_ranges.end()), system(p->system) { + // ensure the clock is not running at an unreasonable clock speed + if (clock == 1) + panic("Cache %s has a cycle time of 1 tick. Specify a clock.\n", + name()); } void diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh index da72667b3..2e31836c0 100644 --- a/src/mem/cache/base.hh +++ b/src/mem/cache/base.hh @@ -229,7 +229,7 @@ class BaseCache : public MemObject /** * The latency of a hit in this device. */ - const Tick hitLatency; + const Cycles hitLatency; /** * The latency of sending reponse to its upper level cache/core on a @@ -237,7 +237,7 @@ class BaseCache : public MemObject * miss is much quicker that the hit latency. The responseLatency parameter * tries to capture this latency. */ - const Tick responseLatency; + const Cycles responseLatency; /** The number of targets for each MSHR. */ const int numTarget; @@ -260,7 +260,7 @@ class BaseCache : public MemObject uint64_t order; /** Stores time the cache blocked for statistics. */ - Tick blockedCycle; + Cycles blockedCycle; /** Pointer to the MSHR that has no targets. 
*/ MSHR *noTargetMSHR; @@ -492,7 +492,7 @@ class BaseCache : public MemObject uint8_t flag = 1 << cause; if (blocked == 0) { blocked_causes[cause]++; - blockedCycle = curTick(); + blockedCycle = curCycle(); cpuSidePort->setBlocked(); } blocked |= flag; @@ -512,7 +512,7 @@ class BaseCache : public MemObject blocked &= ~flag; DPRINTF(Cache,"Unblocking for cause %d, mask=%d\n", cause, blocked); if (blocked == 0) { - blocked_cycles[cause] += curTick() - blockedCycle; + blocked_cycles[cause] += curCycle() - blockedCycle; cpuSidePort->clearBlocked(); } } diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 3c34c10f7..be81736aa 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -206,7 +206,7 @@ class Cache : public BaseCache * @return Boolean indicating whether the request was satisfied. */ bool access(PacketPtr pkt, BlkType *&blk, - int &lat, PacketList &writebacks); + Cycles &lat, PacketList &writebacks); /** *Handle doing the Compare and Swap function for SPARC. @@ -272,7 +272,7 @@ class Cache : public BaseCache /** * Performs the access specified by the request. * @param pkt The request to perform. - * @return The result of the access. + * @return The number of ticks required for the access. */ Tick atomicAccess(PacketPtr pkt); @@ -299,9 +299,9 @@ class Cache : public BaseCache * Snoop for the provided request in the cache and return the estimated * time of completion. * @param pkt The memory request to snoop - * @return The estimated completion time. + * @return The number of cycles required for the snoop. */ - Tick snoopAtomic(PacketPtr pkt); + Cycles snoopAtomic(PacketPtr pkt); /** * Squash all requests associated with specified thread. diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index a22003c4f..44acaef5b 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -275,7 +275,7 @@ Cache::squash(int threadNum) template bool Cache::access(PacketPtr pkt, BlkType *&blk, - int &lat, PacketList &writebacks) + Cycles &lat, PacketList &writebacks) { if (pkt->req->isUncacheable()) { if (pkt->req->isClearLL()) { @@ -392,7 +392,7 @@ Cache::timingAccess(PacketPtr pkt) pendingDelete.clear(); // we charge hitLatency for doing just about anything here - Tick time = curTick() + hitLatency; + Tick time = clockEdge(hitLatency); if (pkt->isResponse()) { // must be cache-to-cache response from upper to lower level @@ -463,7 +463,7 @@ Cache::timingAccess(PacketPtr pkt) return true; } - int lat = hitLatency; + Cycles lat = hitLatency; BlkType *blk = NULL; PacketList writebacks; @@ -505,7 +505,7 @@ Cache::timingAccess(PacketPtr pkt) if (needsResponse) { pkt->makeTimingResponse(); - cpuSidePort->schedTimingResp(pkt, curTick()+lat); + cpuSidePort->schedTimingResp(pkt, clockEdge(lat)); } else { /// @todo nominally we should just delete the packet here, /// however, until 4-phase stuff we can't because sending @@ -637,7 +637,7 @@ template Tick Cache::atomicAccess(PacketPtr pkt) { - int lat = hitLatency; + Cycles lat = hitLatency; // @TODO: make this a parameter bool last_level_cache = false; @@ -657,7 +657,7 @@ Cache::atomicAccess(PacketPtr pkt) if (!last_level_cache) { DPRINTF(Cache, "forwarding mem-inhibited %s on 0x%x\n", pkt->cmdString(), pkt->getAddr()); - lat += memSidePort->sendAtomic(pkt); + lat += ticksToCycles(memSidePort->sendAtomic(pkt)); } } else { DPRINTF(Cache, "rcvd mem-inhibited %s on 0x%x: not responding\n", @@ -693,7 +693,7 @@ Cache::atomicAccess(PacketPtr pkt) CacheBlk::State old_state = blk ? 
blk->status : 0; #endif - lat += memSidePort->sendAtomic(bus_pkt); + lat += ticksToCycles(memSidePort->sendAtomic(bus_pkt)); DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n", bus_pkt->cmdString(), bus_pkt->getAddr(), old_state); @@ -821,7 +821,7 @@ template void Cache::handleResponse(PacketPtr pkt) { - Tick time = curTick() + hitLatency; + Tick time = clockEdge(hitLatency); MSHR *mshr = dynamic_cast(pkt->senderState); bool is_error = pkt->isError(); @@ -901,7 +901,7 @@ Cache::handleResponse(PacketPtr pkt) // responseLatency is the latency of the return path // from lower level caches/memory to an upper level cache or // the core. - completion_time = responseLatency + + completion_time = responseLatency * clock + (transfer_offset ? pkt->finishTime : pkt->firstWordTime); assert(!target->pkt->req->isUncacheable()); @@ -917,13 +917,13 @@ Cache::handleResponse(PacketPtr pkt) // responseLatency is the latency of the return path // from lower level caches/memory to an upper level cache or // the core. - completion_time = responseLatency + pkt->finishTime; + completion_time = responseLatency * clock + pkt->finishTime; target->pkt->req->setExtraData(0); } else { // not a cache fill, just forwarding response // responseLatency is the latency of the return path // from lower level cahces/memory to the core. - completion_time = responseLatency + pkt->finishTime; + completion_time = responseLatency * clock + pkt->finishTime; if (pkt->isRead() && !is_error) { target->pkt->setData(pkt->getPtr()); } @@ -1173,7 +1173,7 @@ doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data, // invalidate it. pkt->cmd = MemCmd::ReadRespWithInvalidate; } - memSidePort->schedTimingSnoopResp(pkt, curTick() + hitLatency); + memSidePort->schedTimingSnoopResp(pkt, clockEdge(hitLatency)); } template @@ -1366,7 +1366,7 @@ Cache::CpuSidePort::recvTimingSnoopResp(PacketPtr pkt) } template -Tick +Cycles Cache::snoopAtomic(PacketPtr pkt) { if (pkt->req->isUncacheable() || pkt->cmd == MemCmd::Writeback) { diff --git a/src/mem/cache/prefetch/Prefetcher.py b/src/mem/cache/prefetch/Prefetcher.py index fa926e235..e590410ae 100644 --- a/src/mem/cache/prefetch/Prefetcher.py +++ b/src/mem/cache/prefetch/Prefetcher.py @@ -1,8 +1,48 @@ -from m5.SimObject import SimObject +# Copyright (c) 2012 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Copyright (c) 2005 The Regents of The University of Michigan +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Ron Dreslinski + +from ClockedObject import ClockedObject from m5.params import * from m5.proxy import * -class BasePrefetcher(SimObject): +class BasePrefetcher(ClockedObject): type = 'BasePrefetcher' abstract = True size = Param.Int(100, @@ -13,8 +53,7 @@ class BasePrefetcher(SimObject): "Squash prefetches with a later time on a subsequent miss") degree = Param.Int(1, "Degree of the prefetch depth") - latency = Param.Latency('10t', - "Latency of the prefetcher") + latency = Param.Cycles('1', "Latency of the prefetcher") use_master_id = Param.Bool(True, "Use the master id to separate calculations of prefetches") data_accesses_only = Param.Bool(False, diff --git a/src/mem/cache/prefetch/base.cc b/src/mem/cache/prefetch/base.cc index f3ac5f046..be05a464f 100644 --- a/src/mem/cache/prefetch/base.cc +++ b/src/mem/cache/prefetch/base.cc @@ -45,7 +45,7 @@ #include "sim/system.hh" BasePrefetcher::BasePrefetcher(const Params *p) - : SimObject(p), size(p->size), latency(p->latency), degree(p->degree), + : ClockedObject(p), size(p->size), latency(p->latency), degree(p->degree), useMasterId(p->use_master_id), pageStop(!p->cross_pages), serialSquash(p->serial_squash), onlyData(p->data_accesses_only), system(p->sys), masterId(system->getMasterId(name())) @@ -212,11 +212,11 @@ BasePrefetcher::notify(PacketPtr &pkt, Tick time) std::list addresses; - std::list delays; + std::list delays; calculatePrefetch(pkt, addresses, delays); std::list::iterator addrIter = addresses.begin(); - std::list::iterator delayIter = delays.begin(); + std::list::iterator delayIter = delays.begin(); for (; addrIter != addresses.end(); ++addrIter, ++delayIter) { Addr addr = *addrIter; @@ -241,7 +241,7 @@ BasePrefetcher::notify(PacketPtr &pkt, Tick time) prefetch->req->setThreadContext(pkt->req->contextId(), pkt->req->threadId()); - prefetch->time = time + (*delayIter); // @todo ADD LATENCY HERE + prefetch->time = time + clock * *delayIter; // We just remove the head if we are full if (pf.size() == size) { diff --git a/src/mem/cache/prefetch/base.hh 
b/src/mem/cache/prefetch/base.hh index 1517be50c..99385c1c1 100644 --- a/src/mem/cache/prefetch/base.hh +++ b/src/mem/cache/prefetch/base.hh @@ -41,11 +41,11 @@ #include "base/statistics.hh" #include "mem/packet.hh" #include "params/BaseCache.hh" -#include "sim/sim_object.hh" +#include "sim/clocked_object.hh" class BaseCache; -class BasePrefetcher : public SimObject +class BasePrefetcher : public ClockedObject { protected: @@ -64,7 +64,7 @@ class BasePrefetcher : public SimObject int blkSize; /** The latency before a prefetch is issued */ - Tick latency; + const Cycles latency; /** The number of prefetches to issue */ unsigned degree; @@ -133,7 +133,7 @@ class BasePrefetcher : public SimObject virtual void calculatePrefetch(PacketPtr &pkt, std::list &addresses, - std::list &delays) = 0; + std::list &delays) = 0; std::list::iterator inPrefetch(Addr address); diff --git a/src/mem/cache/prefetch/ghb.cc b/src/mem/cache/prefetch/ghb.cc index 8e42a4e2b..9ceb051a7 100644 --- a/src/mem/cache/prefetch/ghb.cc +++ b/src/mem/cache/prefetch/ghb.cc @@ -40,7 +40,7 @@ void GHBPrefetcher::calculatePrefetch(PacketPtr &pkt, std::list &addresses, - std::list &delays) + std::list &delays) { Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1); int master_id = useMasterId ? pkt->req->masterId() : 0; diff --git a/src/mem/cache/prefetch/ghb.hh b/src/mem/cache/prefetch/ghb.hh index ff713876a..3e4123de0 100644 --- a/src/mem/cache/prefetch/ghb.hh +++ b/src/mem/cache/prefetch/ghb.hh @@ -57,7 +57,7 @@ class GHBPrefetcher : public BasePrefetcher ~GHBPrefetcher() {} void calculatePrefetch(PacketPtr &pkt, std::list &addresses, - std::list &delays); + std::list &delays); }; #endif // __MEM_CACHE_PREFETCH_GHB_PREFETCHER_HH__ diff --git a/src/mem/cache/prefetch/stride.cc b/src/mem/cache/prefetch/stride.cc index feaa9494e..cb67f50f8 100644 --- a/src/mem/cache/prefetch/stride.cc +++ b/src/mem/cache/prefetch/stride.cc @@ -40,7 +40,7 @@ void StridePrefetcher::calculatePrefetch(PacketPtr &pkt, std::list &addresses, - std::list &delays) + std::list &delays) { if (!pkt->req->hasPC()) { DPRINTF(HWPrefetch, "ignoring request with no PC"); diff --git a/src/mem/cache/prefetch/stride.hh b/src/mem/cache/prefetch/stride.hh index 51b4252a1..89ac7acad 100644 --- a/src/mem/cache/prefetch/stride.hh +++ b/src/mem/cache/prefetch/stride.hh @@ -75,7 +75,7 @@ class StridePrefetcher : public BasePrefetcher ~StridePrefetcher() {} void calculatePrefetch(PacketPtr &pkt, std::list &addresses, - std::list &delays); + std::list &delays); }; #endif // __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__ diff --git a/src/mem/cache/prefetch/tagged.cc b/src/mem/cache/prefetch/tagged.cc index c875b586b..f34b28b14 100644 --- a/src/mem/cache/prefetch/tagged.cc +++ b/src/mem/cache/prefetch/tagged.cc @@ -43,7 +43,7 @@ TaggedPrefetcher::TaggedPrefetcher(const Params *p) void TaggedPrefetcher:: calculatePrefetch(PacketPtr &pkt, std::list &addresses, - std::list &delays) + std::list &delays) { Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1); diff --git a/src/mem/cache/prefetch/tagged.hh b/src/mem/cache/prefetch/tagged.hh index 8037196f8..a93d424e3 100644 --- a/src/mem/cache/prefetch/tagged.hh +++ b/src/mem/cache/prefetch/tagged.hh @@ -49,7 +49,7 @@ class TaggedPrefetcher : public BasePrefetcher ~TaggedPrefetcher() {} void calculatePrefetch(PacketPtr &pkt, std::list &addresses, - std::list &delays); + std::list &delays); }; #endif // __MEM_CACHE_PREFETCH_TAGGED_PREFETCHER_HH__ diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc index 
cc2f12eef..1a607dc80 100644 --- a/src/mem/cache/tags/fa_lru.cc +++ b/src/mem/cache/tags/fa_lru.cc @@ -42,7 +42,7 @@ using namespace std; -FALRU::FALRU(unsigned _blkSize, unsigned _size, unsigned hit_latency) +FALRU::FALRU(unsigned _blkSize, unsigned _size, Cycles hit_latency) : blkSize(_blkSize), size(_size), hitLatency(hit_latency) { if (!isPowerOf2(blkSize)) @@ -159,7 +159,7 @@ FALRU::invalidate(FALRU::BlkType *blk) } FALRUBlk* -FALRU::accessBlock(Addr addr, int &lat, int context_src, int *inCache) +FALRU::accessBlock(Addr addr, Cycles &lat, int context_src, int *inCache) { accesses++; int tmp_in_cache = 0; diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh index 66f70a89b..19e21688c 100644 --- a/src/mem/cache/tags/fa_lru.hh +++ b/src/mem/cache/tags/fa_lru.hh @@ -85,7 +85,7 @@ class FALRU : public BaseTags /** The size of the cache. */ const unsigned size; /** The hit latency of the cache. */ - const unsigned hitLatency; + const Cycles hitLatency; /** Array of pointers to blocks at the cache size boundaries. */ FALRUBlk **cacheBoundaries; @@ -155,7 +155,7 @@ public: * @param size The size of the cache. * @param hit_latency The hit latency of the cache. */ - FALRU(unsigned blkSize, unsigned size, unsigned hit_latency); + FALRU(unsigned blkSize, unsigned size, Cycles hit_latency); ~FALRU(); /** @@ -181,7 +181,7 @@ public: * @param inCache The FALRUBlk::inCache flags. * @return Pointer to the cache block. */ - FALRUBlk* accessBlock(Addr addr, int &lat, int context_src, int *inCache = 0); + FALRUBlk* accessBlock(Addr addr, Cycles &lat, int context_src, int *inCache = 0); /** * Find the block in the cache, do not update the replacement data. @@ -205,7 +205,7 @@ public: * Return the hit latency of this cache. * @return The hit latency. */ - int getHitLatency() const + Cycles getHitLatency() const { return hitLatency; } diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc index 45807ef28..b9e582c29 100644 --- a/src/mem/cache/tags/iic.cc +++ b/src/mem/cache/tags/iic.cc @@ -220,11 +220,11 @@ IIC::regStats(const string &name) IICTag* -IIC::accessBlock(Addr addr, int &lat, int context_src) +IIC::accessBlock(Addr addr, Cycles &lat, int context_src) { Addr tag = extractTag(addr); unsigned set = hash(addr); - int set_lat; + Cycles set_lat; unsigned long chain_ptr = tagNull; @@ -232,11 +232,11 @@ IIC::accessBlock(Addr addr, int &lat, int context_src) setAccess.sample(set); IICTag *tag_ptr = sets[set].findTag(tag, chain_ptr); - set_lat = 1; + set_lat = Cycles(1); if (tag_ptr == NULL && chain_ptr != tagNull) { int secondary_depth; tag_ptr = secondaryChain(tag, chain_ptr, &secondary_depth); - set_lat += secondary_depth; + set_lat += Cycles(secondary_depth); // set depth for statistics fix this later!!! 
egh sets[set].depth = set_lat; @@ -250,9 +250,7 @@ IIC::accessBlock(Addr addr, int &lat, int context_src) } } - // @todo: is hashDelay is really cycles, then - // multiply with period - set_lat = set_lat * hashDelay + hitLatency; + set_lat = Cycles(set_lat * hashDelay + hitLatency); if (tag_ptr != NULL) { // IIC replacement: if this is not the first element of // list, reorder @@ -263,8 +261,9 @@ IIC::accessBlock(Addr addr, int &lat, int context_src) hitDepthTotal += sets[set].depth; tag_ptr->status |= BlkReferenced; lat = set_lat; - if (tag_ptr->whenReady > curTick() && tag_ptr->whenReady - curTick() > set_lat) { - lat = tag_ptr->whenReady - curTick(); + if (tag_ptr->whenReady > curTick() && + cache->ticksToCycles(tag_ptr->whenReady - curTick()) > set_lat) { + lat = cache->ticksToCycles(tag_ptr->whenReady - curTick()); } tag_ptr->refCount += 1; diff --git a/src/mem/cache/tags/iic.hh b/src/mem/cache/tags/iic.hh index 97011d1c5..91e947704 100644 --- a/src/mem/cache/tags/iic.hh +++ b/src/mem/cache/tags/iic.hh @@ -176,7 +176,7 @@ class IIC : public BaseTags /** The associativity of the primary table. */ const unsigned assoc; /** The base hit latency. */ - const unsigned hitLatency; + const Cycles hitLatency; /** The subblock size, used for compression. */ const unsigned subSize; @@ -278,9 +278,9 @@ class IIC : public BaseTags /** The associativity of the primary table. */ unsigned assoc; /** The number of cycles for each hash lookup. */ - unsigned hashDelay; + Cycles hashDelay; /** The number of cycles to read the data. */ - unsigned hitLatency; + Cycles hitLatency; /** The replacement policy. */ Repl *rp; /** The subblock size in bytes. */ @@ -420,7 +420,7 @@ class IIC : public BaseTags * @param lat The access latency. * @return A pointer to the block found, if any. */ - IICTag* accessBlock(Addr addr, int &lat, int context_src); + IICTag* accessBlock(Addr addr, Cycles &lat, int context_src); /** * Find the block, do not update the replacement data. diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc index 8d32d4b35..00b13e2d8 100644 --- a/src/mem/cache/tags/lru.cc +++ b/src/mem/cache/tags/lru.cc @@ -116,7 +116,7 @@ LRU::~LRU() } LRU::BlkType* -LRU::accessBlock(Addr addr, int &lat, int master_id) +LRU::accessBlock(Addr addr, Cycles &lat, int master_id) { Addr tag = extractTag(addr); unsigned set = extractSet(addr); @@ -128,8 +128,8 @@ LRU::accessBlock(Addr addr, int &lat, int master_id) DPRINTF(CacheRepl, "set %x: moving blk %x to MRU\n", set, regenerateBlkAddr(tag, set)); if (blk->whenReady > curTick() - && blk->whenReady - curTick() > hitLatency) { - lat = blk->whenReady - curTick(); + && cache->ticksToCycles(blk->whenReady - curTick()) > hitLatency) { + lat = cache->ticksToCycles(blk->whenReady - curTick()); } blk->refCount += 1; } diff --git a/src/mem/cache/tags/lru.hh b/src/mem/cache/tags/lru.hh index 7938fcc3c..427dba667 100644 --- a/src/mem/cache/tags/lru.hh +++ b/src/mem/cache/tags/lru.hh @@ -68,7 +68,7 @@ class LRU : public BaseTags /** The associativity of the cache. */ const unsigned assoc; /** The hit latency. */ - const unsigned hitLatency; + const Cycles hitLatency; /** The cache sets. */ CacheSet *sets; @@ -139,7 +139,7 @@ public: * @param lat The access latency. * @return Pointer to the cache block if found. */ - BlkType* accessBlock(Addr addr, int &lat, int context_src); + BlkType* accessBlock(Addr addr, Cycles &lat, int context_src); /** * Finds the given address in the cache, do not update replacement data. 
@@ -221,7 +221,7 @@ public: * Return the hit latency. * @return the hit latency. */ - int getHitLatency() const + Cycles getHitLatency() const { return hitLatency; } diff --git a/tests/configs/inorder-timing.py b/tests/configs/inorder-timing.py index af7609e9f..5f2156ff9 100644 --- a/tests/configs/inorder-timing.py +++ b/tests/configs/inorder-timing.py @@ -33,8 +33,8 @@ m5.util.addToPath('../configs/common') class MyCache(BaseCache): assoc = 2 block_size = 64 - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 mshrs = 10 tgts_per_mshr = 5 @@ -44,8 +44,8 @@ class MyL1Cache(MyCache): cpu = InOrderCPU(cpu_id=0) cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'), MyL1Cache(size = '256kB'), - MyCache(size = '2MB', hit_latency='10ns', - response_latency='10ns')) + MyCache(size = '2MB', hit_latency = 20, + response_latency = 20)) cpu.clock = '2GHz' diff --git a/tests/configs/memtest.py b/tests/configs/memtest.py index 5d60ee0ea..4db2d4f2a 100644 --- a/tests/configs/memtest.py +++ b/tests/configs/memtest.py @@ -34,8 +34,8 @@ from m5.objects import * # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 12 tgts_per_mshr = 8 @@ -47,8 +47,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -65,7 +65,7 @@ system = System(cpu = cpus, funcmem = SimpleMemory(in_addr_map = False), # l2cache & bus system.toL2Bus = CoherentBus(clock="2GHz", width=16) -system.l2c = L2(size='64kB', assoc=8) +system.l2c = L2(clock = '2GHz', size='64kB', assoc=8) system.l2c.cpu_side = system.toL2Bus.master # connect l2c to membus diff --git a/tests/configs/o3-timing-checker.py b/tests/configs/o3-timing-checker.py index 866d57851..a54c9b7ca 100644 --- a/tests/configs/o3-timing-checker.py +++ b/tests/configs/o3-timing-checker.py @@ -42,8 +42,8 @@ m5.util.addToPath('../configs/common') class MyCache(BaseCache): assoc = 2 block_size = 64 - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 mshrs = 10 tgts_per_mshr = 5 @@ -57,6 +57,9 @@ cpu.addCheckerCpu() cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'), MyL1Cache(size = '256kB'), MyCache(size = '2MB')) +# @todo Note that the L2 latency here is unmodified and 2 cycles, +# should set hit latency and response latency to 20 cycles as for +# other scripts cpu.clock = '2GHz' system = System(cpu = cpu, diff --git a/tests/configs/o3-timing-mp.py b/tests/configs/o3-timing-mp.py index 1b3207311..0b10f5766 100644 --- a/tests/configs/o3-timing-mp.py +++ b/tests/configs/o3-timing-mp.py @@ -35,8 +35,8 @@ m5.util.addToPath('../configs/common') # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 20 @@ -48,8 +48,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -61,8 +61,8 @@ cpus = [ DerivO3CPU(cpu_id=i) for i in xrange(nb_cores) ] system = System(cpu = cpus, physmem = SimpleMemory(), membus = CoherentBus()) # l2cache & bus -system.toL2Bus = CoherentBus() -system.l2c = L2(size='4MB', assoc=8) +system.toL2Bus = CoherentBus(clock = '2GHz') +system.l2c = L2(clock = '2GHz', 
size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.master # connect l2c to membus diff --git a/tests/configs/o3-timing.py b/tests/configs/o3-timing.py index 0646f1c26..f87e0e355 100644 --- a/tests/configs/o3-timing.py +++ b/tests/configs/o3-timing.py @@ -33,8 +33,8 @@ m5.util.addToPath('../configs/common') class MyCache(BaseCache): assoc = 2 block_size = 64 - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 mshrs = 10 tgts_per_mshr = 5 @@ -46,6 +46,9 @@ cpu = DerivO3CPU(cpu_id=0) cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'), MyL1Cache(size = '256kB'), MyCache(size = '2MB')) +# @todo Note that the L2 latency here is unmodified and 2 cycles, +# should set hit latency and response latency to 20 cycles as for +# other scripts cpu.clock = '2GHz' system = System(cpu = cpu, diff --git a/tests/configs/pc-o3-timing.py b/tests/configs/pc-o3-timing.py index f75c5776d..729f3bd8f 100644 --- a/tests/configs/pc-o3-timing.py +++ b/tests/configs/pc-o3-timing.py @@ -39,8 +39,8 @@ mem_size = '128MB' # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 20 @@ -52,8 +52,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -64,8 +64,8 @@ class L2(BaseCache): class PageTableWalkerCache(BaseCache): assoc = 2 block_size = 64 - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 mshrs = 10 size = '1kB' tgts_per_mshr = 12 @@ -76,8 +76,8 @@ class PageTableWalkerCache(BaseCache): class IOCache(BaseCache): assoc = 8 block_size = 64 - hit_latency = '50ns' - response_latency = '50ns' + hit_latency = 50 + response_latency = 50 mshrs = 20 size = '1kB' tgts_per_mshr = 12 @@ -94,7 +94,7 @@ system.kernel = FSConfig.binary('x86_64-vmlinux-2.6.22.9') system.cpu = cpu #create the iocache -system.iocache = IOCache() +system.iocache = IOCache(clock = '1GHz') system.iocache.cpu_side = system.iobus.master system.iocache.mem_side = system.membus.slave diff --git a/tests/configs/pc-simple-atomic.py b/tests/configs/pc-simple-atomic.py index b628992ec..7dfec362e 100644 --- a/tests/configs/pc-simple-atomic.py +++ b/tests/configs/pc-simple-atomic.py @@ -39,8 +39,8 @@ mem_size = '128MB' # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 8 @@ -52,8 +52,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -64,8 +64,8 @@ class L2(BaseCache): class PageTableWalkerCache(BaseCache): assoc = 2 block_size = 64 - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 mshrs = 10 size = '1kB' tgts_per_mshr = 12 @@ -77,8 +77,8 @@ class PageTableWalkerCache(BaseCache): class IOCache(BaseCache): assoc = 8 block_size = 64 - hit_latency = '50ns' - response_latency = '50ns' + hit_latency = 50 + response_latency = 50 mshrs = 20 size = '1kB' tgts_per_mshr = 12 @@ -96,7 +96,7 @@ system.kernel = FSConfig.binary('x86_64-vmlinux-2.6.22.9') system.cpu = cpu #create the iocache -system.iocache = IOCache() +system.iocache = IOCache(clock = '1GHz') system.iocache.cpu_side = system.iobus.master 
system.iocache.mem_side = system.membus.slave diff --git a/tests/configs/pc-simple-timing.py b/tests/configs/pc-simple-timing.py index 8a44300e5..bfbf926dc 100644 --- a/tests/configs/pc-simple-timing.py +++ b/tests/configs/pc-simple-timing.py @@ -40,8 +40,8 @@ mem_size = '128MB' # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 8 @@ -53,8 +53,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -65,8 +65,8 @@ class L2(BaseCache): class PageTableWalkerCache(BaseCache): assoc = 2 block_size = 64 - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 mshrs = 10 size = '1kB' tgts_per_mshr = 12 @@ -77,8 +77,8 @@ class PageTableWalkerCache(BaseCache): class IOCache(BaseCache): assoc = 8 block_size = 64 - hit_latency = '50ns' - response_latency = '50ns' + hit_latency = 50 + response_latency = 50 mshrs = 20 size = '1kB' tgts_per_mshr = 12 @@ -95,7 +95,7 @@ system.kernel = FSConfig.binary('x86_64-vmlinux-2.6.22.9') system.cpu = cpu #create the iocache -system.iocache = IOCache() +system.iocache = IOCache(clock = '1GHz') system.iocache.cpu_side = system.iobus.master system.iocache.mem_side = system.membus.slave diff --git a/tests/configs/realview-o3-checker.py b/tests/configs/realview-o3-checker.py index 961a4a698..a791b3983 100644 --- a/tests/configs/realview-o3-checker.py +++ b/tests/configs/realview-o3-checker.py @@ -46,8 +46,8 @@ import FSConfig # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 20 @@ -59,8 +59,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -71,8 +71,8 @@ class L2(BaseCache): class IOCache(BaseCache): assoc = 8 block_size = 64 - hit_latency = '50ns' - response_latency = '50ns' + hit_latency = 50 + response_latency = 50 mshrs = 20 size = '1kB' tgts_per_mshr = 12 @@ -89,7 +89,7 @@ system.cpu = cpu cpu.addCheckerCpu() #create the iocache -system.iocache = IOCache() +system.iocache = IOCache(clock = '1GHz') system.iocache.cpu_side = system.iobus.master system.iocache.mem_side = system.membus.slave diff --git a/tests/configs/realview-o3-dual.py b/tests/configs/realview-o3-dual.py index aa756c07f..599ef6f8c 100644 --- a/tests/configs/realview-o3-dual.py +++ b/tests/configs/realview-o3-dual.py @@ -37,8 +37,8 @@ from Benchmarks import * # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 20 @@ -50,8 +50,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -62,8 +62,8 @@ class L2(BaseCache): class IOCache(BaseCache): assoc = 8 block_size = 64 - hit_latency = '50ns' - response_latency = '50ns' + hit_latency = 50 + response_latency = 50 mshrs = 20 size = '1kB' tgts_per_mshr = 12 @@ -74,16 +74,16 @@ class IOCache(BaseCache): cpus = [DerivO3CPU(cpu_id=i) for i in xrange(2) ] #the system system = FSConfig.makeArmSystem('timing', 
"RealView_PBX", None, False) -system.iocache = IOCache() +system.iocache = IOCache(clock = '1GHz') system.iocache.cpu_side = system.iobus.master system.iocache.mem_side = system.membus.slave system.cpu = cpus #create the l1/l2 bus -system.toL2Bus = CoherentBus() +system.toL2Bus = CoherentBus(clock = '2GHz') #connect up the l2 cache -system.l2c = L2(size='4MB', assoc=8) +system.l2c = L2(clock = '2GHz', size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.master system.l2c.mem_side = system.membus.slave diff --git a/tests/configs/realview-o3.py b/tests/configs/realview-o3.py index 24e5ca82b..698c6dd14 100644 --- a/tests/configs/realview-o3.py +++ b/tests/configs/realview-o3.py @@ -37,8 +37,8 @@ import FSConfig # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 20 @@ -50,8 +50,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -62,8 +62,8 @@ class L2(BaseCache): class IOCache(BaseCache): assoc = 8 block_size = 64 - hit_latency = '50ns' - response_latency = '50ns' + hit_latency = 50 + response_latency = 50 mshrs = 20 size = '1kB' tgts_per_mshr = 12 @@ -78,7 +78,7 @@ system = FSConfig.makeArmSystem('timing', "RealView_PBX", None, False) system.cpu = cpu #create the iocache -system.iocache = IOCache() +system.iocache = IOCache(clock = '1GHz') system.iocache.cpu_side = system.iobus.master system.iocache.mem_side = system.membus.slave diff --git a/tests/configs/realview-simple-atomic-dual.py b/tests/configs/realview-simple-atomic-dual.py index 67d0c2f32..9bf2fd70d 100644 --- a/tests/configs/realview-simple-atomic-dual.py +++ b/tests/configs/realview-simple-atomic-dual.py @@ -37,8 +37,8 @@ from Benchmarks import * # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 8 @@ -50,8 +50,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -62,8 +62,8 @@ class L2(BaseCache): class IOCache(BaseCache): assoc = 8 block_size = 64 - hit_latency = '50ns' - response_latency = '50ns' + hit_latency = 50 + response_latency = 50 mshrs = 20 size = '1kB' tgts_per_mshr = 12 @@ -74,16 +74,16 @@ class IOCache(BaseCache): cpus = [AtomicSimpleCPU(cpu_id=i) for i in xrange(2) ] #the system system = FSConfig.makeArmSystem('atomic', "RealView_PBX", None, False) -system.iocache = IOCache() +system.iocache = IOCache(clock = '1GHz') system.iocache.cpu_side = system.iobus.master system.iocache.mem_side = system.membus.slave system.cpu = cpus #create the l1/l2 bus -system.toL2Bus = CoherentBus() +system.toL2Bus = CoherentBus(clock = '2GHz') #connect up the l2 cache -system.l2c = L2(size='4MB', assoc=8) +system.l2c = L2(clock = '2GHz', size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.master system.l2c.mem_side = system.membus.slave diff --git a/tests/configs/realview-simple-atomic.py b/tests/configs/realview-simple-atomic.py index 55c5d2409..05ef0dd63 100644 --- a/tests/configs/realview-simple-atomic.py +++ b/tests/configs/realview-simple-atomic.py @@ -36,8 +36,8 @@ import FSConfig # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = 
'1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 8 @@ -49,8 +49,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -61,8 +61,8 @@ class L2(BaseCache): class IOCache(BaseCache): assoc = 8 block_size = 64 - hit_latency = '50ns' - response_latency = '50ns' + hit_latency = 50 + response_latency = 50 mshrs = 20 size = '1kB' tgts_per_mshr = 12 @@ -77,7 +77,7 @@ system = FSConfig.makeArmSystem('atomic', "RealView_PBX", None, False) system.cpu = cpu #create the iocache -system.iocache = IOCache() +system.iocache = IOCache(clock = '1GHz') system.iocache.cpu_side = system.iobus.master system.iocache.mem_side = system.membus.slave diff --git a/tests/configs/realview-simple-timing-dual.py b/tests/configs/realview-simple-timing-dual.py index 939602fb5..ee5ffaf06 100644 --- a/tests/configs/realview-simple-timing-dual.py +++ b/tests/configs/realview-simple-timing-dual.py @@ -37,8 +37,8 @@ from Benchmarks import * # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 8 @@ -50,8 +50,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -62,8 +62,8 @@ class L2(BaseCache): class IOCache(BaseCache): assoc = 8 block_size = 64 - hit_latency = '50ns' - response_latency = '50ns' + hit_latency = 50 + response_latency = 50 mshrs = 20 size = '1kB' tgts_per_mshr = 12 @@ -74,16 +74,16 @@ class IOCache(BaseCache): cpus = [TimingSimpleCPU(cpu_id=i) for i in xrange(2) ] #the system system = FSConfig.makeArmSystem('timing', "RealView_PBX", None, False) -system.iocache = IOCache() +system.iocache = IOCache(clock = '1GHz') system.iocache.cpu_side = system.iobus.master system.iocache.mem_side = system.membus.slave system.cpu = cpus #create the l1/l2 bus -system.toL2Bus = CoherentBus() +system.toL2Bus = CoherentBus(clock = '2GHz') #connect up the l2 cache -system.l2c = L2(size='4MB', assoc=8) +system.l2c = L2(clock = '2GHz', size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.master system.l2c.mem_side = system.membus.slave diff --git a/tests/configs/realview-simple-timing.py b/tests/configs/realview-simple-timing.py index b5db3e10b..28cd3163f 100644 --- a/tests/configs/realview-simple-timing.py +++ b/tests/configs/realview-simple-timing.py @@ -37,8 +37,8 @@ import FSConfig # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 8 @@ -50,8 +50,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -62,8 +62,8 @@ class L2(BaseCache): class IOCache(BaseCache): assoc = 8 block_size = 64 - hit_latency = '50ns' - response_latency = '50ns' + hit_latency = 50 + response_latency = 50 mshrs = 20 size = '1kB' tgts_per_mshr = 12 @@ -78,7 +78,7 @@ system = FSConfig.makeArmSystem('timing', "RealView_PBX", None, False) system.cpu = cpu #create the iocache -system.iocache = IOCache() +system.iocache = IOCache(clock = '1GHz') system.iocache.cpu_side = system.iobus.master system.iocache.mem_side = 
system.membus.slave diff --git a/tests/configs/simple-atomic-mp.py b/tests/configs/simple-atomic-mp.py index 6c86eff2d..6dde4ed68 100644 --- a/tests/configs/simple-atomic-mp.py +++ b/tests/configs/simple-atomic-mp.py @@ -34,8 +34,8 @@ from m5.objects import * # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 8 @@ -47,8 +47,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -62,8 +62,8 @@ system = System(cpu = cpus, membus = CoherentBus()) # l2cache & bus -system.toL2Bus = CoherentBus() -system.l2c = L2(size='4MB', assoc=8) +system.toL2Bus = CoherentBus(clock = '2GHz') +system.l2c = L2(clock = '2GHz', size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.master # connect l2c to membus diff --git a/tests/configs/simple-timing-mp.py b/tests/configs/simple-timing-mp.py index 559cf807a..3e5e92d8c 100644 --- a/tests/configs/simple-timing-mp.py +++ b/tests/configs/simple-timing-mp.py @@ -34,8 +34,8 @@ from m5.objects import * # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 8 @@ -47,8 +47,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -60,8 +60,8 @@ cpus = [ TimingSimpleCPU(cpu_id=i) for i in xrange(nb_cores) ] system = System(cpu = cpus, physmem = SimpleMemory(), membus = CoherentBus()) # l2cache & bus -system.toL2Bus = CoherentBus() -system.l2c = L2(size='4MB', assoc=8) +system.toL2Bus = CoherentBus(clock = '2GHz') +system.l2c = L2(clock = '2GHz', size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.master # connect l2c to membus diff --git a/tests/configs/simple-timing.py b/tests/configs/simple-timing.py index cb40ca5c3..beeadced9 100644 --- a/tests/configs/simple-timing.py +++ b/tests/configs/simple-timing.py @@ -32,8 +32,8 @@ from m5.objects import * class MyCache(BaseCache): assoc = 2 block_size = 64 - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 mshrs = 10 tgts_per_mshr = 5 @@ -43,7 +43,8 @@ class MyL1Cache(MyCache): cpu = TimingSimpleCPU(cpu_id=0) cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'), MyL1Cache(size = '256kB'), - MyCache(size = '2MB', hit_latency='10ns', response_latency ='10ns')) + MyCache(size = '2MB', hit_latency= 20, + response_latency = 20)) system = System(cpu = cpu, physmem = SimpleMemory(), membus = CoherentBus()) diff --git a/tests/configs/tsunami-inorder.py b/tests/configs/tsunami-inorder.py index b32a1ff17..163ac3ba6 100644 --- a/tests/configs/tsunami-inorder.py +++ b/tests/configs/tsunami-inorder.py @@ -37,8 +37,8 @@ import FSConfig # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 8 @@ -50,8 +50,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -62,8 +62,8 @@ class L2(BaseCache): class IOCache(BaseCache): assoc = 8 block_size = 64 - hit_latency = '50ns' - 
response_latency = '50ns' + hit_latency = 50 + response_latency = 50 mshrs = 20 size = '1kB' tgts_per_mshr = 12 @@ -82,7 +82,7 @@ system = FSConfig.makeLinuxAlphaSystem('timing') system.cpu = cpu #create the iocache -system.iocache = IOCache() +system.iocache = IOCache(clock = '1GHz') system.iocache.cpu_side = system.iobus.master system.iocache.mem_side = system.membus.slave diff --git a/tests/configs/tsunami-o3-dual.py b/tests/configs/tsunami-o3-dual.py index a40c44c9b..5ba14753d 100644 --- a/tests/configs/tsunami-o3-dual.py +++ b/tests/configs/tsunami-o3-dual.py @@ -37,8 +37,8 @@ import FSConfig # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 20 @@ -50,8 +50,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -62,8 +62,8 @@ class L2(BaseCache): class IOCache(BaseCache): assoc = 8 block_size = 64 - hit_latency = '50ns' - response_latency = '50ns' + hit_latency = 50 + response_latency = 50 mshrs = 20 size = '1kB' tgts_per_mshr = 12 @@ -78,14 +78,14 @@ system = FSConfig.makeLinuxAlphaSystem('timing') system.cpu = cpus #create the l1/l2 bus -system.toL2Bus = CoherentBus() -system.iocache = IOCache() +system.toL2Bus = CoherentBus(clock = '2GHz') +system.iocache = IOCache(clock = '1GHz') system.iocache.cpu_side = system.iobus.master system.iocache.mem_side = system.membus.slave #connect up the l2 cache -system.l2c = L2(size='4MB', assoc=8) +system.l2c = L2(clock = '2GHz', size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.master system.l2c.mem_side = system.membus.slave diff --git a/tests/configs/tsunami-o3.py b/tests/configs/tsunami-o3.py index 75ff66218..2920c878a 100644 --- a/tests/configs/tsunami-o3.py +++ b/tests/configs/tsunami-o3.py @@ -37,8 +37,8 @@ import FSConfig # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 20 @@ -50,8 +50,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -62,8 +62,8 @@ class L2(BaseCache): class IOCache(BaseCache): assoc = 8 block_size = 64 - hit_latency = '50ns' - response_latency = '50ns' + hit_latency = 50 + response_latency = 50 mshrs = 20 size = '1kB' tgts_per_mshr = 12 @@ -79,7 +79,7 @@ system = FSConfig.makeLinuxAlphaSystem('timing') system.cpu = cpu #create the iocache -system.iocache = IOCache() +system.iocache = IOCache(clock = '1GHz') system.iocache.cpu_side = system.iobus.master system.iocache.mem_side = system.membus.slave diff --git a/tests/configs/tsunami-simple-atomic-dual.py b/tests/configs/tsunami-simple-atomic-dual.py index e08a1ee0d..851ec847b 100644 --- a/tests/configs/tsunami-simple-atomic-dual.py +++ b/tests/configs/tsunami-simple-atomic-dual.py @@ -36,8 +36,8 @@ import FSConfig # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 8 @@ -49,8 +49,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ 
-61,8 +61,8 @@ class L2(BaseCache): class IOCache(BaseCache): assoc = 8 block_size = 64 - hit_latency = '50ns' - response_latency = '50ns' + hit_latency = 50 + response_latency = 50 mshrs = 20 size = '1kB' tgts_per_mshr = 12 @@ -74,16 +74,16 @@ class IOCache(BaseCache): cpus = [ AtomicSimpleCPU(cpu_id=i) for i in xrange(2) ] #the system system = FSConfig.makeLinuxAlphaSystem('atomic') -system.iocache = IOCache() +system.iocache = IOCache(clock = '1GHz') system.iocache.cpu_side = system.iobus.master system.iocache.mem_side = system.membus.slave system.cpu = cpus #create the l1/l2 bus -system.toL2Bus = CoherentBus() +system.toL2Bus = CoherentBus(clock = '2GHz') #connect up the l2 cache -system.l2c = L2(size='4MB', assoc=8) +system.l2c = L2(clock = '2GHz', size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.master system.l2c.mem_side = system.membus.slave diff --git a/tests/configs/tsunami-simple-atomic.py b/tests/configs/tsunami-simple-atomic.py index 7d8743493..c583f9d2b 100644 --- a/tests/configs/tsunami-simple-atomic.py +++ b/tests/configs/tsunami-simple-atomic.py @@ -36,8 +36,8 @@ import FSConfig # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 8 @@ -49,8 +49,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -61,8 +61,8 @@ class L2(BaseCache): class IOCache(BaseCache): assoc = 8 block_size = 64 - hit_latency = '50ns' - response_latency = '50ns' + hit_latency = 50 + response_latency = 50 mshrs = 20 size = '1kB' tgts_per_mshr = 12 @@ -78,7 +78,7 @@ system = FSConfig.makeLinuxAlphaSystem('atomic') system.cpu = cpu #create the iocache -system.iocache = IOCache() +system.iocache = IOCache(clock = '1GHz') system.iocache.cpu_side = system.iobus.master system.iocache.mem_side = system.membus.slave diff --git a/tests/configs/tsunami-simple-timing-dual.py b/tests/configs/tsunami-simple-timing-dual.py index 71d231e58..e69db3438 100644 --- a/tests/configs/tsunami-simple-timing-dual.py +++ b/tests/configs/tsunami-simple-timing-dual.py @@ -36,8 +36,8 @@ import FSConfig # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 8 @@ -49,8 +49,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -61,8 +61,8 @@ class L2(BaseCache): class IOCache(BaseCache): assoc = 8 block_size = 64 - hit_latency = '50ns' - response_latency = '50ns' + hit_latency = 50 + response_latency = 50 mshrs = 20 size = '1kB' tgts_per_mshr = 12 @@ -74,16 +74,16 @@ class IOCache(BaseCache): cpus = [ TimingSimpleCPU(cpu_id=i) for i in xrange(2) ] #the system system = FSConfig.makeLinuxAlphaSystem('timing') -system.iocache = IOCache() +system.iocache = IOCache(clock = '1GHz') system.iocache.cpu_side = system.iobus.master system.iocache.mem_side = system.membus.slave system.cpu = cpus #create the l1/l2 bus -system.toL2Bus = CoherentBus() +system.toL2Bus = CoherentBus(clock = '2GHz') #connect up the l2 cache -system.l2c = L2(size='4MB', assoc=8) +system.l2c = L2(clock = '2GHz', size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.master system.l2c.mem_side = system.membus.slave 
diff --git a/tests/configs/tsunami-simple-timing.py b/tests/configs/tsunami-simple-timing.py index b6378eb61..e71d75846 100644 --- a/tests/configs/tsunami-simple-timing.py +++ b/tests/configs/tsunami-simple-timing.py @@ -37,8 +37,8 @@ import FSConfig # ==================== class L1(BaseCache): - hit_latency = '1ns' - response_latency = '1ns' + hit_latency = 2 + response_latency = 2 block_size = 64 mshrs = 4 tgts_per_mshr = 8 @@ -50,8 +50,8 @@ class L1(BaseCache): class L2(BaseCache): block_size = 64 - hit_latency = '10ns' - response_latency = '10ns' + hit_latency = 20 + response_latency = 20 mshrs = 92 tgts_per_mshr = 16 write_buffers = 8 @@ -62,8 +62,8 @@ class L2(BaseCache): class IOCache(BaseCache): assoc = 8 block_size = 64 - hit_latency = '50ns' - response_latency = '50ns' + hit_latency = 50 + response_latency = 50 mshrs = 20 size = '1kB' tgts_per_mshr = 12 @@ -79,7 +79,7 @@ system = FSConfig.makeLinuxAlphaSystem('timing') system.cpu = cpu #create the iocache -system.iocache = IOCache() +system.iocache = IOCache(clock = '1GHz') system.iocache.cpu_side = system.iobus.master system.iocache.mem_side = system.membus.slave -- 2.30.2
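Usage note (not part of the patch itself): the sketch below illustrates how a cache is configured after this change, with hit_latency and response_latency given as cycle counts that are scaled by the cache's own clock rather than as absolute times. The class and instance names (MyL1, l1) and the chosen sizes are hypothetical; the parameters mirror those touched by the patch.

```python
# Hypothetical gem5 config fragment illustrating the new cycles-based scheme.
# MyL1 and the specific values are examples, not part of the patch.
from m5.objects import BaseCache

class MyL1(BaseCache):
    assoc = 2
    block_size = 64
    hit_latency = 2          # cycles of the cache clock (previously e.g. '1ns')
    response_latency = 2     # also a cycle count, not an absolute time
    mshrs = 10
    tgts_per_mshr = 20
    is_top_level = True

# With a 2GHz cache clock, the 2-cycle hit latency corresponds to 1ns of
# simulated time; changing the clock rescales the latency without editing
# the cache. The patch panics if the clock is left at its 1-tick default.
l1 = MyL1(clock = '2GHz', size = '32kB')
```

This is the same shift the patch applies throughout configs/ and tests/configs/: the '1ns'/'10ns'/'50ns' latency strings become cycle counts such as 2/20/50, and the shared L2 and IO caches additionally receive explicit clock = '2GHz' and '1GHz' settings.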