From ca89eba79ebe0adc9cea7656c288e0381754171a Mon Sep 17 00:00:00 2001 From: Matt Horsnell Date: Fri, 24 Jan 2014 15:29:30 -0600 Subject: [PATCH] mem: track per-request latencies and access depths in the cache hierarchy Add some values and methods to the request object to track the translation and access latency for a request and which level of the cache hierarchy responded to the request. --- src/cpu/o3/fetch_impl.hh | 1 + src/cpu/o3/lsq_unit_impl.hh | 2 ++ src/cpu/simple/timing.cc | 6 ++++- src/cpu/translation.hh | 1 + src/mem/cache/base.hh | 2 +- src/mem/request.hh | 46 ++++++++++++++++++++++++++++++++++++- 6 files changed, 55 insertions(+), 3 deletions(-) diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index b35dd80f3..b121ba707 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -400,6 +400,7 @@ DefaultFetch::processCacheCompletion(PacketPtr pkt) fetchStatus[tid] = IcacheAccessComplete; } + pkt->req->setAccessLatency(); // Reset the mem req to NULL. 
delete pkt->req; delete pkt; diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 77b67ac69..ade076995 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -129,6 +129,8 @@ LSQUnit::completeDataAccess(PacketPtr pkt) delete state->mainPkt->req; delete state->mainPkt; } + + pkt->req->setAccessLatency(); delete state; delete pkt->req; delete pkt; diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 744bf8397..9253d8005 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -646,7 +646,6 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt) // received a response from the icache: execute the received // instruction - assert(!pkt || !pkt->isError()); assert(_status == IcacheWaitResponse); @@ -655,6 +654,10 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt) numCycles += curCycle() - previousCycle; previousCycle = curCycle(); + if (pkt) + pkt->req->setAccessLatency(); + + preExecute(); if (curStaticInst && curStaticInst->isMemRef()) { // load or store: just send to dcache @@ -749,6 +752,7 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt) assert(_status == DcacheWaitResponse || _status == DTBWaitResponse || pkt->req->getFlags().isSet(Request::NO_ACCESS)); + pkt->req->setAccessLatency(); numCycles += curCycle() - previousCycle; previousCycle = curCycle(); diff --git a/src/cpu/translation.hh b/src/cpu/translation.hh index 90fffa03d..c05cc86a0 100644 --- a/src/cpu/translation.hh +++ b/src/cpu/translation.hh @@ -256,6 +256,7 @@ class DataTranslation : public BaseTLB::Translation assert(mode == state->mode); if (state->finish(fault, index)) { xc->finishTranslation(state); + req->setTranslateLatency(); } delete this; } diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh index dce40e915..50ba396b2 100644 --- a/src/mem/cache/base.hh +++ b/src/mem/cache/base.hh @@ -568,7 +568,7 @@ class BaseCache : public MemObject { assert(pkt->req->masterId() < system->maxMasters()); 
misses[pkt->cmdToIndex()][pkt->req->masterId()]++; - + pkt->req->incAccessDepth(); if (missCount) { --missCount; if (missCount == 0) diff --git a/src/mem/request.hh b/src/mem/request.hh index a0ff50910..54b671645 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 ARM Limited + * Copyright (c) 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -244,6 +244,7 @@ class Request * default constructor.) */ Request() + : translateDelta(0), accessDelta(0), depth(0) {} /** @@ -304,6 +305,9 @@ class Request _flags.set(flags); privateFlags.clear(~STICKY_PRIVATE_FLAGS); privateFlags.set(VALID_PADDR|VALID_SIZE); + depth = 0; + accessDelta = 0; + //translateDelta = 0; } void @@ -331,6 +335,9 @@ class Request _flags.set(flags); privateFlags.clear(~STICKY_PRIVATE_FLAGS); privateFlags.set(VALID_VADDR|VALID_SIZE|VALID_PC); + depth = 0; + accessDelta = 0; + translateDelta = 0; } /** @@ -381,6 +388,23 @@ class Request return _paddr; } + /** + * Time for the TLB/table walker to successfully translate this request. + */ + Tick translateDelta; + + /** + * Access latency to complete this memory transaction not including + * translation time. + */ + Tick accessDelta; + + /** + * Level of the cache hierarchy where this request was responded to + * (e.g. 0 = L1; 1 = L2). + */ + int depth; + /** * Accessor for size. */ @@ -535,6 +559,26 @@ class Request return _pc; } + /** + * Increment/Get the depth at which this request is responded to. + * This currently happens when the request misses in any cache level. + */ + void incAccessDepth() { depth++; } + int getAccessDepth() const { return depth; } + + /** + * Set/Get the time taken for this request to be successfully translated. 
+ */ + void setTranslateLatency() { translateDelta = curTick() - _time; } + Tick getTranslateLatency() const { return translateDelta; } + + /** + * Set/Get the time taken to complete this request's access, not including + * the time to successfully translate the request. + */ + void setAccessLatency() { accessDelta = curTick() - _time - translateDelta; } + Tick getAccessLatency() const { return accessDelta; } + /** Accessor functions for flags. Note that these are for testing only; setting flags should be done via setFlags(). */ bool isUncacheable() const { return _flags.isSet(UNCACHEABLE); } -- 2.30.2