From 3ee4957b4930a252c0185a6bc71bdf1c6ebc5ed9 Mon Sep 17 00:00:00 2001 From: Akash Bagdia Date: Tue, 18 Nov 2014 14:00:48 +0000 Subject: [PATCH] power: Add power states to ClockedObject Add 4 power states to the ClockedObject, provides necessary access functions to check and update the power state. Default power state is UNDEFINED, it is responsibility of the respective simulation model to provide the startup state and any other logic for state change. Add number of transition stat. Add distribution of time spent in clock gated state. Add power state residency stat. Add dump call back function to allow stats update of distribution and residency stats. --- src/arch/alpha/tlb.cc | 2 + src/arch/mips/tlb.cc | 2 + src/arch/power/tlb.cc | 2 + src/cpu/testers/memtest/memtest.cc | 2 + src/dev/arm/flash_device.cc | 2 + src/dev/arm/hdlcd.cc | 2 + src/dev/arm/ufs_device.cc | 2 + src/mem/probes/stack_dist.cc | 2 + .../ruby/network/garnet/BaseGarnetNetwork.cc | 2 + .../network/garnet/fixed-pipeline/Router_d.cc | 2 + src/mem/ruby/network/simple/SimpleNetwork.cc | 2 + src/mem/ruby/network/simple/Switch.cc | 2 + .../slicc_interface/AbstractController.cc | 2 + src/mem/ruby/structures/CacheMemory.cc | 2 + src/mem/ruby/structures/Prefetcher.cc | 2 + src/mem/ruby/system/RubySystem.hh | 5 +- src/mem/ruby/system/Sequencer.cc | 2 + src/mem/snoop_filter.cc | 2 + src/sim/ClockedObject.py | 29 ++- src/sim/SConscript | 1 + src/sim/clock_domain.cc | 2 + src/sim/clocked_object.cc | 177 ++++++++++++++++++ src/sim/clocked_object.hh | 59 +++++- src/sim/voltage_domain.cc | 2 + 24 files changed, 305 insertions(+), 4 deletions(-) create mode 100644 src/sim/clocked_object.cc diff --git a/src/arch/alpha/tlb.cc b/src/arch/alpha/tlb.cc index 3360b34c5..fcd2b518b 100644 --- a/src/arch/alpha/tlb.cc +++ b/src/arch/alpha/tlb.cc @@ -76,6 +76,8 @@ TLB::~TLB() void TLB::regStats() { + BaseTLB::regStats(); + fetch_hits .name(name() + ".fetch_hits") .desc("ITB hits"); diff --git a/src/arch/mips/tlb.cc 
b/src/arch/mips/tlb.cc index d2aa5ad70..340c83021 100644 --- a/src/arch/mips/tlb.cc +++ b/src/arch/mips/tlb.cc @@ -226,6 +226,8 @@ TLB::unserialize(CheckpointIn &cp) void TLB::regStats() { + BaseTLB::regStats(); + read_hits .name(name() + ".read_hits") .desc("DTB read hits") diff --git a/src/arch/power/tlb.cc b/src/arch/power/tlb.cc index edfb4f453..90a341d85 100644 --- a/src/arch/power/tlb.cc +++ b/src/arch/power/tlb.cc @@ -223,6 +223,8 @@ TLB::unserialize(CheckpointIn &cp) void TLB::regStats() { + BaseTLB::regStats(); + read_hits .name(name() + ".read_hits") .desc("DTB read hits") diff --git a/src/cpu/testers/memtest/memtest.cc b/src/cpu/testers/memtest/memtest.cc index b0dde6d27..223532088 100644 --- a/src/cpu/testers/memtest/memtest.cc +++ b/src/cpu/testers/memtest/memtest.cc @@ -197,6 +197,8 @@ MemTest::completeRequest(PacketPtr pkt, bool functional) void MemTest::regStats() { + MemObject::regStats(); + using namespace Stats; numReadsStat diff --git a/src/dev/arm/flash_device.cc b/src/dev/arm/flash_device.cc index 60c910626..297354b65 100644 --- a/src/dev/arm/flash_device.cc +++ b/src/dev/arm/flash_device.cc @@ -472,6 +472,8 @@ FlashDevice::getUnknownPages(uint32_t index) void FlashDevice::regStats() { + AbstractNVM::regStats(); + using namespace Stats; std::string fd_name = name() + ".FlashDevice"; diff --git a/src/dev/arm/hdlcd.cc b/src/dev/arm/hdlcd.cc index b04de21bf..0f63f23fc 100644 --- a/src/dev/arm/hdlcd.cc +++ b/src/dev/arm/hdlcd.cc @@ -97,6 +97,8 @@ HDLcd::~HDLcd() void HDLcd::regStats() { + AmbaDmaDevice::regStats(); + using namespace Stats; stats.underruns diff --git a/src/dev/arm/ufs_device.cc b/src/dev/arm/ufs_device.cc index 07d50903b..fe05b3279 100644 --- a/src/dev/arm/ufs_device.cc +++ b/src/dev/arm/ufs_device.cc @@ -774,6 +774,8 @@ UFSHostDeviceParams::create() void UFSHostDevice::regStats() { + DmaDevice::regStats(); + using namespace Stats; std::string UFSHost_name = name() + ".UFSDiskHost"; diff --git a/src/mem/probes/stack_dist.cc 
b/src/mem/probes/stack_dist.cc index a447f49e5..b12c81e2c 100644 --- a/src/mem/probes/stack_dist.cc +++ b/src/mem/probes/stack_dist.cc @@ -57,6 +57,8 @@ StackDistProbe::StackDistProbe(StackDistProbeParams *p) void StackDistProbe::regStats() { + BaseMemProbe::regStats(); + const StackDistProbeParams *p( dynamic_cast<const StackDistProbeParams *>(params())); assert(p); diff --git a/src/mem/ruby/network/garnet/BaseGarnetNetwork.cc b/src/mem/ruby/network/garnet/BaseGarnetNetwork.cc index 1213073e9..2bd2acb9f 100644 --- a/src/mem/ruby/network/garnet/BaseGarnetNetwork.cc +++ b/src/mem/ruby/network/garnet/BaseGarnetNetwork.cc @@ -69,6 +69,8 @@ BaseGarnetNetwork::init() void BaseGarnetNetwork::regStats() { + Network::regStats(); + m_flits_received .init(m_virtual_networks) .name(name() + ".flits_received") diff --git a/src/mem/ruby/network/garnet/fixed-pipeline/Router_d.cc b/src/mem/ruby/network/garnet/fixed-pipeline/Router_d.cc index 97bc1abdd..dab9b7dda 100644 --- a/src/mem/ruby/network/garnet/fixed-pipeline/Router_d.cc +++ b/src/mem/ruby/network/garnet/fixed-pipeline/Router_d.cc @@ -158,6 +158,8 @@ Router_d::update_sw_winner(int inport, flit_d *t_flit) void Router_d::regStats() { + BasicRouter::regStats(); + m_buffer_reads .name(name() + ".buffer_reads") .flags(Stats::nozero) diff --git a/src/mem/ruby/network/simple/SimpleNetwork.cc b/src/mem/ruby/network/simple/SimpleNetwork.cc index 25d0b6f4b..2fc7b6440 100644 --- a/src/mem/ruby/network/simple/SimpleNetwork.cc +++ b/src/mem/ruby/network/simple/SimpleNetwork.cc @@ -132,6 +132,8 @@ SimpleNetwork::makeInternalLink(SwitchID src, SwitchID dest, BasicLink* link, void SimpleNetwork::regStats() { + Network::regStats(); + for (MessageSizeType type = MessageSizeType_FIRST; type < MessageSizeType_NUM; ++type) { m_msg_counts[(unsigned int) type] diff --git a/src/mem/ruby/network/simple/Switch.cc b/src/mem/ruby/network/simple/Switch.cc index 747884f16..78f5b609c 100644 --- a/src/mem/ruby/network/simple/Switch.cc +++ b/src/mem/ruby/network/simple/Switch.cc @@ 
-112,6 +112,8 @@ Switch::getThrottle(LinkID link_number) const void Switch::regStats() { + BasicRouter::regStats(); + for (int link = 0; link < m_throttles.size(); link++) { m_throttles[link]->regStats(name()); } diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc index 2a53e53be..be48628e9 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.cc +++ b/src/mem/ruby/slicc_interface/AbstractController.cc @@ -76,6 +76,8 @@ AbstractController::resetStats() void AbstractController::regStats() { + MemObject::regStats(); + m_fully_busy_cycles .name(name() + ".fully_busy_cycles") .desc("cycles for which number of transistions == max transitions") diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc index f7c196119..36d109769 100644 --- a/src/mem/ruby/structures/CacheMemory.cc +++ b/src/mem/ruby/structures/CacheMemory.cc @@ -488,6 +488,8 @@ CacheMemory::isLocked(Addr address, int context) void CacheMemory::regStats() { + SimObject::regStats(); + m_demand_hits .name(name() + ".demand_hits") .desc("Number of cache demand hits") diff --git a/src/mem/ruby/structures/Prefetcher.cc b/src/mem/ruby/structures/Prefetcher.cc index ce6d36c04..eef51dcf7 100644 --- a/src/mem/ruby/structures/Prefetcher.cc +++ b/src/mem/ruby/structures/Prefetcher.cc @@ -86,6 +86,8 @@ Prefetcher::~Prefetcher() void Prefetcher::regStats() { + SimObject::regStats(); + numMissObserved .name(name() + ".miss_observed") .desc("number of misses observed") diff --git a/src/mem/ruby/system/RubySystem.hh b/src/mem/ruby/system/RubySystem.hh index 62330e19d..8ebd3494a 100644 --- a/src/mem/ruby/system/RubySystem.hh +++ b/src/mem/ruby/system/RubySystem.hh @@ -89,7 +89,10 @@ class RubySystem : public ClockedObject return m_profiler; } - void regStats() override { m_profiler->regStats(name()); } + void regStats() override { + ClockedObject::regStats(); + m_profiler->regStats(name()); + } void collateStats() { 
m_profiler->collateStats(); } void resetStats() override; diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index dedade3cf..fbaad8407 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -688,6 +688,8 @@ Sequencer::evictionCallback(Addr address) void Sequencer::regStats() { + RubyPort::regStats(); + m_store_waiting_on_load .name(name() + ".store_waiting_on_load") .desc("Number of times a store aliased with a pending load") diff --git a/src/mem/snoop_filter.cc b/src/mem/snoop_filter.cc index 9d02ed249..9e8f8afb8 100755 --- a/src/mem/snoop_filter.cc +++ b/src/mem/snoop_filter.cc @@ -351,6 +351,8 @@ SnoopFilter::updateResponse(const Packet* cpkt, const SlavePort& slave_port) void SnoopFilter::regStats() { + SimObject::regStats(); + totRequests .name(name() + ".tot_requests") .desc("Total number of requests made to the snoop filter."); diff --git a/src/sim/ClockedObject.py b/src/sim/ClockedObject.py index 2562f1f01..b933ea07a 100644 --- a/src/sim/ClockedObject.py +++ b/src/sim/ClockedObject.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012 ARM Limited +# Copyright (c) 2012, 2015 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -39,6 +39,24 @@ from m5.SimObject import SimObject from m5.params import * from m5.proxy import * +# Enumerate set of allowed power states that can be used by a clocked object. +# The list is kept generic to express a base minimal set. +# State definition :- +# Undefined: Invalid state, no power state derived information is available. +# On: The logic block is actively running and consuming dynamic and leakage +# energy depending on the amount of processing required. +# Clk_gated: The clock circuitry within the block is gated to save dynamic +# energy, the power supply to the block is still on and leakage +# energy is being consumed by the block. 
+# Sram_retention: The SRAMs within the logic blocks are pulled into retention +# state to reduce leakage energy further. +# Off: The logic block is power gated and is not consuming any energy. +class PwrState(Enum): vals = ['UNDEFINED', + 'ON', + 'CLK_GATED', + 'SRAM_RETENTION', + 'OFF'] + class ClockedObject(SimObject): type = 'ClockedObject' abstract = True @@ -47,3 +65,12 @@ class ClockedObject(SimObject): # The clock domain this clocked object belongs to, inheriting the # parent's clock domain by default clk_domain = Param.ClockDomain(Parent.clk_domain, "Clock domain") + + # Provide initial power state, should ideally get redefined in startup + # routine + default_p_state = Param.PwrState("UNDEFINED", "Default Power State") + + p_state_clk_gate_min = Param.Latency('1ns', "Min value of the distribution") + p_state_clk_gate_max = Param.Latency('1s', "Max value of the distribution") + p_state_clk_gate_bins = Param.Unsigned('20', + "# bins in clk gated distribution") diff --git a/src/sim/SConscript b/src/sim/SConscript index 3238301ed..e40c43f0c 100644 --- a/src/sim/SConscript +++ b/src/sim/SConscript @@ -69,6 +69,7 @@ Source('voltage_domain.cc') Source('linear_solver.cc') Source('system.cc') Source('dvfs_handler.cc') +Source('clocked_object.cc') if env['TARGET_ISA'] != 'null': SimObject('InstTracer.py') diff --git a/src/sim/clock_domain.cc b/src/sim/clock_domain.cc index 1ccee7f1d..9865c4d11 100644 --- a/src/sim/clock_domain.cc +++ b/src/sim/clock_domain.cc @@ -56,6 +56,8 @@ void ClockDomain::regStats() { + SimObject::regStats(); + using namespace Stats; // Expose the current clock period as a stat for observability in diff --git a/src/sim/clocked_object.cc b/src/sim/clocked_object.cc new file mode 100644 index 000000000..9a682a4ce --- /dev/null +++ b/src/sim/clocked_object.cc @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2015 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as 
granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Akash Bagdia + * David Guillen Fandos + */ + +#include "sim/clocked_object.hh" + +#include "base/misc.hh" + +void +ClockedObject::serialize(CheckpointOut &cp) const +{ + unsigned int currPwrState = (unsigned int)_currPwrState; + + SERIALIZE_SCALAR(currPwrState); + SERIALIZE_SCALAR(prvEvalTick); +} + +void +ClockedObject::unserialize(CheckpointIn &cp) +{ + unsigned int currPwrState; + + UNSERIALIZE_SCALAR(currPwrState); + UNSERIALIZE_SCALAR(prvEvalTick); + + _currPwrState = Enums::PwrState(currPwrState); +} + +void +ClockedObject::pwrState(Enums::PwrState p) +{ + // Function should ideally be called only when there is a state change + if (_currPwrState == p) { + warn("ClockedObject: Already in the requested power state, request "\ + "ignored"); + return; + } + + // No need to compute stats if in the same tick, update state + // though. This can happen in cases like a) during start of the + // simulation multiple state changes happen in init/startup phase, + // b) one takes a decision to migrate state but decides to revert + // back to the original state in the same tick if other conditions + // are not met elsewhere. Any state change related stats would have + // been recorded on the previous call to the pwrState() function. + if (prvEvalTick == curTick()) { + warn("ClockedObject: More than one power state change request "\ + "encountered within the same simulation tick"); + _currPwrState = p; + return; + } + + // Record stats for previous state. + computeStats(); + + _currPwrState = p; + + numPwrStateTransitions++; +} + +void +ClockedObject::computeStats() +{ + // Calculate time elapsed from last (valid) state change + Tick elapsed_time = curTick() - prvEvalTick; + + pwrStateResidencyTicks[_currPwrState] += elapsed_time; + + // Time spent in CLK_GATED state, this might change depending on + // transition to other low power states in respective simulation + // objects. 
+ if (_currPwrState == Enums::PwrState::CLK_GATED) { + pwrStateClkGateDist.sample(elapsed_time); + } + + prvEvalTick = curTick(); +} + +std::vector<double> +ClockedObject::pwrStateWeights() const +{ + // Get residency stats + std::vector<double> ret; + Stats::VCounter residencies; + pwrStateResidencyTicks.value(residencies); + + // Account for current state too! + Tick elapsed_time = curTick() - prvEvalTick; + residencies[_currPwrState] += elapsed_time; + + ret.resize(Enums::PwrState::Num_PwrState); + for (unsigned i = 0; i < Enums::PwrState::Num_PwrState; i++) + ret[i] = residencies[i] / + (pwrStateResidencyTicks.total() + elapsed_time); + + return ret; +} + +void +ClockedObject::regStats() +{ + SimObject::regStats(); + + using namespace Stats; + + numPwrStateTransitions + .name(params()->name + ".numPwrStateTransitions") + .desc("Number of power state transitions") + ; + + // Each sample is time in ticks + unsigned num_bins = std::max(params()->p_state_clk_gate_bins, 10U); + pwrStateClkGateDist + .init(params()->p_state_clk_gate_min, params()->p_state_clk_gate_max, + (params()->p_state_clk_gate_max / num_bins)) + .name(params()->name + ".pwrStateClkGateDist") + .desc("Distribution of time spent in the clock gated state") + .flags(pdf) + ; + + pwrStateResidencyTicks + .init(Enums::PwrState::Num_PwrState) + .name(params()->name + ".pwrStateResidencyTicks") + .desc("Cumulative time (in ticks) in various power states") + ; + for (int i = 0; i < Enums::PwrState::Num_PwrState; i++) { + pwrStateResidencyTicks.subname(i, Enums::PwrStateStrings[i]); + } + + numPwrStateTransitions = 0; + + /** + * For every stats dump, the power state residency and other distribution + * stats should be computed just before the dump to ensure correct stats + * value being reported for current dump window. 
It avoids things like + * having any unreported time spent in a power state to be forwarded to the + * next dump window which might have rather unpleasant effects (like + * perturbing the distribution stats). + */ + registerDumpCallback(new ClockedObjectDumpCallback(this)); +} diff --git a/src/sim/clocked_object.hh b/src/sim/clocked_object.hh index b9a2481ec..1ba5ca617 100644 --- a/src/sim/clocked_object.hh +++ b/src/sim/clocked_object.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013 ARM Limited + * Copyright (c) 2012-2013, 2015 ARM Limited * Copyright (c) 2013 Cornell University * All rights reserved * @@ -37,6 +37,8 @@ * * Authors: Andreas Hansson * Christopher Torng + * Akash Bagdia + * David Guillen Fandos */ /** @@ -47,8 +49,10 @@ #ifndef __SIM_CLOCKED_OBJECT_HH__ #define __SIM_CLOCKED_OBJECT_HH__ +#include "base/callback.hh" #include "base/intmath.hh" #include "base/misc.hh" +#include "enums/PwrState.hh" #include "params/ClockedObject.hh" #include "sim/core.hh" #include "sim/clock_domain.hh" @@ -233,7 +237,58 @@ class ClockedObject { public: ClockedObject(const ClockedObjectParams *p) - : SimObject(p), Clocked(*p->clk_domain) { } + : SimObject(p), Clocked(*p->clk_domain), + _currPwrState(p->default_p_state), + prvEvalTick(0) + { } + + /** Parameters of ClockedObject */ + typedef ClockedObjectParams Params; + const Params* params() const + { return reinterpret_cast<const Params*>(_params); } + + void serialize(CheckpointOut &cp) const override; + void unserialize(CheckpointIn &cp) override; + + inline Enums::PwrState pwrState() const + { return _currPwrState; } + + inline std::string pwrStateName() const + { return Enums::PwrStateStrings[_currPwrState]; } + + /** Returns the fraction of total time spent in each power state */ + std::vector<double> pwrStateWeights() const; + + /** + * Record stats values like state residency by computing the time + * difference from previous update. Also, updates the previous + * evaluation tick once all stats are recorded. 
+ * Usually called on power state change and stats dump callback. + */ + void computeStats(); + + void pwrState(Enums::PwrState); + void regStats() override; + + protected: + + /** To keep track of the current power state */ + Enums::PwrState _currPwrState; + + Tick prvEvalTick; + + Stats::Scalar numPwrStateTransitions; + Stats::Distribution pwrStateClkGateDist; + Stats::Vector pwrStateResidencyTicks; + +}; + +class ClockedObjectDumpCallback : public Callback +{ + ClockedObject *co; + public: + ClockedObjectDumpCallback(ClockedObject *co_t) : co(co_t) {} + void process() override { co->computeStats(); } }; #endif //__SIM_CLOCKED_OBJECT_HH__ diff --git a/src/sim/voltage_domain.cc b/src/sim/voltage_domain.cc index b82efda33..61715dfbc 100644 --- a/src/sim/voltage_domain.cc +++ b/src/sim/voltage_domain.cc @@ -128,6 +128,8 @@ VoltageDomain::startup() { void VoltageDomain::regStats() { + SimObject::regStats(); + currentVoltage .method(this, &VoltageDomain::voltage) .name(params()->name + ".voltage") -- 2.30.2