using namespace std;
-Histogram::Histogram(int binsize, int bins)
+// Constructs a histogram: records the bin size, then lets clear(bins)
+// size the bin storage and reset every statistic.
+Histogram::Histogram(int binsize, uint32_t bins)
{
m_binsize = binsize;
- m_bins = bins;
- clear();
+ clear(bins);
}
Histogram::~Histogram()
}
+// Installs a new bin size and then resets the histogram to `bins`
+// empty bins by delegating to clear(bins).
void
-Histogram::clear(int binsize, int bins)
+Histogram::clear(int binsize, uint32_t bins)
{
m_binsize = binsize;
clear(bins);
}
+// Resizes the bin storage to `bins` entries, zeroes every bin, and
+// resets the largest-bin index, the maximum, the sample count and both
+// running sums. The bin size (m_binsize) is not modified here.
void
-Histogram::clear(int bins)
+Histogram::clear(uint32_t bins)
{
- m_bins = bins;
m_largest_bin = 0;
m_max = 0;
- m_data.resize(m_bins);
- for (int i = 0; i < m_bins; i++) {
+ m_data.resize(bins);
+ // resize() keeps the values of surviving elements, so zero explicitly.
+ for (uint32_t i = 0; i < bins; i++) {
m_data[i] = 0;
}
+
m_count = 0;
+ // NOTE(review): m_max was already reset above; this second reset is redundant.
m_max = 0;
-
m_sumSamples = 0;
m_sumSquaredSamples = 0;
}
+// Doubles the width of every bin of a linear histogram in place.
+//
+// Adjacent bins (2k, 2k+1) are summed into bin k, the vacated upper bins
+// are zeroed, and m_binsize is doubled. The number of bins is unchanged.
+// When the bin count is odd, the last bin has no partner; its samples are
+// carried over into the next merged bin rather than being dropped.
+// Must not be called on a log base 2 histogram (m_binsize == -1).
+void
+Histogram::doubleBinSize()
+{
+    assert(m_binsize != -1);
+    const uint32_t t_bins = m_data.size();
+    const uint32_t merged = t_bins / 2;
+
+    // Reads (2i, 2i+1) always stay at or ahead of the write position i,
+    // so merging in place is safe.
+    for (uint32_t i = 0; i < merged; i++) {
+        m_data[i] = m_data[i*2] + m_data[i*2 + 1];
+    }
+
+    // An unpaired trailing bin still covers the lower half of the next
+    // doubled bin; keep its count instead of zeroing it away.
+    uint32_t first_empty = merged;
+    if (t_bins % 2 != 0) {
+        m_data[first_empty] = m_data[t_bins - 1];
+        first_empty++;
+    }
+
+    for (uint32_t i = first_empty; i < t_bins; i++) {
+        m_data[i] = 0;
+    }
+
+    m_binsize *= 2;
+}
void
Histogram::add(int64 value)
m_sumSamples += value;
m_sumSquaredSamples += (value*value);
- int index;
+ uint32_t index;
+
if (m_binsize == -1) {
// This is a log base 2 histogram
if (value == 0) {
}
} else {
// This is a linear histogram
- while (m_max >= (m_bins * m_binsize)) {
- for (int i = 0; i < m_bins/2; i++) {
- m_data[i] = m_data[i*2] + m_data[i*2 + 1];
- }
- for (int i = m_bins/2; i < m_bins; i++) {
- m_data[i] = 0;
- }
- m_binsize *= 2;
- }
+ uint32_t t_bins = m_data.size();
+
+ while (m_max >= (t_bins * m_binsize)) doubleBinSize();
index = value/m_binsize;
}
- assert(index >= 0);
+
+ assert(index < m_data.size());
m_data[index]++;
m_largest_bin = max(m_largest_bin, index);
}
+// Merges every sample recorded in `hist` into this histogram.
+//
+// Both histograms must have the same number of bins and be of the same
+// kind (both log base 2, i.e. bin size -1, or both linear); otherwise
+// fatal() is called. For linear histograms the one with the smaller bin
+// size is widened via doubleBinSize() until the sizes match, which is why
+// `hist` is taken by non-const reference and may be modified.
+//
+// The aggregate statistics (max, count, sum, sum of squares) are merged
+// exactly once and the bins are combined element-wise. Samples are NOT
+// replayed through add(int64): that would add to the running sums a
+// second time and would cost time proportional to the sample count.
void
-Histogram::add(const Histogram& hist)
+Histogram::add(Histogram& hist)
{
- assert(hist.getBins() == m_bins);
- assert(hist.getBinSize() == -1); // assume log histogram
- assert(m_binsize == -1);
+    const uint32_t t_bins = m_data.size();
- for (int j = 0; j < hist.getData(0); j++) {
- add(0);
+    if (hist.getBins() != t_bins) {
+        fatal("Histograms with different number of bins cannot be combined!");
}
- for (int i = 1; i < m_bins; i++) {
- for (int j = 0; j < hist.getData(i); j++) {
- add(1<<(i-1)); // account for the + 1 index
+
+    const bool both_log = hist.getBinSize() == -1 && m_binsize == -1;
+    const bool both_linear = hist.getBinSize() >= 1 && m_binsize >= 1;
+
+    if (both_linear) {
+        // Both histograms are linear.
+        // We are assuming that the two histograms have the same
+        // minimum value that they can store.
+        while (m_binsize > hist.getBinSize()) hist.doubleBinSize();
+        while (hist.getBinSize() > m_binsize) doubleBinSize();
+
+        assert(m_binsize == hist.getBinSize());
+    } else if (!both_log) {
+        fatal("Don't know how to combine log and linear histograms!");
}
+
+    // Aggregate statistics are merged once, directly from hist.
+    m_max = max(m_max, hist.getMax());
+    m_count += hist.size();
+    m_sumSamples += hist.getTotal();
+    m_sumSquaredSamples += hist.getSquaredTotal();
+
+    // With equal bin counts and (now) equal bin sizes, bin i of both
+    // histograms covers the same value range, for log and linear alike.
+    for (uint32_t i = 0; i < t_bins; i++) {
+        m_data[i] += hist.getData(i);
+
+        if (m_data[i] > 0) m_largest_bin = i;
}
}
<< " | ";
out << "standard deviation: " << getStandardDeviation() << " |";
}
- for (int i = 0; i < m_bins && i <= m_largest_bin; i++) {
+
+ for (uint32_t i = 0; i <= m_largest_bin; i++) {
if (multiplier == 1.0) {
out << " " << m_data[i];
} else {
+// Histogram of integer samples with a fixed number of bins. A bin size of
+// -1 selects a log base 2 histogram; otherwise the bins are linear.
class Histogram
{
public:
- Histogram(int binsize = 1, int bins = 50);
+ Histogram(int binsize = 1, uint32_t bins = 50);
~Histogram();
void add(int64 value);
- void add(const Histogram& hist);
- void clear() { clear(m_bins); }
- void clear(int bins);
- void clear(int binsize, int bins);
- int64 size() const { return m_count; }
- int getBins() const { return m_bins; }
+ // Merges hist into this histogram. hist is non-const because merging
+ // two linear histograms may call hist.doubleBinSize().
+ void add(Histogram& hist);
+ // Sums adjacent pairs of bins and doubles the bin size (linear only).
+ void doubleBinSize();
+
+ // Resets all statistics while keeping the current number of bins.
+ void clear() { clear(m_data.size()); }
+ void clear(uint32_t bins);
+ void clear(int binsize, uint32_t bins);
+
+ uint64_t size() const { return m_count; }
+ // The bin count is the size of the bin vector; it is not stored separately.
+ uint32_t getBins() const { return m_data.size(); }
int getBinSize() const { return m_binsize; }
int64 getTotal() const { return m_sumSamples; }
- int64 getData(int index) const { return m_data[index]; }
+ uint64_t getSquaredTotal() const { return m_sumSquaredSamples; }
+ // Returns the count stored in one bin; index is not bounds-checked.
+ uint64_t getData(int index) const { return m_data[index]; }
+ int64 getMax() const { return m_max; }
void printWithMultiplier(std::ostream& out, double multiplier) const;
void printPercent(std::ostream& out) const;
void print(std::ostream& out) const;
private:
- std::vector<int64> m_data;
+ // per-bin sample counts
+ std::vector<uint64_t> m_data;
int64 m_max; // the maximum value seen so far
- int64 m_count; // the number of elements added
+ uint64_t m_count; // the number of elements added
int m_binsize; // the size of each bucket
- int m_bins; // the number of buckets
- int m_largest_bin; // the largest bin used
+ uint32_t m_largest_bin; // the largest bin used
int64 m_sumSamples; // the sum of all samples
- int64 m_sumSquaredSamples; // the sum of the square of all samples
+ uint64_t m_sumSquaredSamples; // the sum of the square of all samples
double getStandardDeviation() const;
};
virtual void init();
- static int getNumberOfVirtualNetworks() { return m_virtual_networks; }
+ static uint32_t getNumberOfVirtualNetworks() { return m_virtual_networks; }
static uint32_t MessageSizeType_to_int(MessageSizeType size_type);
// returns the queue requested for the given component
out << endl;
}
+// Prints the message-delay statistics section to `out`.
+//
+// The per-controller delay histograms (overall and per virtual network)
+// are summed across every controller of every machine type into local
+// accumulators, which are then printed: first the total, then one line
+// per virtual network.
+void
+Profiler::printDelayProfile(ostream &out)
+{
+    out << "Message Delayed Cycles" << endl;
+    out << "----------------------" << endl;
+
+    const uint32_t numVNets = Network::getNumberOfVirtualNetworks();
+    Histogram delayHistogram;
+    std::vector<Histogram> delayVCHistogram(numVNets);
+
+    // Distinct index names per loop level: the machine-type index is
+    // still needed by the iterator bounds while the virtual networks
+    // are being walked, so it must not be shadowed.
+    for (uint32_t mach = 0; mach < MachineType_NUM; mach++) {
+        for (map<uint32_t, AbstractController*>::iterator it =
+                 g_abs_controls[mach].begin();
+             it != g_abs_controls[mach].end(); ++it) {
+
+            AbstractController *ctr = it->second;
+            delayHistogram.add(ctr->getDelayHist());
+
+            for (uint32_t vnet = 0; vnet < numVNets; vnet++) {
+                delayVCHistogram[vnet].add(ctr->getDelayVCHist(vnet));
+            }
+        }
+    }
+
+    out << "Total_delay_cycles: " << delayHistogram << endl;
+
+    // Unsigned index to match numVNets and avoid a signed/unsigned compare.
+    for (uint32_t vnet = 0; vnet < numVNets; vnet++) {
+        out << " virtual_network_" << vnet << "_delay_cycles: "
+            << delayVCHistogram[vnet] << endl;
+    }
+}
+
void
Profiler::printStats(ostream& out, bool short_stats)
{
}
out << endl;
- out << "Message Delayed Cycles" << endl;
- out << "----------------------" << endl;
- out << "Total_delay_cycles: " << m_delayedCyclesHistogram << endl;
- out << "Total_nonPF_delay_cycles: "
- << m_delayedCyclesNonPFHistogram << endl;
- for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) {
- out << " virtual_network_" << i << "_delay_cycles: "
- << m_delayedCyclesVCHistograms[i] << endl;
- }
-
+ printDelayProfile(out);
printResourceUsage(out);
}
}
m_busyBankCount = 0;
- m_delayedCyclesHistogram.clear();
- m_delayedCyclesNonPFHistogram.clear();
- int size = Network::getNumberOfVirtualNetworks();
- m_delayedCyclesVCHistograms.resize(size);
- for (int i = 0; i < size; i++) {
- m_delayedCyclesVCHistograms[i].clear();
- }
-
m_missLatencyHistograms.resize(RubyRequestType_NUM);
for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
m_missLatencyHistograms[i].clear(200);
}
}
-void
-Profiler::profileMsgDelay(uint32_t virtualNetwork, Time delayCycles)
-{
- assert(virtualNetwork < m_delayedCyclesVCHistograms.size());
- m_delayedCyclesHistogram.add(delayCycles);
- m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles);
- if (virtualNetwork != 0) {
- m_delayedCyclesNonPFHistogram.add(delayCycles);
- }
-}
-
void
Profiler::profilePFWait(Time waitTime)
{
void sequencerRequests(int num) { m_sequencer_requests.add(num); }
- void profileMsgDelay(uint32_t virtualNetwork, Time delayCycles);
-
void print(std::ostream& out) const;
void rubyWatch(int proc);
private:
void printRequestProfile(std::ostream &out);
+ void printDelayProfile(std::ostream &out);
private:
// Private copy constructor and assignment operator
std::vector<Histogram> m_SWPrefetchLatencyHistograms;
std::vector<Histogram> m_SWPrefetchMachLatencyHistograms;
- Histogram m_delayedCyclesHistogram;
- Histogram m_delayedCyclesNonPFHistogram;
- std::vector<Histogram> m_delayedCyclesVCHistograms;
-
Histogram m_outstanding_requests;
Histogram m_outstanding_persistent_requests;
{
m_requestProfileMap.clear();
m_request_count = 0;
+
+ m_delayHistogram.clear();
+
+ uint32_t size = Network::getNumberOfVirtualNetworks();
+ m_delayVCHistogram.resize(size);
+ for (uint32_t i = 0; i < size; i++) {
+ m_delayVCHistogram[i].clear();
+ }
}
void
// default value which is 0
m_requestProfileMap[request]++;
}
+
+// Records one message delay for this controller: once in the overall
+// delay histogram and once in the histogram of the given virtual
+// network. virtualNetwork must be a valid index into m_delayVCHistogram.
+void
+AbstractController::profileMsgDelay(uint32_t virtualNetwork, Time delay)
+{
+    assert(virtualNetwork < m_delayVCHistogram.size());
+
+    Histogram &vnetHistogram = m_delayVCHistogram[virtualNetwork];
+    m_delayHistogram.add(delay);
+    vnetHistogram.add(delay);
+}
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/common/Consumer.hh"
#include "mem/ruby/common/DataBlock.hh"
+#include "mem/ruby/common/Histogram.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/recorder/CacheRecorder.hh"
#include "mem/ruby/system/MachineID.hh"
const std::map<std::string, uint64_t>& getRequestProfileMap() const
{ return m_requestProfileMap; }
+ Histogram& getDelayHist() { return m_delayHistogram; }
+ Histogram& getDelayVCHist(uint32_t index)
+ { return m_delayVCHistogram[index]; }
+
protected:
//! Profiles original cache requests including PUTs
void profileRequest(const std::string &request);
+ //! Profiles the delay associated with messages.
+ void profileMsgDelay(uint32_t virtualNetwork, Time delay);
protected:
int m_transitions_per_cycle;
//! call requisite function for updating the count.
std::map<std::string, uint64_t> m_requestProfileMap;
uint64_t m_request_count;
+
+ //! Histogram for profiling delay for the messages this controller
+ //! cares for
+ Histogram m_delayHistogram;
+ std::vector<Histogram> m_delayVCHistogram;
};
#endif // __MEM_RUBY_SLICC_INTERFACE_ABSTRACTCONTROLLER_HH__
profileSharing(addr, type, requestor, sharers, owner);
}
-void
-profileMsgDelay(uint32_t virtualNetwork, Time delayCycles)
-{
- g_system_ptr->getProfiler()->profileMsgDelay(virtualNetwork, delayCycles);
-}
-
void
profileGetX(const Address& datablock, const Address& PC, const Set& owner,
const Set& sharers, NodeID requestor)
void profile_filter_action(int action);
void profile_persistent_prediction(const Address& addr, AccessType type);
void profile_average_latency_estimate(int latency);
-void profileMsgDelay(uint32_t virtualNetwork, Time delayCycles);
void profile_multicast_retry(const Address& addr, int count);
void profileGetX(const Address& datablock, const Address& PC, const Set& owner,