From 78270ede7be98d78671940b7f0a8e33c6d810164 Mon Sep 17 00:00:00 2001 From: Hoa Nguyen Date: Thu, 22 Oct 2020 17:27:15 -0700 Subject: [PATCH 1/1] mem-ruby: Update stats style This commit moves stats from several classes in mem/ruby to corresponding Stats::Group's. For ruby's Profiler, additional changes are made: there are stats that are profiled for each RequestType, for each MachineType, and for each combination of RequestType and MachineType. The current naming scheme is <profiler_name>.<type_names>.<stat_name>. To make it easier for a stats parser to know whether the stat is of RequestType, or is of MachineType, or is of (RequestType, MachineType), a prefix is added as follows, <profiler_name>.<prefix>.<type_names>.<stat_name>, where <prefix> is one of {RequestType, MachineType, RequestTypeMachineType}. Another benefit of using this naming scheme is that the parser doesn't need to know every RequestType and MachineType. Change-Id: I8b8bdd771c7798954f984d416f521e8eb42d01ed Signed-off-by: Hoa Nguyen Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/36478 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/mem/ruby/network/MessageBuffer.cc | 66 ++--- src/mem/ruby/network/MessageBuffer.hh | 6 +- src/mem/ruby/profiler/Profiler.cc | 334 +++++++++++++--------- src/mem/ruby/profiler/Profiler.hh | 140 +++++---- src/mem/ruby/structures/CacheMemory.cc | 172 +++++------ src/mem/ruby/structures/CacheMemory.hh | 48 ++-- src/mem/ruby/structures/RubyPrefetcher.cc | 75 ++--- src/mem/ruby/structures/RubyPrefetcher.hh | 35 +-- src/mem/ruby/system/GPUCoalescer.cc | 89 +++--- src/mem/ruby/system/GPUCoalescer.hh | 1 - src/mem/ruby/system/HTMSequencer.cc | 31 +- src/mem/ruby/system/RubySystem.hh | 1 - src/mem/ruby/system/Sequencer.cc | 112 ++++---- src/mem/ruby/system/Sequencer.hh | 1 - 14 files changed, 590 insertions(+), 521 deletions(-) diff --git a/src/mem/ruby/network/MessageBuffer.cc b/src/mem/ruby/network/MessageBuffer.cc index 2eec1107b..455b8c1c6 100644 --- a/src/mem/ruby/network/MessageBuffer.cc +++ 
b/src/mem/ruby/network/MessageBuffer.cc @@ -58,7 +58,14 @@ MessageBuffer::MessageBuffer(const Params &p) m_time_last_time_enqueue(0), m_time_last_time_pop(0), m_last_arrival_time(0), m_strict_fifo(p.ordered), m_randomization(p.randomization), - m_allow_zero_latency(p.allow_zero_latency) + m_allow_zero_latency(p.allow_zero_latency), + ADD_STAT(m_not_avail_count, "Number of times this buffer did not have " + "N slots available"), + ADD_STAT(m_buf_msgs, "Average number of messages in buffer"), + ADD_STAT(m_stall_time, "Average number of cycles messages are stalled in " + "this MB"), + ADD_STAT(m_stall_count, "Number of times messages were stalled"), + ADD_STAT(m_occupancy, "Average occupancy of buffer capacity") { m_msg_counter = 0; m_consumer = NULL; @@ -76,6 +83,28 @@ MessageBuffer::MessageBuffer(const Params &p) m_stall_time = 0; m_dequeue_callback = nullptr; + + // stats + m_not_avail_count + .flags(Stats::nozero); + + m_buf_msgs + .flags(Stats::nozero); + + m_stall_count + .flags(Stats::nozero); + + m_occupancy + .flags(Stats::nozero); + + m_stall_time + .flags(Stats::nozero); + + if (m_max_size > 0) { + m_occupancy = m_buf_msgs / m_max_size; + } else { + m_occupancy = 0; + } } unsigned int @@ -457,41 +486,6 @@ MessageBuffer::isReady(Tick current_time) const (m_prio_heap.front()->getLastEnqueueTime() <= current_time)); } -void -MessageBuffer::regStats() -{ - m_not_avail_count - .name(name() + ".not_avail_count") - .desc("Number of times this buffer did not have N slots available") - .flags(Stats::nozero); - - m_buf_msgs - .name(name() + ".avg_buf_msgs") - .desc("Average number of messages in buffer") - .flags(Stats::nozero); - - m_stall_count - .name(name() + ".num_msg_stalls") - .desc("Number of times messages were stalled") - .flags(Stats::nozero); - - m_occupancy - .name(name() + ".avg_buf_occ") - .desc("Average occupancy of buffer capacity") - .flags(Stats::nozero); - - m_stall_time - .name(name() + ".avg_stall_time") - .desc("Average number of cycles 
messages are stalled in this MB") - .flags(Stats::nozero); - - if (m_max_size > 0) { - m_occupancy = m_buf_msgs / m_max_size; - } else { - m_occupancy = 0; - } -} - uint32_t MessageBuffer::functionalAccess(Packet *pkt, bool is_read) { diff --git a/src/mem/ruby/network/MessageBuffer.hh b/src/mem/ruby/network/MessageBuffer.hh index 65bae7943..b09cb8ab1 100644 --- a/src/mem/ruby/network/MessageBuffer.hh +++ b/src/mem/ruby/network/MessageBuffer.hh @@ -155,8 +155,6 @@ class MessageBuffer : public SimObject return RubyDummyPort::instance(); } - void regStats() override; - // Function for figuring out if any of the messages in the buffer need // to be updated with the data from the packet. // Return value indicates the number of messages that were updated. @@ -243,8 +241,6 @@ class MessageBuffer : public SimObject unsigned int m_stalled_at_cycle_start; unsigned int m_msgs_this_cycle; - Stats::Scalar m_not_avail_count; // count the # of times I didn't have N - // slots available uint64_t m_msg_counter; int m_priority_rank; const bool m_strict_fifo; @@ -254,6 +250,8 @@ class MessageBuffer : public SimObject int m_input_link_id; int m_vnet_id; + Stats::Scalar m_not_avail_count; // count the # of times I didn't have N + // slots available Stats::Average m_buf_msgs; Stats::Average m_stall_time; Stats::Scalar m_stall_count; diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc index a6933b03f..91d28c609 100644 --- a/src/mem/ruby/profiler/Profiler.cc +++ b/src/mem/ruby/profiler/Profiler.cc @@ -83,7 +83,8 @@ using m5::stl_helpers::operator<<; Profiler::Profiler(const RubySystemParams &p, RubySystem *rs) : m_ruby_system(rs), m_hot_lines(p.hot_lines), m_all_instructions(p.all_instructions), - m_num_vnets(p.number_of_virtual_networks) + m_num_vnets(p.number_of_virtual_networks), + rubyProfilerStats(rs, this) { m_address_profiler_ptr = new AddressProfiler(p.num_of_sequencers, this); m_address_profiler_ptr->setHotLines(m_hot_lines); @@ -100,244 +101,254 @@ 
Profiler::~Profiler() { } -void -Profiler::regStats(const std::string &pName) +Profiler:: +ProfilerStats::ProfilerStats(Stats::Group *parent, Profiler *profiler) + : Stats::Group(parent), + perRequestTypeStats(parent), + perMachineTypeStats(parent), + perRequestTypeMachineTypeStats(parent), + ADD_STAT(delayHistogram, "delay histogram for all message"), + ADD_STAT(m_outstandReqHistSeqr, ""), + ADD_STAT(m_outstandReqHistCoalsr, ""), + ADD_STAT(m_latencyHistSeqr, ""), + ADD_STAT(m_latencyHistCoalsr, ""), + ADD_STAT(m_hitLatencyHistSeqr, ""), + ADD_STAT(m_missLatencyHistSeqr, ""), + ADD_STAT(m_missLatencyHistCoalsr, "") { - if (!m_all_instructions) { - m_address_profiler_ptr->regStats(pName); - } - - if (m_all_instructions) { - m_inst_profiler_ptr->regStats(pName); - } - delayHistogram .init(10) - .name(pName + ".delayHist") - .desc("delay histogram for all message") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - for (int i = 0; i < m_num_vnets; i++) { - delayVCHistogram.push_back(new Stats::Histogram()); + for (int i = 0; i < profiler->m_num_vnets; i++) { + delayVCHistogram.push_back(new Stats::Histogram(this)); delayVCHistogram[i] ->init(10) - .name(pName + csprintf(".delayVCHist.vnet_%i", i)) + .name(csprintf("delayVCHist.vnet_%i", i)) .desc(csprintf("delay histogram for vnet_%i", i)) .flags(Stats::nozero | Stats::pdf | Stats::oneline); } m_outstandReqHistSeqr .init(10) - .name(pName + ".outstanding_req_hist_seqr") - .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); m_outstandReqHistCoalsr .init(10) - .name(pName + ".outstanding_req_hist_coalsr") - .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); m_latencyHistSeqr .init(10) - .name(pName + ".latency_hist_seqr") - .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); m_latencyHistCoalsr .init(10) - .name(pName + ".latency_hist_coalsr") - .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); m_hitLatencyHistSeqr .init(10) - .name(pName + ".hit_latency_hist_seqr") - 
.desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); m_missLatencyHistSeqr .init(10) - .name(pName + ".miss_latency_hist_seqr") - .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); m_missLatencyHistCoalsr .init(10) - .name(pName + ".miss_latency_hist_coalsr") - .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); +} +Profiler::ProfilerStats:: +PerRequestTypeStats::PerRequestTypeStats(Stats::Group *parent) + : Stats::Group(parent, "RequestType") +{ for (int i = 0; i < RubyRequestType_NUM; i++) { - m_typeLatencyHistSeqr.push_back(new Stats::Histogram()); + m_typeLatencyHistSeqr.push_back(new Stats::Histogram(this)); m_typeLatencyHistSeqr[i] ->init(10) - .name(pName + csprintf(".%s.latency_hist_seqr", - RubyRequestType(i))) + .name(csprintf("%s.latency_hist_seqr", RubyRequestType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_typeLatencyHistCoalsr.push_back(new Stats::Histogram()); + m_typeLatencyHistCoalsr.push_back(new Stats::Histogram(this)); m_typeLatencyHistCoalsr[i] ->init(10) - .name(pName + csprintf(".%s.latency_hist_coalsr", - RubyRequestType(i))) + .name(csprintf("%s.latency_hist_coalsr", RubyRequestType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_hitTypeLatencyHistSeqr.push_back(new Stats::Histogram()); + m_hitTypeLatencyHistSeqr.push_back(new Stats::Histogram(this)); m_hitTypeLatencyHistSeqr[i] ->init(10) - .name(pName + csprintf(".%s.hit_latency_hist_seqr", - RubyRequestType(i))) + .name(csprintf("%s.hit_latency_hist_seqr", RubyRequestType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_missTypeLatencyHistSeqr.push_back(new Stats::Histogram()); + m_missTypeLatencyHistSeqr.push_back(new Stats::Histogram(this)); m_missTypeLatencyHistSeqr[i] ->init(10) - .name(pName + csprintf(".%s.miss_latency_hist_seqr", - RubyRequestType(i))) + .name(csprintf("%s.miss_latency_hist_seqr", RubyRequestType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | 
Stats::oneline); - m_missTypeLatencyHistCoalsr.push_back(new Stats::Histogram()); + m_missTypeLatencyHistCoalsr.push_back(new Stats::Histogram(this)); m_missTypeLatencyHistCoalsr[i] ->init(10) - .name(pName + csprintf(".%s.miss_latency_hist_coalsr", - RubyRequestType(i))) + .name(csprintf("%s.miss_latency_hist_coalsr", RubyRequestType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); } +} +Profiler::ProfilerStats:: +PerMachineTypeStats::PerMachineTypeStats(Stats::Group *parent) + : Stats::Group(parent, "MachineType") +{ for (int i = 0; i < MachineType_NUM; i++) { - m_hitMachLatencyHistSeqr.push_back(new Stats::Histogram()); + m_hitMachLatencyHistSeqr.push_back(new Stats::Histogram(this)); m_hitMachLatencyHistSeqr[i] ->init(10) - .name(pName + csprintf(".%s.hit_mach_latency_hist_seqr", - MachineType(i))) + .name(csprintf("%s.hit_mach_latency_hist_seqr", MachineType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_missMachLatencyHistSeqr.push_back(new Stats::Histogram()); + m_missMachLatencyHistSeqr.push_back(new Stats::Histogram(this)); m_missMachLatencyHistSeqr[i] ->init(10) - .name(pName + csprintf(".%s.miss_mach_latency_hist_seqr", - MachineType(i))) + .name(csprintf("%s.miss_mach_latency_hist_seqr", MachineType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_missMachLatencyHistCoalsr.push_back(new Stats::Histogram()); + m_missMachLatencyHistCoalsr.push_back(new Stats::Histogram(this)); m_missMachLatencyHistCoalsr[i] ->init(10) - .name(pName + csprintf(".%s.miss_mach_latency_hist_coalsr", - MachineType(i))) + .name(csprintf("%s.miss_mach_latency_hist_coalsr", + MachineType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_IssueToInitialDelayHistSeqr.push_back(new Stats::Histogram()); + m_IssueToInitialDelayHistSeqr.push_back(new Stats::Histogram(this)); m_IssueToInitialDelayHistSeqr[i] ->init(10) - .name(pName + csprintf( - ".%s.miss_latency_hist_seqr.issue_to_initial_request", 
+ .name(csprintf( + "%s.miss_latency_hist_seqr.issue_to_initial_request", MachineType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_IssueToInitialDelayHistCoalsr.push_back(new Stats::Histogram()); + m_IssueToInitialDelayHistCoalsr.push_back(new Stats::Histogram(this)); m_IssueToInitialDelayHistCoalsr[i] ->init(10) - .name(pName + csprintf( - ".%s.miss_latency_hist_coalsr.issue_to_initial_request", + .name(csprintf( + "%s.miss_latency_hist_coalsr.issue_to_initial_request", MachineType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_InitialToForwardDelayHistSeqr.push_back(new Stats::Histogram()); + m_InitialToForwardDelayHistSeqr.push_back(new Stats::Histogram(this)); m_InitialToForwardDelayHistSeqr[i] ->init(10) - .name(pName + csprintf(".%s.miss_latency_hist_seqr.initial_to_forward", - MachineType(i))) + .name(csprintf("%s.miss_latency_hist_seqr.initial_to_forward", + MachineType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_InitialToForwardDelayHistCoalsr.push_back(new Stats::Histogram()); + m_InitialToForwardDelayHistCoalsr + .push_back(new Stats::Histogram(this)); m_InitialToForwardDelayHistCoalsr[i] ->init(10) - .name(pName + csprintf(".%s.miss_latency_hist_coalsr.initial_to_forward", - MachineType(i))) + .name(csprintf("%s.miss_latency_hist_coalsr.initial_to_forward", + MachineType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_ForwardToFirstResponseDelayHistSeqr.push_back(new Stats::Histogram()); + m_ForwardToFirstResponseDelayHistSeqr + .push_back(new Stats::Histogram(this)); + m_ForwardToFirstResponseDelayHistSeqr[i] ->init(10) - .name(pName + csprintf( - ".%s.miss_latency_hist_seqr.forward_to_first_response", + .name(csprintf( + "%s.miss_latency_hist_seqr.forward_to_first_response", MachineType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_ForwardToFirstResponseDelayHistCoalsr.push_back(new Stats::Histogram()); + 
m_ForwardToFirstResponseDelayHistCoalsr + .push_back(new Stats::Histogram(this)); m_ForwardToFirstResponseDelayHistCoalsr[i] ->init(10) - .name(pName + csprintf( - ".%s.miss_latency_hist_coalsr.forward_to_first_response", + .name(csprintf( + "%s.miss_latency_hist_coalsr.forward_to_first_response", MachineType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_FirstResponseToCompletionDelayHistSeqr.push_back(new Stats::Histogram()); + m_FirstResponseToCompletionDelayHistSeqr + .push_back(new Stats::Histogram(this)); m_FirstResponseToCompletionDelayHistSeqr[i] ->init(10) - .name(pName + csprintf( - ".%s.miss_latency_hist_seqr.first_response_to_completion", + .name(csprintf( + "%s.miss_latency_hist_seqr.first_response_to_completion", MachineType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_FirstResponseToCompletionDelayHistCoalsr.push_back(new Stats::Histogram()); + m_FirstResponseToCompletionDelayHistCoalsr + .push_back(new Stats::Histogram(this)); m_FirstResponseToCompletionDelayHistCoalsr[i] ->init(10) - .name(pName + csprintf( - ".%s.miss_latency_hist_coalsr.first_response_to_completion", + .name(csprintf( + "%s.miss_latency_hist_coalsr.first_response_to_completion", MachineType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); + m_IncompleteTimesSeqr.push_back(new Stats::Scalar(this)); m_IncompleteTimesSeqr[i] - .name(pName + csprintf(".%s.incomplete_times_seqr", MachineType(i))) + ->name(csprintf("%s.incomplete_times_seqr", MachineType(i))) .desc("") .flags(Stats::nozero); } +} +Profiler::ProfilerStats:: +PerRequestTypeMachineTypeStats:: +PerRequestTypeMachineTypeStats(Stats::Group *parent) + : Stats::Group(parent, "RequestTypeMachineType") +{ for (int i = 0; i < RubyRequestType_NUM; i++) { - m_hitTypeMachLatencyHistSeqr.push_back(std::vector()); - m_missTypeMachLatencyHistSeqr.push_back(std::vector()); - m_missTypeMachLatencyHistCoalsr.push_back(std::vector()); + m_hitTypeMachLatencyHistSeqr + 
.push_back(std::vector()); + m_missTypeMachLatencyHistSeqr + .push_back(std::vector()); + m_missTypeMachLatencyHistCoalsr + .push_back(std::vector()); for (int j = 0; j < MachineType_NUM; j++) { - m_hitTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram()); + m_hitTypeMachLatencyHistSeqr[i] + .push_back(new Stats::Histogram(this)); m_hitTypeMachLatencyHistSeqr[i][j] ->init(10) - .name(pName + csprintf(".%s.%s.hit_type_mach_latency_hist_seqr", - RubyRequestType(i), MachineType(j))) + .name(csprintf("%s.%s.hit_type_mach_latency_hist_seqr", + RubyRequestType(i), MachineType(j))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_missTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram()); + m_missTypeMachLatencyHistSeqr[i] + .push_back(new Stats::Histogram(this)); m_missTypeMachLatencyHistSeqr[i][j] ->init(10) - .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_seqr", - RubyRequestType(i), MachineType(j))) + .name(csprintf("%s.%s.miss_type_mach_latency_hist_seqr", + RubyRequestType(i), MachineType(j))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_missTypeMachLatencyHistCoalsr[i].push_back(new Stats::Histogram()); + m_missTypeMachLatencyHistCoalsr[i] + .push_back(new Stats::Histogram(this)); m_missTypeMachLatencyHistCoalsr[i][j] ->init(10) - .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_coalsr", - RubyRequestType(i), MachineType(j))) + .name(csprintf("%s.%s.miss_type_mach_latency_hist_coalsr", + RubyRequestType(i), MachineType(j))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); } @@ -361,10 +372,11 @@ Profiler::collateStats() it != m_ruby_system->m_abstract_controls[i].end(); ++it) { AbstractController *ctr = (*it).second; - delayHistogram.add(ctr->getDelayHist()); + rubyProfilerStats.delayHistogram.add(ctr->getDelayHist()); for (uint32_t i = 0; i < m_num_vnets; i++) { - delayVCHistogram[i]->add(ctr->getDelayVCHist(i)); + rubyProfilerStats. 
+ delayVCHistogram[i]->add(ctr->getDelayVCHist(i)); } } } @@ -377,12 +389,14 @@ Profiler::collateStats() AbstractController *ctr = (*it).second; Sequencer *seq = ctr->getCPUSequencer(); if (seq != NULL) { - m_outstandReqHistSeqr.add(seq->getOutstandReqHist()); + rubyProfilerStats. + m_outstandReqHistSeqr.add(seq->getOutstandReqHist()); } #ifdef BUILD_GPU GPUCoalescer *coal = ctr->getGPUCoalescer(); if (coal != NULL) { - m_outstandReqHistCoalsr.add(coal->getOutstandReqHist()); + rubyProfilerStats. + m_outstandReqHistCoalsr.add(coal->getOutstandReqHist()); } #endif } @@ -397,48 +411,80 @@ Profiler::collateStats() Sequencer *seq = ctr->getCPUSequencer(); if (seq != NULL) { // add all the latencies - m_latencyHistSeqr.add(seq->getLatencyHist()); - m_hitLatencyHistSeqr.add(seq->getHitLatencyHist()); - m_missLatencyHistSeqr.add(seq->getMissLatencyHist()); + rubyProfilerStats. + m_latencyHistSeqr.add(seq->getLatencyHist()); + rubyProfilerStats. + m_hitLatencyHistSeqr.add(seq->getHitLatencyHist()); + rubyProfilerStats. 
+ m_missLatencyHistSeqr.add(seq->getMissLatencyHist()); // add the per request type latencies for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) { - m_typeLatencyHistSeqr[j] + rubyProfilerStats + .perRequestTypeStats + .m_typeLatencyHistSeqr[j] ->add(seq->getTypeLatencyHist(j)); - m_hitTypeLatencyHistSeqr[j] + rubyProfilerStats + .perRequestTypeStats + .m_hitTypeLatencyHistSeqr[j] ->add(seq->getHitTypeLatencyHist(j)); - m_missTypeLatencyHistSeqr[j] + rubyProfilerStats + .perRequestTypeStats + .m_missTypeLatencyHistSeqr[j] ->add(seq->getMissTypeLatencyHist(j)); } // add the per machine type miss latencies for (uint32_t j = 0; j < MachineType_NUM; ++j) { - m_hitMachLatencyHistSeqr[j] + rubyProfilerStats + .perMachineTypeStats + .m_hitMachLatencyHistSeqr[j] ->add(seq->getHitMachLatencyHist(j)); - m_missMachLatencyHistSeqr[j] + rubyProfilerStats + .perMachineTypeStats + .m_missMachLatencyHistSeqr[j] ->add(seq->getMissMachLatencyHist(j)); - m_IssueToInitialDelayHistSeqr[j]->add( - seq->getIssueToInitialDelayHist(MachineType(j))); - - m_InitialToForwardDelayHistSeqr[j]->add( - seq->getInitialToForwardDelayHist(MachineType(j))); - m_ForwardToFirstResponseDelayHistSeqr[j]->add(seq-> - getForwardRequestToFirstResponseHist(MachineType(j))); - - m_FirstResponseToCompletionDelayHistSeqr[j]->add(seq-> - getFirstResponseToCompletionDelayHist( - MachineType(j))); - m_IncompleteTimesSeqr[j] += - seq->getIncompleteTimes(MachineType(j)); + rubyProfilerStats + .perMachineTypeStats + .m_IssueToInitialDelayHistSeqr[j] + ->add(seq->getIssueToInitialDelayHist(MachineType(j))); + + rubyProfilerStats + .perMachineTypeStats + .m_InitialToForwardDelayHistSeqr[j] + ->add(seq + ->getInitialToForwardDelayHist(MachineType(j))); + rubyProfilerStats + .perMachineTypeStats + .m_ForwardToFirstResponseDelayHistSeqr[j] + ->add(seq + ->getForwardRequestToFirstResponseHist( + MachineType(j))); + + rubyProfilerStats + .perMachineTypeStats + .m_FirstResponseToCompletionDelayHistSeqr[j] + ->add(seq + 
->getFirstResponseToCompletionDelayHist( + MachineType(j))); + + *(rubyProfilerStats + .perMachineTypeStats + .m_IncompleteTimesSeqr[j]) += + seq->getIncompleteTimes(MachineType(j)); } // add the per (request, machine) type miss latencies for (uint32_t j = 0; j < RubyRequestType_NUM; j++) { for (uint32_t k = 0; k < MachineType_NUM; k++) { - m_hitTypeMachLatencyHistSeqr[j][k]->add( + rubyProfilerStats + .perRequestTypeMachineTypeStats + .m_hitTypeMachLatencyHistSeqr[j][k]->add( seq->getHitTypeMachLatencyHist(j,k)); - m_missTypeMachLatencyHistSeqr[j][k]->add( + rubyProfilerStats + .perRequestTypeMachineTypeStats + .m_missTypeMachLatencyHistSeqr[j][k]->add( seq->getMissTypeMachLatencyHist(j,k)); } } @@ -447,40 +493,61 @@ Profiler::collateStats() GPUCoalescer *coal = ctr->getGPUCoalescer(); if (coal != NULL) { // add all the latencies - m_latencyHistCoalsr.add(coal->getLatencyHist()); - m_missLatencyHistCoalsr.add(coal->getMissLatencyHist()); + rubyProfilerStats. + m_latencyHistCoalsr.add(coal->getLatencyHist()); + rubyProfilerStats. 
+ m_missLatencyHistCoalsr.add(coal->getMissLatencyHist()); // add the per request type latencies for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) { - m_typeLatencyHistCoalsr[j] + rubyProfilerStats + .perRequestTypeStats + .m_typeLatencyHistCoalsr[j] ->add(coal->getTypeLatencyHist(j)); - m_missTypeLatencyHistCoalsr[j] + rubyProfilerStats + .perRequestTypeStats + .m_missTypeLatencyHistCoalsr[j] ->add(coal->getMissTypeLatencyHist(j)); } // add the per machine type miss latencies for (uint32_t j = 0; j < MachineType_NUM; ++j) { - m_missMachLatencyHistCoalsr[j] + rubyProfilerStats + .perMachineTypeStats + .m_missMachLatencyHistCoalsr[j] ->add(coal->getMissMachLatencyHist(j)); - m_IssueToInitialDelayHistCoalsr[j]->add( - coal->getIssueToInitialDelayHist(MachineType(j))); + rubyProfilerStats + .perMachineTypeStats + .m_IssueToInitialDelayHistCoalsr[j] + ->add(coal->getIssueToInitialDelayHist( + MachineType(j))); - m_InitialToForwardDelayHistCoalsr[j]->add( - coal->getInitialToForwardDelayHist(MachineType(j))); - m_ForwardToFirstResponseDelayHistCoalsr[j]->add(coal-> - getForwardRequestToFirstResponseHist(MachineType(j))); + rubyProfilerStats + .perMachineTypeStats + .m_InitialToForwardDelayHistCoalsr[j] + ->add(coal->getInitialToForwardDelayHist( + MachineType(j))); + rubyProfilerStats + .perMachineTypeStats + .m_ForwardToFirstResponseDelayHistCoalsr[j] + ->add(coal->getForwardRequestToFirstResponseHist( + MachineType(j))); - m_FirstResponseToCompletionDelayHistCoalsr[j]->add(coal-> - getFirstResponseToCompletionDelayHist( + rubyProfilerStats + .perMachineTypeStats + .m_FirstResponseToCompletionDelayHistCoalsr[j] + ->add(coal->getFirstResponseToCompletionDelayHist( MachineType(j))); } // add the per (request, machine) type miss latencies for (uint32_t j = 0; j < RubyRequestType_NUM; j++) { for (uint32_t k = 0; k < MachineType_NUM; k++) { - m_missTypeMachLatencyHistCoalsr[j][k]->add( - coal->getMissTypeMachLatencyHist(j,k)); + rubyProfilerStats + 
.perRequestTypeMachineTypeStats + .m_missTypeMachLatencyHistCoalsr[j][k] + ->add(coal->getMissTypeMachLatencyHist(j,k)); } } } @@ -504,3 +571,4 @@ Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id) msg.getType(), msg.getAccessMode(), id, false); } } + diff --git a/src/mem/ruby/profiler/Profiler.hh b/src/mem/ruby/profiler/Profiler.hh index c1d08e4e4..1c81eebcc 100644 --- a/src/mem/ruby/profiler/Profiler.hh +++ b/src/mem/ruby/profiler/Profiler.hh @@ -46,6 +46,7 @@ #define __MEM_RUBY_PROFILER_PROFILER_HH__ #include +#include #include #include @@ -70,7 +71,7 @@ class Profiler RubySystem *m_ruby_system; void wakeup(); - void regStats(const std::string &name); + void regStats(); void collateStats(); AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; } @@ -90,58 +91,101 @@ class Profiler AddressProfiler* m_address_profiler_ptr; AddressProfiler* m_inst_profiler_ptr; - Stats::Histogram delayHistogram; - std::vector delayVCHistogram; - - //! Histogram for number of outstanding requests per cycle. - Stats::Histogram m_outstandReqHistSeqr; - Stats::Histogram m_outstandReqHistCoalsr; - - //! Histogram for holding latency profile of all requests. - Stats::Histogram m_latencyHistSeqr; - Stats::Histogram m_latencyHistCoalsr; - std::vector m_typeLatencyHistSeqr; - std::vector m_typeLatencyHistCoalsr; - - //! Histogram for holding latency profile of all requests that - //! hit in the controller connected to this sequencer. - Stats::Histogram m_hitLatencyHistSeqr; - std::vector m_hitTypeLatencyHistSeqr; - - //! Histograms for profiling the latencies for requests that - //! did not required external messages. - std::vector m_hitMachLatencyHistSeqr; - std::vector< std::vector > m_hitTypeMachLatencyHistSeqr; - - //! Histogram for holding latency profile of all requests that - //! miss in the controller connected to this sequencer. 
- Stats::Histogram m_missLatencyHistSeqr; - Stats::Histogram m_missLatencyHistCoalsr; - std::vector m_missTypeLatencyHistSeqr; - std::vector m_missTypeLatencyHistCoalsr; - - //! Histograms for profiling the latencies for requests that - //! required external messages. - std::vector m_missMachLatencyHistSeqr; - std::vector< std::vector > m_missTypeMachLatencyHistSeqr; - std::vector m_missMachLatencyHistCoalsr; - std::vector< std::vector > m_missTypeMachLatencyHistCoalsr; - - //! Histograms for recording the breakdown of miss latency - std::vector m_IssueToInitialDelayHistSeqr; - std::vector m_InitialToForwardDelayHistSeqr; - std::vector m_ForwardToFirstResponseDelayHistSeqr; - std::vector m_FirstResponseToCompletionDelayHistSeqr; - Stats::Scalar m_IncompleteTimesSeqr[MachineType_NUM]; - std::vector m_IssueToInitialDelayHistCoalsr; - std::vector m_InitialToForwardDelayHistCoalsr; - std::vector m_ForwardToFirstResponseDelayHistCoalsr; - std::vector m_FirstResponseToCompletionDelayHistCoalsr; + struct ProfilerStats : public Stats::Group + { + ProfilerStats(Stats::Group *parent, Profiler *profiler); + + struct PerRequestTypeStats : public Stats::Group + { + PerRequestTypeStats(Stats::Group *parent); + + // Histogram of the latency of each request type + std::vector m_typeLatencyHistSeqr; + std::vector m_typeLatencyHistCoalsr; + + // Histogram of the latency of requests that hit in the controller + // connected to this sequencer for each type of request + std::vector m_hitTypeLatencyHistSeqr; + + // Histogram of the latency of requests that miss in the controller + // connected to this sequencer for each type of request + std::vector m_missTypeLatencyHistSeqr; + std::vector m_missTypeLatencyHistCoalsr; + } perRequestTypeStats; + + struct PerMachineTypeStats : public Stats::Group + { + PerMachineTypeStats(Stats::Group *parent); + + //! Histograms for profiling the latencies for requests that + //! did not required external messages. 
+ std::vector m_hitMachLatencyHistSeqr; + + //! Histograms for profiling the latencies for requests that + //! required external messages. + std::vector m_missMachLatencyHistSeqr; + std::vector m_missMachLatencyHistCoalsr; + + //! Histograms for recording the breakdown of miss latency + std::vector m_IssueToInitialDelayHistSeqr; + std::vector m_InitialToForwardDelayHistSeqr; + std::vector + m_ForwardToFirstResponseDelayHistSeqr; + std::vector + m_FirstResponseToCompletionDelayHistSeqr; + std::vector m_IncompleteTimesSeqr; + std::vector m_IssueToInitialDelayHistCoalsr; + std::vector m_InitialToForwardDelayHistCoalsr; + std::vector + m_ForwardToFirstResponseDelayHistCoalsr; + std::vector + m_FirstResponseToCompletionDelayHistCoalsr; + } perMachineTypeStats; + + struct PerRequestTypeMachineTypeStats : public Stats::Group + { + PerRequestTypeMachineTypeStats(Stats::Group *parent); + + //! Histograms for profiling the latencies for requests that + //! did not required external messages. + std::vector< std::vector > + m_hitTypeMachLatencyHistSeqr; + + //! Histograms for profiling the latencies for requests that + //! required external messages. + std::vector< std::vector > + m_missTypeMachLatencyHistSeqr; + std::vector< std::vector > + m_missTypeMachLatencyHistCoalsr; + } perRequestTypeMachineTypeStats; + + Stats::Histogram delayHistogram; + std::vector delayVCHistogram; + + //! Histogram for number of outstanding requests per cycle. + Stats::Histogram m_outstandReqHistSeqr; + Stats::Histogram m_outstandReqHistCoalsr; + + //! Histogram for holding latency profile of all requests. + Stats::Histogram m_latencyHistSeqr; + Stats::Histogram m_latencyHistCoalsr; + + //! Histogram for holding latency profile of all requests that + //! hit in the controller connected to this sequencer. + Stats::Histogram m_hitLatencyHistSeqr; + + //! Histogram for holding latency profile of all requests that + //! miss in the controller connected to this sequencer. 
+ Stats::Histogram m_missLatencyHistSeqr; + Stats::Histogram m_missLatencyHistCoalsr; + }; //added by SS const bool m_hot_lines; const bool m_all_instructions; const uint32_t m_num_vnets; + + ProfilerStats rubyProfilerStats; }; #endif // __MEM_RUBY_PROFILER_PROFILER_HH__ diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc index 62e6022bc..3a27f8e6b 100644 --- a/src/mem/ruby/structures/CacheMemory.cc +++ b/src/mem/ruby/structures/CacheMemory.cc @@ -67,7 +67,17 @@ CacheMemory::CacheMemory(const Params &p) dataArray(p.dataArrayBanks, p.dataAccessLatency, p.start_index_bit, p.ruby_system), tagArray(p.tagArrayBanks, p.tagAccessLatency, - p.start_index_bit, p.ruby_system) + p.start_index_bit, p.ruby_system), + cacheMemoryStats(this), + ADD_STAT(m_demand_hits, "Number of cache demand hits"), + ADD_STAT(m_demand_misses, "Number of cache demand misses"), + ADD_STAT(m_demand_accesses, "Number of cache demand accesses", + m_demand_hits + m_demand_misses), + ADD_STAT(m_sw_prefetches, "Number of software prefetches"), + ADD_STAT(m_hw_prefetches, "Number of hardware prefetches"), + ADD_STAT(m_prefetches, "Number of prefetches", + m_sw_prefetches + m_hw_prefetches), + ADD_STAT(m_accessModeType, "") { m_cache_size = p.size; m_cache_assoc = p.assoc; @@ -78,6 +88,26 @@ CacheMemory::CacheMemory(const Params &p) m_block_size = p.block_size; // may be 0 at this point. Updated in init() m_use_occupancy = dynamic_cast( m_replacementPolicy_ptr) ? 
true : false; + + m_sw_prefetches + .flags(Stats::nozero); + + m_hw_prefetches + .flags(Stats::nozero); + + m_prefetches + .flags(Stats::nozero); + + m_accessModeType + .init(RubyRequestType_NUM) + .flags(Stats::pdf | Stats::total); + + for (int i = 0; i < RubyAccessMode_NUM; i++) { + m_accessModeType + .subname(i, RubyAccessMode_to_string(RubyAccessMode(i))) + .flags(Stats::nozero) + ; + } } void @@ -514,123 +544,57 @@ CacheMemory::isLocked(Addr address, int context) return entry->isLocked(context); } -void -CacheMemory::regStats() +CacheMemory:: +CacheMemoryStats::CacheMemoryStats(Stats::Group *parent) + : Stats::Group(parent), + ADD_STAT(numDataArrayReads, "Number of data array reads"), + ADD_STAT(numDataArrayWrites, "Number of data array writes"), + ADD_STAT(numTagArrayReads, "Number of tag array reads"), + ADD_STAT(numTagArrayWrites, "Number of tag array writes"), + ADD_STAT(numTagArrayStalls, "Number of stalls caused by tag array"), + ADD_STAT(numDataArrayStalls, "Number of stalls caused by data array"), + ADD_STAT(htmTransCommitReadSet, "Read set size of a committed " + "transaction"), + ADD_STAT(htmTransCommitWriteSet, "Write set size of a committed " + "transaction"), + ADD_STAT(htmTransAbortReadSet, "Read set size of a aborted transaction"), + ADD_STAT(htmTransAbortWriteSet, "Write set size of a aborted " + "transaction") { - SimObject::regStats(); - - m_demand_hits - .name(name() + ".demand_hits") - .desc("Number of cache demand hits") - ; - - m_demand_misses - .name(name() + ".demand_misses") - .desc("Number of cache demand misses") - ; - - m_demand_accesses - .name(name() + ".demand_accesses") - .desc("Number of cache demand accesses") - ; - - m_demand_accesses = m_demand_hits + m_demand_misses; - - m_sw_prefetches - .name(name() + ".total_sw_prefetches") - .desc("Number of software prefetches") - .flags(Stats::nozero) - ; - - m_hw_prefetches - .name(name() + ".total_hw_prefetches") - .desc("Number of hardware prefetches") - .flags(Stats::nozero) - ; - 
- m_prefetches - .name(name() + ".total_prefetches") - .desc("Number of prefetches") - .flags(Stats::nozero) - ; - - m_prefetches = m_sw_prefetches + m_hw_prefetches; - - m_accessModeType - .init(RubyRequestType_NUM) - .name(name() + ".access_mode") - .flags(Stats::pdf | Stats::total) - ; - for (int i = 0; i < RubyAccessMode_NUM; i++) { - m_accessModeType - .subname(i, RubyAccessMode_to_string(RubyAccessMode(i))) - .flags(Stats::nozero) - ; - } - numDataArrayReads - .name(name() + ".num_data_array_reads") - .desc("number of data array reads") - .flags(Stats::nozero) - ; + .flags(Stats::nozero); numDataArrayWrites - .name(name() + ".num_data_array_writes") - .desc("number of data array writes") - .flags(Stats::nozero) - ; + .flags(Stats::nozero); numTagArrayReads - .name(name() + ".num_tag_array_reads") - .desc("number of tag array reads") - .flags(Stats::nozero) - ; + .flags(Stats::nozero); numTagArrayWrites - .name(name() + ".num_tag_array_writes") - .desc("number of tag array writes") - .flags(Stats::nozero) - ; + .flags(Stats::nozero); numTagArrayStalls - .name(name() + ".num_tag_array_stalls") - .desc("number of stalls caused by tag array") - .flags(Stats::nozero) - ; + .flags(Stats::nozero); numDataArrayStalls - .name(name() + ".num_data_array_stalls") - .desc("number of stalls caused by data array") - .flags(Stats::nozero) - ; + .flags(Stats::nozero); htmTransCommitReadSet .init(8) - .name(name() + ".htm_transaction_committed_read_set") - .desc("read set size of a committed transaction") - .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan) - ; + .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan); htmTransCommitWriteSet .init(8) - .name(name() + ".htm_transaction_committed_write_set") - .desc("write set size of a committed transaction") - .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan) - ; + .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan); htmTransAbortReadSet .init(8) - .name(name() + 
".htm_transaction_aborted_read_set") - .desc("read set size of a aborted transaction") - .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan) - ; + .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan); htmTransAbortWriteSet .init(8) - .name(name() + ".htm_transaction_aborted_write_set") - .desc("write set size of a aborted transaction") - .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan) - ; + .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan); + } // assumption: SLICC generated files will only call this function @@ -644,22 +608,22 @@ CacheMemory::recordRequestType(CacheRequestType requestType, Addr addr) case CacheRequestType_DataArrayRead: if (m_resource_stalls) dataArray.reserve(addressToCacheSet(addr)); - numDataArrayReads++; + cacheMemoryStats.numDataArrayReads++; return; case CacheRequestType_DataArrayWrite: if (m_resource_stalls) dataArray.reserve(addressToCacheSet(addr)); - numDataArrayWrites++; + cacheMemoryStats.numDataArrayWrites++; return; case CacheRequestType_TagArrayRead: if (m_resource_stalls) tagArray.reserve(addressToCacheSet(addr)); - numTagArrayReads++; + cacheMemoryStats.numTagArrayReads++; return; case CacheRequestType_TagArrayWrite: if (m_resource_stalls) tagArray.reserve(addressToCacheSet(addr)); - numTagArrayWrites++; + cacheMemoryStats.numTagArrayWrites++; return; default: warn("CacheMemory access_type not found: %s", @@ -680,7 +644,7 @@ CacheMemory::checkResourceAvailable(CacheResourceType res, Addr addr) DPRINTF(RubyResourceStalls, "Tag array stall on addr %#x in set %d\n", addr, addressToCacheSet(addr)); - numTagArrayStalls++; + cacheMemoryStats.numTagArrayStalls++; return false; } } else if (res == CacheResourceType_DataArray) { @@ -689,7 +653,7 @@ CacheMemory::checkResourceAvailable(CacheResourceType res, Addr addr) DPRINTF(RubyResourceStalls, "Data array stall on addr %#x in set %d\n", addr, addressToCacheSet(addr)); - numDataArrayStalls++; + cacheMemoryStats.numDataArrayStalls++; 
return false; } } else { @@ -739,8 +703,8 @@ CacheMemory::htmAbortTransaction() } } - htmTransAbortReadSet.sample(htmReadSetSize); - htmTransAbortWriteSet.sample(htmWriteSetSize); + cacheMemoryStats.htmTransAbortReadSet.sample(htmReadSetSize); + cacheMemoryStats.htmTransAbortWriteSet.sample(htmWriteSetSize); DPRINTF(HtmMem, "htmAbortTransaction: read set=%u write set=%u\n", htmReadSetSize, htmWriteSetSize); } @@ -769,8 +733,8 @@ CacheMemory::htmCommitTransaction() } } - htmTransCommitReadSet.sample(htmReadSetSize); - htmTransCommitWriteSet.sample(htmWriteSetSize); + cacheMemoryStats.htmTransCommitReadSet.sample(htmReadSetSize); + cacheMemoryStats.htmTransCommitWriteSet.sample(htmWriteSetSize); DPRINTF(HtmMem, "htmCommitTransaction: read set=%u write set=%u\n", htmReadSetSize, htmWriteSetSize); } diff --git a/src/mem/ruby/structures/CacheMemory.hh b/src/mem/ruby/structures/CacheMemory.hh index 9434660cb..c126a535d 100644 --- a/src/mem/ruby/structures/CacheMemory.hh +++ b/src/mem/ruby/structures/CacheMemory.hh @@ -140,7 +140,6 @@ class CacheMemory : public SimObject void print(std::ostream& out) const; void printData(std::ostream& out) const; - void regStats(); bool checkResourceAvailable(CacheResourceType res, Addr addr); void recordRequestType(CacheRequestType requestType, Addr addr); @@ -149,29 +148,24 @@ class CacheMemory : public SimObject void htmCommitTransaction(); public: - Stats::Scalar m_demand_hits; - Stats::Scalar m_demand_misses; - Stats::Formula m_demand_accesses; - - Stats::Scalar m_sw_prefetches; - Stats::Scalar m_hw_prefetches; - Stats::Formula m_prefetches; - - Stats::Vector m_accessModeType; + struct CacheMemoryStats : public Stats::Group + { + CacheMemoryStats(Stats::Group *parent); - Stats::Scalar numDataArrayReads; - Stats::Scalar numDataArrayWrites; - Stats::Scalar numTagArrayReads; - Stats::Scalar numTagArrayWrites; + Stats::Scalar numDataArrayReads; + Stats::Scalar numDataArrayWrites; + Stats::Scalar numTagArrayReads; + Stats::Scalar 
numTagArrayWrites; - Stats::Scalar numTagArrayStalls; - Stats::Scalar numDataArrayStalls; + Stats::Scalar numTagArrayStalls; + Stats::Scalar numDataArrayStalls; - // hardware transactional memory - Stats::Histogram htmTransCommitReadSet; - Stats::Histogram htmTransCommitWriteSet; - Stats::Histogram htmTransAbortReadSet; - Stats::Histogram htmTransAbortWriteSet; + // hardware transactional memory + Stats::Histogram htmTransCommitReadSet; + Stats::Histogram htmTransCommitWriteSet; + Stats::Histogram htmTransAbortReadSet; + Stats::Histogram htmTransAbortWriteSet; + }; int getCacheSize() const { return m_cache_size; } int getCacheAssoc() const { return m_cache_assoc; } @@ -229,6 +223,18 @@ class CacheMemory : public SimObject * false. */ bool m_use_occupancy; + + public: + CacheMemoryStats cacheMemoryStats; + Stats::Scalar m_demand_hits; + Stats::Scalar m_demand_misses; + Stats::Formula m_demand_accesses; + + Stats::Scalar m_sw_prefetches; + Stats::Scalar m_hw_prefetches; + Stats::Formula m_prefetches; + + Stats::Vector m_accessModeType; }; std::ostream& operator<<(std::ostream& out, const CacheMemory& obj); diff --git a/src/mem/ruby/structures/RubyPrefetcher.cc b/src/mem/ruby/structures/RubyPrefetcher.cc index feee1abde..7848b142f 100644 --- a/src/mem/ruby/structures/RubyPrefetcher.cc +++ b/src/mem/ruby/structures/RubyPrefetcher.cc @@ -56,51 +56,28 @@ RubyPrefetcher::RubyPrefetcher(const Params &p) negativeFilter(p.unit_filter), nonUnitFilter(p.nonunit_filter), m_prefetch_cross_pages(p.cross_page), - m_page_shift(p.sys->getPageShift()) + m_page_shift(p.sys->getPageShift()), + rubyPrefetcherStats(this) { assert(m_num_streams > 0); assert(m_num_startup_pfs <= MAX_PF_INFLIGHT); } -void -RubyPrefetcher::regStats() +RubyPrefetcher:: +RubyPrefetcherStats::RubyPrefetcherStats(Stats::Group *parent) + : Stats::Group(parent, "RubyPrefetcher"), + ADD_STAT(numMissObserved, "Number of misses observed"), + ADD_STAT(numAllocatedStreams, "Number of streams allocated for " + 
"prefetching"), + ADD_STAT(numPrefetchRequested, "Number of prefetch requests made"), + ADD_STAT(numHits, "Number of prefetched blocks accessed " + "(for the first time)"), + ADD_STAT(numPartialHits, "Number of misses observed for a block being " + "prefetched"), + ADD_STAT(numPagesCrossed, "Number of prefetches across pages"), + ADD_STAT(numMissedPrefetchedBlocks, "Number of misses for blocks that " + "were prefetched, yet missed") { - SimObject::regStats(); - - numMissObserved - .name(name() + ".miss_observed") - .desc("number of misses observed") - ; - - numAllocatedStreams - .name(name() + ".allocated_streams") - .desc("number of streams allocated for prefetching") - ; - - numPrefetchRequested - .name(name() + ".prefetches_requested") - .desc("number of prefetch requests made") - ; - - numHits - .name(name() + ".hits") - .desc("number of prefetched blocks accessed (for the first time)") - ; - - numPartialHits - .name(name() + ".partial_hits") - .desc("number of misses observed for a block being prefetched") - ; - - numPagesCrossed - .name(name() + ".pages_crossed") - .desc("number of prefetches across pages") - ; - - numMissedPrefetchedBlocks - .name(name() + ".misses_on_prefetched_blocks") - .desc("number of misses for blocks that were prefetched, yet missed") - ; } void @@ -108,7 +85,7 @@ RubyPrefetcher::observeMiss(Addr address, const RubyRequestType& type) { DPRINTF(RubyPrefetcher, "Observed miss for %#x\n", address); Addr line_addr = makeLineAddress(address); - numMissObserved++; + rubyPrefetcherStats.numMissObserved++; // check to see if we have already issued a prefetch for this block uint32_t index = 0; @@ -118,12 +95,12 @@ RubyPrefetcher::observeMiss(Addr address, const RubyRequestType& type) if (pfEntry->requestCompleted[index]) { // We prefetched too early and now the prefetch block no // longer exists in the cache - numMissedPrefetchedBlocks++; + rubyPrefetcherStats.numMissedPrefetchedBlocks++; return; } else { // The controller has issued the 
prefetch request, // but the request for the block arrived earlier. - numPartialHits++; + rubyPrefetcherStats.numPartialHits++; observePfMiss(line_addr); return; } @@ -152,7 +129,7 @@ RubyPrefetcher::observeMiss(Addr address, const RubyRequestType& type) void RubyPrefetcher::observePfMiss(Addr address) { - numPartialHits++; + rubyPrefetcherStats.numPartialHits++; DPRINTF(RubyPrefetcher, "Observed partial hit for %#x\n", address); issueNextPrefetch(address, NULL); } @@ -160,7 +137,7 @@ RubyPrefetcher::observePfMiss(Addr address) void RubyPrefetcher::observePfHit(Addr address) { - numHits++; + rubyPrefetcherStats.numHits++; DPRINTF(RubyPrefetcher, "Observed hit for %#x\n", address); issueNextPrefetch(address, NULL); } @@ -193,11 +170,11 @@ RubyPrefetcher::issueNextPrefetch(Addr address, PrefetchEntry *stream) stream->m_is_valid = false; return; } - numPagesCrossed++; + rubyPrefetcherStats.numPagesCrossed++; } // launch next prefetch - numPrefetchRequested++; + rubyPrefetcherStats.numPrefetchRequested++; stream->m_address = line_addr; stream->m_use_time = m_controller->curCycle(); DPRINTF(RubyPrefetcher, "Requesting prefetch for %#x\n", line_addr); @@ -227,7 +204,7 @@ void RubyPrefetcher::initializeStream(Addr address, int stride, uint32_t index, const RubyRequestType& type) { - numAllocatedStreams++; + rubyPrefetcherStats.numAllocatedStreams++; // initialize the stream prefetcher PrefetchEntry *mystream = &(m_array[index]); @@ -251,11 +228,11 @@ RubyPrefetcher::initializeStream(Addr address, int stride, mystream->m_is_valid = false; return; } - numPagesCrossed++; + rubyPrefetcherStats.numPagesCrossed++; } // launch prefetch - numPrefetchRequested++; + rubyPrefetcherStats.numPrefetchRequested++; DPRINTF(RubyPrefetcher, "Requesting prefetch for %#x\n", line_addr); m_controller->enqueuePrefetch(line_addr, m_array[index].m_type); } diff --git a/src/mem/ruby/structures/RubyPrefetcher.hh b/src/mem/ruby/structures/RubyPrefetcher.hh index 89acb3b8b..1d3028baf 100644 --- 
a/src/mem/ruby/structures/RubyPrefetcher.hh +++ b/src/mem/ruby/structures/RubyPrefetcher.hh @@ -123,8 +123,6 @@ class RubyPrefetcher : public SimObject void setController(AbstractController *_ctrl) { m_controller = _ctrl; } - void regStats(); - private: struct UnitFilterEntry { @@ -236,20 +234,25 @@ class RubyPrefetcher : public SimObject const Addr m_page_shift; - //! Count of accesses to the prefetcher - Stats::Scalar numMissObserved; - //! Count of prefetch streams allocated - Stats::Scalar numAllocatedStreams; - //! Count of prefetch requests made - Stats::Scalar numPrefetchRequested; - //! Count of successful prefetches - Stats::Scalar numHits; - //! Count of partial successful prefetches - Stats::Scalar numPartialHits; - //! Count of pages crossed - Stats::Scalar numPagesCrossed; - //! Count of misses incurred for blocks that were prefetched - Stats::Scalar numMissedPrefetchedBlocks; + struct RubyPrefetcherStats : public Stats::Group + { + RubyPrefetcherStats(Stats::Group *parent); + + //! Count of accesses to the prefetcher + Stats::Scalar numMissObserved; + //! Count of prefetch streams allocated + Stats::Scalar numAllocatedStreams; + //! Count of prefetch requests made + Stats::Scalar numPrefetchRequested; + //! Count of successful prefetches + Stats::Scalar numHits; + //! Count of partial successful prefetches + Stats::Scalar numPartialHits; + //! Count of pages crossed + Stats::Scalar numPagesCrossed; + //! 
Count of misses incurred for blocks that were prefetched + Stats::Scalar numMissedPrefetchedBlocks; + } rubyPrefetcherStats; }; #endif // __MEM_RUBY_STRUCTURES_PREFETCHER_HH__ diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc index b51a9e734..e9c108fa1 100644 --- a/src/mem/ruby/system/GPUCoalescer.cc +++ b/src/mem/ruby/system/GPUCoalescer.cc @@ -214,6 +214,49 @@ GPUCoalescer::GPUCoalescer(const Params &p) assert(m_dataCache_ptr); m_runningGarnetStandalone = p.garnet_standalone; + + + // These statistical variables are not for display. + // The profiler will collate these across different + // coalescers and display those collated statistics. + m_outstandReqHist.init(10); + m_latencyHist.init(10); + m_missLatencyHist.init(10); + + for (int i = 0; i < RubyRequestType_NUM; i++) { + m_typeLatencyHist.push_back(new Stats::Histogram()); + m_typeLatencyHist[i]->init(10); + + m_missTypeLatencyHist.push_back(new Stats::Histogram()); + m_missTypeLatencyHist[i]->init(10); + } + + for (int i = 0; i < MachineType_NUM; i++) { + m_missMachLatencyHist.push_back(new Stats::Histogram()); + m_missMachLatencyHist[i]->init(10); + + m_IssueToInitialDelayHist.push_back(new Stats::Histogram()); + m_IssueToInitialDelayHist[i]->init(10); + + m_InitialToForwardDelayHist.push_back(new Stats::Histogram()); + m_InitialToForwardDelayHist[i]->init(10); + + m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram()); + m_ForwardToFirstResponseDelayHist[i]->init(10); + + m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram()); + m_FirstResponseToCompletionDelayHist[i]->init(10); + } + + for (int i = 0; i < RubyRequestType_NUM; i++) { + m_missTypeMachLatencyHist.push_back(std::vector()); + + for (int j = 0; j < MachineType_NUM; j++) { + m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram()); + m_missTypeMachLatencyHist[i][j]->init(10); + } + } + } GPUCoalescer::~GPUCoalescer() @@ -907,49 +950,3 @@ 
GPUCoalescer::recordMissLatency(CoalescedRequest* crequest, { } -void -GPUCoalescer::regStats() -{ - RubyPort::regStats(); - - // These statistical variables are not for display. - // The profiler will collate these across different - // coalescers and display those collated statistics. - m_outstandReqHist.init(10); - m_latencyHist.init(10); - m_missLatencyHist.init(10); - - for (int i = 0; i < RubyRequestType_NUM; i++) { - m_typeLatencyHist.push_back(new Stats::Histogram()); - m_typeLatencyHist[i]->init(10); - - m_missTypeLatencyHist.push_back(new Stats::Histogram()); - m_missTypeLatencyHist[i]->init(10); - } - - for (int i = 0; i < MachineType_NUM; i++) { - m_missMachLatencyHist.push_back(new Stats::Histogram()); - m_missMachLatencyHist[i]->init(10); - - m_IssueToInitialDelayHist.push_back(new Stats::Histogram()); - m_IssueToInitialDelayHist[i]->init(10); - - m_InitialToForwardDelayHist.push_back(new Stats::Histogram()); - m_InitialToForwardDelayHist[i]->init(10); - - m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram()); - m_ForwardToFirstResponseDelayHist[i]->init(10); - - m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram()); - m_FirstResponseToCompletionDelayHist[i]->init(10); - } - - for (int i = 0; i < RubyRequestType_NUM; i++) { - m_missTypeMachLatencyHist.push_back(std::vector()); - - for (int j = 0; j < MachineType_NUM; j++) { - m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram()); - m_missTypeMachLatencyHist[i][j]->init(10); - } - } -} diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh index 709b491a8..05d5269e9 100644 --- a/src/mem/ruby/system/GPUCoalescer.hh +++ b/src/mem/ruby/system/GPUCoalescer.hh @@ -243,7 +243,6 @@ class GPUCoalescer : public RubyPort void printProgress(std::ostream& out) const; void resetStats() override; void collateStats(); - void regStats() override; // each store request needs two callbacks: // (1) writeCallback is called when the store is received 
and processed diff --git a/src/mem/ruby/system/HTMSequencer.cc b/src/mem/ruby/system/HTMSequencer.cc index 2f24be638..15071fabc 100644 --- a/src/mem/ruby/system/HTMSequencer.cc +++ b/src/mem/ruby/system/HTMSequencer.cc @@ -63,10 +63,37 @@ HTMSequencer::htmRetCodeConversion( } HTMSequencer::HTMSequencer(const RubyHTMSequencerParams &p) - : Sequencer(p) + : Sequencer(p), + ADD_STAT(m_htm_transaction_cycles, "number of cycles spent in an outer " + "transaction"), + ADD_STAT(m_htm_transaction_instructions, "number of instructions spent " + "in an outer transaction"), + ADD_STAT(m_htm_transaction_abort_cause, "cause of htm transaction abort") { m_htmstart_tick = 0; m_htmstart_instruction = 0; + + // hardware transactional memory + m_htm_transaction_cycles + .init(10) + .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan) + ; + m_htm_transaction_instructions + .init(10) + .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan) + ; + auto num_causes = static_cast(HtmFailureFaultCause::NUM_CAUSES); + m_htm_transaction_abort_cause + .init(num_causes) + .flags(Stats::total | Stats::pdf | Stats::dist | Stats::nozero) + ; + + for (unsigned cause_idx = 0; cause_idx < num_causes; ++cause_idx) { + m_htm_transaction_abort_cause.subname( + cause_idx, + htmFailureToStr(HtmFailureFaultCause(cause_idx))); + } + } HTMSequencer::~HTMSequencer() @@ -178,8 +205,6 @@ HTMSequencer::htmCallback(Addr address, void HTMSequencer::regStats() { - Sequencer::regStats(); - // hardware transactional memory m_htm_transaction_cycles .init(10) diff --git a/src/mem/ruby/system/RubySystem.hh b/src/mem/ruby/system/RubySystem.hh index 0c4ffc163..f0b8aff34 100644 --- a/src/mem/ruby/system/RubySystem.hh +++ b/src/mem/ruby/system/RubySystem.hh @@ -79,7 +79,6 @@ class RubySystem : public ClockedObject void regStats() override { ClockedObject::regStats(); - m_profiler->regStats(name()); } void collateStats() { m_profiler->collateStats(); } void resetStats() override; diff --git 
a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 407d27015..76bb385fe 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -76,6 +76,60 @@ Sequencer::Sequencer(const Params &p) assert(m_deadlock_threshold > 0); m_runningGarnetStandalone = p.garnet_standalone; + + + // These statistical variables are not for display. + // The profiler will collate these across different + // sequencers and display those collated statistics. + m_outstandReqHist.init(10); + m_latencyHist.init(10); + m_hitLatencyHist.init(10); + m_missLatencyHist.init(10); + + for (int i = 0; i < RubyRequestType_NUM; i++) { + m_typeLatencyHist.push_back(new Stats::Histogram()); + m_typeLatencyHist[i]->init(10); + + m_hitTypeLatencyHist.push_back(new Stats::Histogram()); + m_hitTypeLatencyHist[i]->init(10); + + m_missTypeLatencyHist.push_back(new Stats::Histogram()); + m_missTypeLatencyHist[i]->init(10); + } + + for (int i = 0; i < MachineType_NUM; i++) { + m_hitMachLatencyHist.push_back(new Stats::Histogram()); + m_hitMachLatencyHist[i]->init(10); + + m_missMachLatencyHist.push_back(new Stats::Histogram()); + m_missMachLatencyHist[i]->init(10); + + m_IssueToInitialDelayHist.push_back(new Stats::Histogram()); + m_IssueToInitialDelayHist[i]->init(10); + + m_InitialToForwardDelayHist.push_back(new Stats::Histogram()); + m_InitialToForwardDelayHist[i]->init(10); + + m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram()); + m_ForwardToFirstResponseDelayHist[i]->init(10); + + m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram()); + m_FirstResponseToCompletionDelayHist[i]->init(10); + } + + for (int i = 0; i < RubyRequestType_NUM; i++) { + m_hitTypeMachLatencyHist.push_back(std::vector()); + m_missTypeMachLatencyHist.push_back(std::vector()); + + for (int j = 0; j < MachineType_NUM; j++) { + m_hitTypeMachLatencyHist[i].push_back(new Stats::Histogram()); + m_hitTypeMachLatencyHist[i][j]->init(10); + + 
m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram()); + m_missTypeMachLatencyHist[i][j]->init(10); + } + } + } Sequencer::~Sequencer() @@ -781,61 +835,3 @@ Sequencer::evictionCallback(Addr address) llscClearMonitor(address); ruby_eviction_callback(address); } - -void -Sequencer::regStats() -{ - RubyPort::regStats(); - - // These statistical variables are not for display. - // The profiler will collate these across different - // sequencers and display those collated statistics. - m_outstandReqHist.init(10); - m_latencyHist.init(10); - m_hitLatencyHist.init(10); - m_missLatencyHist.init(10); - - for (int i = 0; i < RubyRequestType_NUM; i++) { - m_typeLatencyHist.push_back(new Stats::Histogram()); - m_typeLatencyHist[i]->init(10); - - m_hitTypeLatencyHist.push_back(new Stats::Histogram()); - m_hitTypeLatencyHist[i]->init(10); - - m_missTypeLatencyHist.push_back(new Stats::Histogram()); - m_missTypeLatencyHist[i]->init(10); - } - - for (int i = 0; i < MachineType_NUM; i++) { - m_hitMachLatencyHist.push_back(new Stats::Histogram()); - m_hitMachLatencyHist[i]->init(10); - - m_missMachLatencyHist.push_back(new Stats::Histogram()); - m_missMachLatencyHist[i]->init(10); - - m_IssueToInitialDelayHist.push_back(new Stats::Histogram()); - m_IssueToInitialDelayHist[i]->init(10); - - m_InitialToForwardDelayHist.push_back(new Stats::Histogram()); - m_InitialToForwardDelayHist[i]->init(10); - - m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram()); - m_ForwardToFirstResponseDelayHist[i]->init(10); - - m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram()); - m_FirstResponseToCompletionDelayHist[i]->init(10); - } - - for (int i = 0; i < RubyRequestType_NUM; i++) { - m_hitTypeMachLatencyHist.push_back(std::vector()); - m_missTypeMachLatencyHist.push_back(std::vector()); - - for (int j = 0; j < MachineType_NUM; j++) { - m_hitTypeMachLatencyHist[i].push_back(new Stats::Histogram()); - m_hitTypeMachLatencyHist[i][j]->init(10); - - 
m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram()); - m_missTypeMachLatencyHist[i][j]->init(10); - } - } -} diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index 83eea68b6..904d76417 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -95,7 +95,6 @@ class Sequencer : public RubyPort virtual void wakeup(); // Used only for deadlock detection void resetStats() override; void collateStats(); - void regStats() override; void writeCallback(Addr address, DataBlock& data, -- 2.30.2